mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+ upd: Oniguruma current development
This commit is contained in:
parent
9ea9391200
commit
de5fa9bb45
@ -330,8 +330,8 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value
|
||||
normal: match position offset (i.e. p - str >= 0)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
@ -371,8 +371,8 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value
|
||||
normal: match length (>= 0)
|
||||
not match: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
not match: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
@ -411,8 +411,8 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value
|
||||
normal: number of matching times
|
||||
error: error code
|
||||
interruption: return value of callback function (!= 0)
|
||||
error: error code
|
||||
interruption: return value of callback function (!= 0)
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
@ -511,8 +511,8 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value:
|
||||
normal: index of match regex (zero origin)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
arguments
|
||||
1 set: regset object
|
||||
@ -544,8 +544,8 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value:
|
||||
normal: index of match regex (zero origin)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
arguments
|
||||
1 set: regset object
|
||||
@ -620,7 +620,7 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
|
||||
return value
|
||||
normal: number of groups for the name.
|
||||
(ex. /(?<x>..)(?<x>..)/ ==> 2)
|
||||
(ex. /(?<x>..)(?<x>..)/ ==> 2)
|
||||
name not found: -1
|
||||
|
||||
arguments
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
regcomp.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2020 K.Kosako
|
||||
* Copyright (c) 2002-2021 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -3638,8 +3638,8 @@ node_min_byte_len(Node* node, ScanEnv* env)
|
||||
case NODE_CALL:
|
||||
{
|
||||
Node* t = NODE_BODY(node);
|
||||
if (NODE_IS_FIXED_MIN(t))
|
||||
len = BAG_(t)->min_len;
|
||||
if (NODE_IS_FIXED_MIN(t))
|
||||
len = BAG_(t)->min_len;
|
||||
else
|
||||
len = node_min_byte_len(t, env);
|
||||
}
|
||||
@ -4452,19 +4452,21 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
|
||||
BagNode* en = BAG_(node);
|
||||
|
||||
if (en->type == BAG_MEMORY) {
|
||||
if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
|
||||
if (NODE_IS_CALLED(node)) {
|
||||
r = FOUND_CALLED_NODE;
|
||||
goto check_recursion;
|
||||
}
|
||||
else if ((state & IN_RECURSION) != 0) {
|
||||
check_recursion:
|
||||
if (! NODE_IS_RECURSION(node)) {
|
||||
NODE_STATUS_ADD(node, MARK1);
|
||||
r = recursive_call_check(NODE_BODY(node));
|
||||
if (r != 0) {
|
||||
ret = recursive_call_check(NODE_BODY(node));
|
||||
if (ret != 0) {
|
||||
NODE_STATUS_ADD(node, RECURSION);
|
||||
MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
|
||||
}
|
||||
NODE_STATUS_REMOVE(node, MARK1);
|
||||
}
|
||||
|
||||
if (NODE_IS_CALLED(node))
|
||||
r = FOUND_CALLED_NODE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -6277,7 +6279,7 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
|
||||
for (j = 0; j < len && p < end; j++) {
|
||||
/* coverity[overrun-local] */
|
||||
to->s[i++] = *p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
to->len = i;
|
||||
@ -7326,7 +7328,7 @@ static int parse_and_tune(regex_t* reg, const UChar* pattern,
|
||||
)
|
||||
{
|
||||
int r;
|
||||
Node* root;
|
||||
Node* root;
|
||||
|
||||
root = NULL_NODE;
|
||||
if (IS_NOT_NULL(einfo)) {
|
||||
@ -7825,6 +7827,128 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
||||
}
|
||||
|
||||
|
||||
#define MANY_REPEAT_OF_ANYCHAR 20
|
||||
|
||||
typedef enum {
|
||||
MJ_NO = 0,
|
||||
MJ_YES = 1,
|
||||
MJ_IGNORE = 2,
|
||||
} MJ_RESULT;
|
||||
|
||||
static MJ_RESULT
|
||||
mostly_just_anychar(Node* node, int in_reluctant)
|
||||
{
|
||||
MJ_RESULT r;
|
||||
|
||||
r = MJ_NO;
|
||||
switch (NODE_TYPE(node)) {
|
||||
case NODE_LIST:
|
||||
{
|
||||
int found = FALSE;
|
||||
do {
|
||||
r = mostly_just_anychar(NODE_CAR(node), in_reluctant);
|
||||
if (r == MJ_NO) break;
|
||||
if (r == MJ_YES) found = TRUE;
|
||||
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
||||
if (r == MJ_IGNORE) {
|
||||
if (found == TRUE) r = MJ_YES;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_ALT:
|
||||
r = MJ_IGNORE;
|
||||
do {
|
||||
r = mostly_just_anychar(NODE_CAR(node), in_reluctant);
|
||||
if (r == MJ_YES) break;
|
||||
} while (IS_NOT_NULL(node = NODE_CDR(node)));
|
||||
break;
|
||||
|
||||
case NODE_QUANT:
|
||||
{
|
||||
QuantNode* qn = QUANT_(node);
|
||||
|
||||
if (qn->upper == 0)
|
||||
r = MJ_IGNORE;
|
||||
else {
|
||||
if (in_reluctant == FALSE) {
|
||||
if (qn->greedy != 0 &&
|
||||
(! IS_INFINITE_REPEAT(qn->upper) &&
|
||||
qn->upper <= MANY_REPEAT_OF_ANYCHAR)) {
|
||||
in_reluctant = TRUE;
|
||||
}
|
||||
}
|
||||
r = mostly_just_anychar(NODE_BODY(node), in_reluctant);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_ANCHOR:
|
||||
switch (ANCHOR_(node)->type) {
|
||||
case ANCR_PREC_READ:
|
||||
case ANCR_PREC_READ_NOT:
|
||||
case ANCR_LOOK_BEHIND:
|
||||
case ANCR_LOOK_BEHIND_NOT:
|
||||
case ANCR_TEXT_SEGMENT_BOUNDARY: /* \y */
|
||||
r = MJ_IGNORE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_BAG:
|
||||
{
|
||||
BagNode* en = BAG_(node);
|
||||
|
||||
if (en->type == BAG_IF_ELSE) {
|
||||
if (IS_NOT_NULL(en->te.Then)) {
|
||||
r = mostly_just_anychar(en->te.Then, in_reluctant);
|
||||
if (r == MJ_YES) break;
|
||||
}
|
||||
if (IS_NOT_NULL(en->te.Else)) {
|
||||
r = mostly_just_anychar(en->te.Else, in_reluctant);
|
||||
}
|
||||
}
|
||||
else {
|
||||
r = mostly_just_anychar(NODE_BODY(node), in_reluctant);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_CTYPE:
|
||||
if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
||||
r = MJ_YES;
|
||||
else
|
||||
r = MJ_NO;
|
||||
break;
|
||||
|
||||
case NODE_STRING:
|
||||
if (NODE_STRING_LEN(node) == 0) {
|
||||
r = MJ_IGNORE;
|
||||
break;
|
||||
}
|
||||
/* fall */
|
||||
case NODE_CCLASS:
|
||||
r = MJ_NO;
|
||||
break;
|
||||
|
||||
#ifdef USE_CALL
|
||||
case NODE_CALL:
|
||||
/* ignore call */
|
||||
#endif
|
||||
case NODE_BACKREF:
|
||||
case NODE_GIMMICK:
|
||||
r = MJ_IGNORE;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#define MAX_CALLS_IN_DETECT 10
|
||||
|
||||
typedef struct {
|
||||
@ -7833,6 +7957,7 @@ typedef struct {
|
||||
int backref;
|
||||
int backref_with_level;
|
||||
int call;
|
||||
int anychar_reluctant_many;
|
||||
int empty_check_nest_level;
|
||||
int max_empty_check_nest_level;
|
||||
int heavy_element;
|
||||
@ -7856,17 +7981,28 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
|
||||
case NODE_QUANT:
|
||||
{
|
||||
int prev_heavy_element;
|
||||
QuantNode* qn;
|
||||
Node* body;
|
||||
|
||||
if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) {
|
||||
qn = QUANT_(node);
|
||||
body = NODE_BODY(node);
|
||||
|
||||
if (qn->emptiness != BODY_IS_NOT_EMPTY) {
|
||||
prev_heavy_element = ct->heavy_element;
|
||||
ct->empty_check_nest_level++;
|
||||
if (ct->empty_check_nest_level > ct->max_empty_check_nest_level)
|
||||
ct->max_empty_check_nest_level = ct->empty_check_nest_level;
|
||||
}
|
||||
else if (IS_INFINITE_REPEAT(qn->upper) ||
|
||||
qn->upper > MANY_REPEAT_OF_ANYCHAR) {
|
||||
MJ_RESULT mr = mostly_just_anychar(body, (qn->greedy == 0));
|
||||
if (mr == MJ_YES)
|
||||
ct->anychar_reluctant_many++;
|
||||
}
|
||||
|
||||
r = detect_can_be_slow(NODE_BODY(node), ct, ncall, calls);
|
||||
r = detect_can_be_slow(body, ct, ncall, calls);
|
||||
|
||||
if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) {
|
||||
if (qn->emptiness != BODY_IS_NOT_EMPTY) {
|
||||
if (NODE_IS_INPEEK(node)) {
|
||||
if (ct->empty_check_nest_level > 2) {
|
||||
if (prev_heavy_element == ct->heavy_element)
|
||||
@ -7933,7 +8069,7 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
|
||||
int gnum;
|
||||
|
||||
gnum = CALL_(node)->called_gnum;
|
||||
ct->call++;
|
||||
ct->call++;
|
||||
|
||||
if (NODE_IS_RECURSION(node) && NODE_IS_INPEEK(node) &&
|
||||
NODE_IS_IN_REAL_REPEAT(node)) {
|
||||
@ -8005,24 +8141,26 @@ onig_detect_can_be_slow_pattern(const UChar* pattern,
|
||||
}
|
||||
#endif
|
||||
|
||||
count.prec_read = 0;
|
||||
count.look_behind = 0;
|
||||
count.backref = 0;
|
||||
count.backref_with_level = 0;
|
||||
count.call = 0;
|
||||
count.prec_read = 0;
|
||||
count.look_behind = 0;
|
||||
count.backref = 0;
|
||||
count.backref_with_level = 0;
|
||||
count.call = 0;
|
||||
count.anychar_reluctant_many = 0;
|
||||
count.empty_check_nest_level = 0;
|
||||
count.max_empty_check_nest_level = 0;
|
||||
count.heavy_element = 0;
|
||||
|
||||
r = detect_can_be_slow(root, &count, 0, calls);
|
||||
if (r == 0) {
|
||||
int n = count.prec_read + count.look_behind
|
||||
+ count.backref + count.backref_with_level + count.call;
|
||||
if (r == 0) {
|
||||
int n = count.prec_read + count.look_behind
|
||||
+ count.backref + count.backref_with_level + count.call
|
||||
+ count.anychar_reluctant_many;
|
||||
if (count.heavy_element != 0)
|
||||
n += count.heavy_element * 10;
|
||||
|
||||
r = n;
|
||||
}
|
||||
r = n;
|
||||
}
|
||||
|
||||
if (IS_NOT_NULL(scan_env.mem_env_dynamic))
|
||||
xfree(scan_env.mem_env_dynamic);
|
||||
@ -8233,71 +8371,71 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
||||
case NODE_QUANT:
|
||||
{
|
||||
fprintf(f, "<quantifier:%p>{%d,%d}%s%s%s", node,
|
||||
QUANT_(node)->lower, QUANT_(node)->upper,
|
||||
(QUANT_(node)->greedy ? "" : "?"),
|
||||
QUANT_(node)->lower, QUANT_(node)->upper,
|
||||
(QUANT_(node)->greedy ? "" : "?"),
|
||||
QUANT_(node)->include_referred == 0 ? "" : " referred",
|
||||
emptiness_name[QUANT_(node)->emptiness]);
|
||||
if (NODE_IS_INPEEK(node)) fprintf(f, ", in-peek");
|
||||
fprintf(f, "\n");
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
}
|
||||
break;
|
||||
|
||||
case NODE_BAG:
|
||||
{
|
||||
BagNode* bn = BAG_(node);
|
||||
fprintf(f, "<bag:%p> ", node);
|
||||
fprintf(f, "<bag:%p> ", node);
|
||||
if (bn->type == BAG_IF_ELSE) {
|
||||
Node* Then;
|
||||
Node* Else;
|
||||
Node* Then;
|
||||
Node* Else;
|
||||
|
||||
fprintf(f, "if-else\n");
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
fprintf(f, "if-else\n");
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
|
||||
Then = bn->te.Then;
|
||||
Else = bn->te.Else;
|
||||
if (IS_NULL(Then)) {
|
||||
Indent(f, indent + add);
|
||||
fprintf(f, "THEN empty\n");
|
||||
Then = bn->te.Then;
|
||||
Else = bn->te.Else;
|
||||
if (IS_NULL(Then)) {
|
||||
Indent(f, indent + add);
|
||||
fprintf(f, "THEN empty\n");
|
||||
}
|
||||
else
|
||||
print_indent_tree(f, Then, indent + add);
|
||||
|
||||
if (IS_NULL(Else)) {
|
||||
Indent(f, indent + add);
|
||||
fprintf(f, "ELSE empty\n");
|
||||
}
|
||||
else
|
||||
print_indent_tree(f, Else, indent + add);
|
||||
}
|
||||
else
|
||||
print_indent_tree(f, Then, indent + add);
|
||||
|
||||
if (IS_NULL(Else)) {
|
||||
Indent(f, indent + add);
|
||||
fprintf(f, "ELSE empty\n");
|
||||
}
|
||||
else
|
||||
print_indent_tree(f, Else, indent + add);
|
||||
}
|
||||
else {
|
||||
switch (bn->type) {
|
||||
case BAG_OPTION:
|
||||
case BAG_OPTION:
|
||||
fprintf(f, "option:%d", bn->o.options);
|
||||
break;
|
||||
case BAG_MEMORY:
|
||||
break;
|
||||
case BAG_MEMORY:
|
||||
fprintf(f, "memory:%d", bn->m.regnum);
|
||||
if (NODE_IS_CALLED(node)) {
|
||||
fprintf(f, ", called");
|
||||
fprintf(f, ", called");
|
||||
if (NODE_IS_RECURSION(node))
|
||||
fprintf(f, ", recursion");
|
||||
}
|
||||
else if (NODE_IS_REFERENCED(node))
|
||||
fprintf(f, ", referenced");
|
||||
else if (NODE_IS_REFERENCED(node))
|
||||
fprintf(f, ", referenced");
|
||||
|
||||
if (NODE_IS_FIXED_ADDR(node))
|
||||
fprintf(f, ", fixed-addr");
|
||||
if (NODE_IS_FIXED_ADDR(node))
|
||||
fprintf(f, ", fixed-addr");
|
||||
if ((bn->m.called_state & IN_PEEK) != 0)
|
||||
fprintf(f, ", in-peek");
|
||||
break;
|
||||
case BAG_STOP_BACKTRACK:
|
||||
fprintf(f, "stop-bt");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
break;
|
||||
case BAG_STOP_BACKTRACK:
|
||||
fprintf(f, "stop-bt");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
print_indent_tree(f, NODE_BODY(node), indent + add);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -122,7 +122,7 @@ struct PropertyNameCtype {
|
||||
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
#define USE_UNICODE_WORD_BREAK
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
|
||||
|
||||
//~#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
|
||||
@ -2200,12 +2200,12 @@ stack_double(int* is_alloca, char** arg_alloc_base,
|
||||
if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
|
||||
((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
|
||||
goto stack_empty_check_mem_not_empty;\
|
||||
}\
|
||||
}\
|
||||
else {\
|
||||
ms &= ~((MemStatusType )1 << k->zid);\
|
||||
break;\
|
||||
}\
|
||||
}\
|
||||
}\
|
||||
}\
|
||||
kk++;\
|
||||
}\
|
||||
if (ms == 0) break;\
|
||||
@ -2246,7 +2246,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
|
||||
if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
|
||||
((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
|
||||
goto stack_empty_check_mem_rec_not_empty;\
|
||||
}\
|
||||
}\
|
||||
else {\
|
||||
ms &= ~((MemStatusType )1 << k->zid);\
|
||||
break;\
|
||||
@ -2254,10 +2254,10 @@ stack_double(int* is_alloca, char** arg_alloc_base,
|
||||
}\
|
||||
else if (kk->type == STK_EMPTY_CHECK_START) {\
|
||||
if (kk->zid == (sid)) level++;\
|
||||
}\
|
||||
}\
|
||||
else if (kk->type == STK_EMPTY_CHECK_END) {\
|
||||
if (kk->zid == (sid)) level--;\
|
||||
}\
|
||||
}\
|
||||
kk++;\
|
||||
}\
|
||||
level = 0;\
|
||||
@ -3376,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
STACK_PUSH_ALT(p, s);
|
||||
n = enclen(encode, s);
|
||||
DATA_ENSURE(n);
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
s += n;
|
||||
}
|
||||
JUMP_OUT;
|
||||
@ -3408,7 +3408,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
n = enclen(encode, s);
|
||||
DATA_ENSURE(n);
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
s += n;
|
||||
}
|
||||
}
|
||||
@ -3485,15 +3485,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
else {
|
||||
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
|
||||
if (ON_STR_END(s)) {
|
||||
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
else {
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
|
||||
== IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
|
||||
== IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
@ -3510,15 +3510,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
else {
|
||||
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
|
||||
if (ON_STR_END(s)) {
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
else {
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
|
||||
!= IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
|
||||
!= IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
@ -3552,13 +3552,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
if (! ON_STR_BEGIN(s)) {
|
||||
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
|
||||
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
|
||||
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
goto fail;
|
||||
#endif
|
||||
|
||||
@ -3619,11 +3619,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
|
||||
if (!IS_CRLF_NEWLINE(encode) || IS_LF_CODE(encode, sprev, end)) {
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
}
|
||||
}
|
||||
goto fail;
|
||||
|
||||
CASE_OP(END_LINE)
|
||||
@ -3641,9 +3641,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
|
||||
if (!IS_CRLF_NEWLINE(encode) || IS_CR_CODE(encode, s, end)) {
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
}
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
|
||||
@ -3877,9 +3877,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
|
||||
|
||||
if (! backref_match_at_nested_level(reg, stk, stk_base, n,
|
||||
case_fold_flag, level, (int )tlen, mems, &s, end)) {
|
||||
case_fold_flag, level, (int )tlen, mems, &s, end)) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
@ -4133,8 +4133,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
if (SubexpCallLimitInSearch != 0) {
|
||||
msa->subexp_call_in_search_counter++;
|
||||
#ifdef ONIG_DEBUG_MATCH_COUNTER
|
||||
if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
|
||||
subexp_call_counters[p->call.called_mem]++;
|
||||
if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
|
||||
subexp_call_counters[p->call.called_mem]++;
|
||||
if (msa->subexp_call_in_search_counter % 1000 == 0)
|
||||
MATCH_COUNTER_OUT("CALL");
|
||||
#endif
|
||||
@ -4210,7 +4210,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
mem = p->cut_to_mark.id; /* mem: mark id */
|
||||
STACK_TO_VOID_TO_MARK(stkp, mem);
|
||||
if (p->cut_to_mark.restore_pos != 0) {
|
||||
s = stkp->u.val.v;
|
||||
s = stkp->u.val.v;
|
||||
}
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
@ -4370,8 +4370,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
fail:
|
||||
#endif
|
||||
STACK_POP;
|
||||
p = stk->u.state.pcode;
|
||||
s = stk->u.state.pstr;
|
||||
p = stk->u.state.pcode;
|
||||
s = stk->u.state.pstr;
|
||||
CHECK_RETRY_LIMIT_IN_MATCH;
|
||||
JUMP_OUT;
|
||||
|
||||
@ -5175,7 +5175,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
|
||||
if (IS_NOT_NULL(prev)) {
|
||||
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
|
||||
goto retry_gate;
|
||||
goto retry_gate;
|
||||
} else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) {
|
||||
goto retry_gate;
|
||||
}
|
||||
@ -5206,7 +5206,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
}
|
||||
|
||||
if (reg->dist_max == 0) {
|
||||
*low = p;
|
||||
*low = p;
|
||||
*high = p;
|
||||
}
|
||||
else {
|
||||
@ -5277,14 +5277,14 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
prev = onigenc_get_prev_char_head(reg->enc, str, p);
|
||||
if (IS_NOT_NULL(prev)) {
|
||||
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
|
||||
p = prev;
|
||||
goto retry;
|
||||
p = prev;
|
||||
goto retry;
|
||||
} else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) {
|
||||
p = prev;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ANCR_END_LINE:
|
||||
@ -5301,7 +5301,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
&& !ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
|
||||
#endif
|
||||
) {
|
||||
) {
|
||||
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
|
||||
if (IS_NULL(p)) goto fail;
|
||||
goto retry;
|
||||
|
||||
@ -3714,15 +3714,15 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_
|
||||
while (! PEND) {
|
||||
PFETCH(c);
|
||||
if (! IS_CODE_POINT_DIVIDE(c)) {
|
||||
if (c == '}') {
|
||||
*src = p;
|
||||
return 1; /* end of sequence */
|
||||
}
|
||||
else if (c == '-' && in_cc == TRUE) {
|
||||
*src = p;
|
||||
return 2; /* range */
|
||||
}
|
||||
PUNFETCH;
|
||||
if (c == '}') {
|
||||
*src = p;
|
||||
return 1; /* end of sequence */
|
||||
}
|
||||
else if (c == '-' && in_cc == TRUE) {
|
||||
*src = p;
|
||||
return 2; /* range */
|
||||
}
|
||||
PUNFETCH;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
@ -5598,9 +5598,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
goto end_buf;
|
||||
}
|
||||
else {
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
|
||||
tok->type = TK_ANCHOR;
|
||||
tok->u.subtype = ANCR_SEMI_END_BUF;
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
|
||||
tok->type = TK_ANCHOR;
|
||||
tok->u.subtype = ANCR_SEMI_END_BUF;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -8460,7 +8460,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
|
||||
if (index == 0)
|
||||
NODE_STATUS_ADD(csnode, IGNORECASE);
|
||||
else
|
||||
NODE_STRING_SET_CASE_EXPANDED(csnode);
|
||||
NODE_STRING_SET_CASE_EXPANDED(csnode);
|
||||
|
||||
ns[n++] = csnode;
|
||||
}
|
||||
|
||||
@ -104,28 +104,28 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
|
||||
#endif
|
||||
|
||||
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(code)) {
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0) {
|
||||
if (buk->fold_len == 1) {
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0) {
|
||||
if (buk->fold_len == 1) {
|
||||
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
|
||||
ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk->index)))
|
||||
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
|
||||
}
|
||||
else {
|
||||
OnigCodePoint* addr;
|
||||
|
||||
FOLDS_FOLD_ADDR_BUK(buk, addr);
|
||||
rlen = 0;
|
||||
for (i = 0; i < buk->fold_len; i++) {
|
||||
OnigCodePoint c = addr[i];
|
||||
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
|
||||
fold += len;
|
||||
rlen += len;
|
||||
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
|
||||
}
|
||||
else {
|
||||
OnigCodePoint* addr;
|
||||
|
||||
FOLDS_FOLD_ADDR_BUK(buk, addr);
|
||||
rlen = 0;
|
||||
for (i = 0; i < buk->fold_len; i++) {
|
||||
OnigCodePoint c = addr[i];
|
||||
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
|
||||
fold += len;
|
||||
rlen += len;
|
||||
}
|
||||
return rlen;
|
||||
}
|
||||
return rlen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
*fold++ = *p++;
|
||||
@ -467,10 +467,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
|
||||
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
|
||||
ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk1->index))) {
|
||||
items[0].byte_len = lens[0];
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
|
||||
n++;
|
||||
items[0].byte_len = lens[0];
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
|
||||
n++;
|
||||
}
|
||||
|
||||
un = FOLDS1_UNFOLDS_NUM(buk1->index);
|
||||
@ -479,14 +479,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
if (unfold != orig_codes[0]) {
|
||||
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
|
||||
ONIGENC_IS_ASCII_CODE(unfold)) {
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
if (buk1->fold_len == 2) {
|
||||
m = FOLDS2_UNFOLDS_NUM(buk1->index);
|
||||
@ -572,10 +572,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
for (i = 0; i < m; i++) {
|
||||
code = FOLDS1_UNFOLDS(index)[i];
|
||||
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag)||ONIGENC_IS_ASCII_CODE(code)) {
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = code;
|
||||
n++;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user