+ upd: Oniguruma current development

This commit is contained in:
Rainer Kottenhoff 2021-02-05 16:37:22 +01:00
parent 9ea9391200
commit de5fa9bb45
6 changed files with 304 additions and 166 deletions

View File

@ -330,8 +330,8 @@ Oniguruma API Version 6.9.7 2021/01/18
return value
normal: match position offset (i.e. p - str >= 0)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
arguments
1 reg: regex object
@ -371,8 +371,8 @@ Oniguruma API Version 6.9.7 2021/01/18
return value
normal: match length (>= 0)
not match: ONIG_MISMATCH (< 0)
error: error code (< 0)
not match: ONIG_MISMATCH (< 0)
error: error code (< 0)
arguments
1 reg: regex object
@ -411,8 +411,8 @@ Oniguruma API Version 6.9.7 2021/01/18
return value
normal: number of matching times
error: error code
interruption: return value of callback function (!= 0)
error: error code
interruption: return value of callback function (!= 0)
arguments
1 reg: regex object
@ -511,8 +511,8 @@ Oniguruma API Version 6.9.7 2021/01/18
return value:
normal: index of match regex (zero origin)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
arguments
1 set: regset object
@ -544,8 +544,8 @@ Oniguruma API Version 6.9.7 2021/01/18
return value:
normal: index of match regex (zero origin)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
arguments
1 set: regset object
@ -620,7 +620,7 @@ Oniguruma API Version 6.9.7 2021/01/18
return value
normal: number of groups for the name.
(ex. /(?<x>..)(?<x>..)/ ==> 2)
(ex. /(?<x>..)(?<x>..)/ ==> 2)
name not found: -1
arguments

View File

@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2020 K.Kosako
* Copyright (c) 2002-2021 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -3638,8 +3638,8 @@ node_min_byte_len(Node* node, ScanEnv* env)
case NODE_CALL:
{
Node* t = NODE_BODY(node);
if (NODE_IS_FIXED_MIN(t))
len = BAG_(t)->min_len;
if (NODE_IS_FIXED_MIN(t))
len = BAG_(t)->min_len;
else
len = node_min_byte_len(t, env);
}
@ -4452,19 +4452,21 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
BagNode* en = BAG_(node);
if (en->type == BAG_MEMORY) {
if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
if (NODE_IS_CALLED(node)) {
r = FOUND_CALLED_NODE;
goto check_recursion;
}
else if ((state & IN_RECURSION) != 0) {
check_recursion:
if (! NODE_IS_RECURSION(node)) {
NODE_STATUS_ADD(node, MARK1);
r = recursive_call_check(NODE_BODY(node));
if (r != 0) {
ret = recursive_call_check(NODE_BODY(node));
if (ret != 0) {
NODE_STATUS_ADD(node, RECURSION);
MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
}
NODE_STATUS_REMOVE(node, MARK1);
}
if (NODE_IS_CALLED(node))
r = FOUND_CALLED_NODE;
}
}
@ -6277,7 +6279,7 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
for (j = 0; j < len && p < end; j++) {
/* coverity[overrun-local] */
to->s[i++] = *p++;
}
}
}
to->len = i;
@ -7326,7 +7328,7 @@ static int parse_and_tune(regex_t* reg, const UChar* pattern,
)
{
int r;
Node* root;
Node* root;
root = NULL_NODE;
if (IS_NOT_NULL(einfo)) {
@ -7825,6 +7827,128 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
/* Repeat-count threshold for the any-char heuristics: quantifier upper bounds
   are compared against this value both in mostly_just_anychar() (finite and
   <= threshold) and in detect_can_be_slow() (infinite or > threshold). */
#define MANY_REPEAT_OF_ANYCHAR 20
/* Verdict returned by mostly_just_anychar() for a pattern subtree. */
typedef enum {
/* Subtree contains something other than any-char (e.g. a non-empty string,
   a character class, or a ctype that is not CTYPE_ANYCHAR). */
MJ_NO = 0,
/* Subtree matches via any-char (CTYPE_ANYCHAR found, nothing answered NO). */
MJ_YES = 1,
/* Subtree does not influence the verdict (e.g. {0} quantifier, look-around
   anchors, empty string, backref, call, gimmick). */
MJ_IGNORE = 2,
} MJ_RESULT;
/*
 * Classify whether the subtree rooted at `node` is made up mostly of
 * any-char matches.
 *
 *   node         : subtree to inspect; its type is read immediately, so it
 *                  must not be NULL.
 *   in_reluctant : context flag forwarded to every recursive call. It is
 *                  switched on below a greedy quantifier whose upper bound is
 *                  finite and <= MANY_REPEAT_OF_ANYCHAR. Within this function
 *                  the flag is only tested for that switch-on and forwarded;
 *                  it does not otherwise change the verdict computed here.
 *
 * Returns:
 *   MJ_YES    - an any-char ctype was found and nothing answered MJ_NO
 *   MJ_NO     - something other than any-char was found (also the default
 *               for node types not listed below)
 *   MJ_IGNORE - the subtree has no bearing on the verdict
 */
static MJ_RESULT
mostly_just_anychar(Node* node, int in_reluctant)
{
  MJ_RESULT verdict = MJ_NO;
  Node* item;

  switch (NODE_TYPE(node)) {
  case NODE_LIST:
    {
      /* A sequence is YES when no element says NO and at least one says YES. */
      int saw_anychar = FALSE;

      for (item = node; IS_NOT_NULL(item); item = NODE_CDR(item)) {
        verdict = mostly_just_anychar(NODE_CAR(item), in_reluctant);
        if (verdict == MJ_NO) return MJ_NO;
        if (verdict == MJ_YES) saw_anychar = TRUE;
      }
      /* Every element answered YES or IGNORE at this point. */
      return (saw_anychar == TRUE) ? MJ_YES : MJ_IGNORE;
    }

  case NODE_ALT:
    /* First YES alternative wins; otherwise the verdict of the last
       alternative examined is reported. */
    for (item = node; IS_NOT_NULL(item); item = NODE_CDR(item)) {
      verdict = mostly_just_anychar(NODE_CAR(item), in_reluctant);
      if (verdict == MJ_YES) break;
    }
    return verdict;

  case NODE_QUANT:
    {
      QuantNode* quant = QUANT_(node);

      /* {0}: the body never takes part in a match. */
      if (quant->upper == 0) return MJ_IGNORE;

      if (in_reluctant == FALSE && quant->greedy != 0 &&
          ! IS_INFINITE_REPEAT(quant->upper) &&
          quant->upper <= MANY_REPEAT_OF_ANYCHAR) {
        in_reluctant = TRUE;
      }
      return mostly_just_anychar(NODE_BODY(node), in_reluctant);
    }

  case NODE_ANCHOR:
    switch (ANCHOR_(node)->type) {
    case ANCR_PREC_READ:
    case ANCR_PREC_READ_NOT:
    case ANCR_LOOK_BEHIND:
    case ANCR_LOOK_BEHIND_NOT:
    case ANCR_TEXT_SEGMENT_BOUNDARY: /* \y */
      return MJ_IGNORE;

    default:
      return MJ_NO;
    }
    break;

  case NODE_BAG:
    {
      BagNode* bag = BAG_(node);

      if (bag->type != BAG_IF_ELSE)
        return mostly_just_anychar(NODE_BODY(node), in_reluctant);

      /* if-else: only the Then/Else branches are inspected; a YES from Then
         is final, otherwise a present Else branch decides. */
      if (IS_NOT_NULL(bag->te.Then)) {
        verdict = mostly_just_anychar(bag->te.Then, in_reluctant);
        if (verdict == MJ_YES) return MJ_YES;
      }
      if (IS_NOT_NULL(bag->te.Else))
        verdict = mostly_just_anychar(bag->te.Else, in_reluctant);

      return verdict;
    }

  case NODE_CTYPE:
    return (CTYPE_(node)->ctype == CTYPE_ANYCHAR) ? MJ_YES : MJ_NO;

  case NODE_STRING:
    /* An empty string is neutral; a non-empty one is not any-char. */
    return (NODE_STRING_LEN(node) == 0) ? MJ_IGNORE : MJ_NO;

  case NODE_CCLASS:
    return MJ_NO;

#ifdef USE_CALL
  case NODE_CALL:
    /* ignore call */
#endif
  case NODE_BACKREF:
  case NODE_GIMMICK:
    return MJ_IGNORE;

  default:
    break;
  }

  return verdict;
}
#define MAX_CALLS_IN_DETECT 10
typedef struct {
@ -7833,6 +7957,7 @@ typedef struct {
int backref;
int backref_with_level;
int call;
int anychar_reluctant_many;
int empty_check_nest_level;
int max_empty_check_nest_level;
int heavy_element;
@ -7856,17 +7981,28 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
case NODE_QUANT:
{
int prev_heavy_element;
QuantNode* qn;
Node* body;
if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) {
qn = QUANT_(node);
body = NODE_BODY(node);
if (qn->emptiness != BODY_IS_NOT_EMPTY) {
prev_heavy_element = ct->heavy_element;
ct->empty_check_nest_level++;
if (ct->empty_check_nest_level > ct->max_empty_check_nest_level)
ct->max_empty_check_nest_level = ct->empty_check_nest_level;
}
else if (IS_INFINITE_REPEAT(qn->upper) ||
qn->upper > MANY_REPEAT_OF_ANYCHAR) {
MJ_RESULT mr = mostly_just_anychar(body, (qn->greedy == 0));
if (mr == MJ_YES)
ct->anychar_reluctant_many++;
}
r = detect_can_be_slow(NODE_BODY(node), ct, ncall, calls);
r = detect_can_be_slow(body, ct, ncall, calls);
if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) {
if (qn->emptiness != BODY_IS_NOT_EMPTY) {
if (NODE_IS_INPEEK(node)) {
if (ct->empty_check_nest_level > 2) {
if (prev_heavy_element == ct->heavy_element)
@ -7933,7 +8069,7 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
int gnum;
gnum = CALL_(node)->called_gnum;
ct->call++;
ct->call++;
if (NODE_IS_RECURSION(node) && NODE_IS_INPEEK(node) &&
NODE_IS_IN_REAL_REPEAT(node)) {
@ -8005,24 +8141,26 @@ onig_detect_can_be_slow_pattern(const UChar* pattern,
}
#endif
count.prec_read = 0;
count.look_behind = 0;
count.backref = 0;
count.backref_with_level = 0;
count.call = 0;
count.prec_read = 0;
count.look_behind = 0;
count.backref = 0;
count.backref_with_level = 0;
count.call = 0;
count.anychar_reluctant_many = 0;
count.empty_check_nest_level = 0;
count.max_empty_check_nest_level = 0;
count.heavy_element = 0;
r = detect_can_be_slow(root, &count, 0, calls);
if (r == 0) {
int n = count.prec_read + count.look_behind
+ count.backref + count.backref_with_level + count.call;
if (r == 0) {
int n = count.prec_read + count.look_behind
+ count.backref + count.backref_with_level + count.call
+ count.anychar_reluctant_many;
if (count.heavy_element != 0)
n += count.heavy_element * 10;
r = n;
}
r = n;
}
if (IS_NOT_NULL(scan_env.mem_env_dynamic))
xfree(scan_env.mem_env_dynamic);
@ -8233,71 +8371,71 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_QUANT:
{
fprintf(f, "<quantifier:%p>{%d,%d}%s%s%s", node,
QUANT_(node)->lower, QUANT_(node)->upper,
(QUANT_(node)->greedy ? "" : "?"),
QUANT_(node)->lower, QUANT_(node)->upper,
(QUANT_(node)->greedy ? "" : "?"),
QUANT_(node)->include_referred == 0 ? "" : " referred",
emptiness_name[QUANT_(node)->emptiness]);
if (NODE_IS_INPEEK(node)) fprintf(f, ", in-peek");
fprintf(f, "\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
print_indent_tree(f, NODE_BODY(node), indent + add);
}
break;
case NODE_BAG:
{
BagNode* bn = BAG_(node);
fprintf(f, "<bag:%p> ", node);
fprintf(f, "<bag:%p> ", node);
if (bn->type == BAG_IF_ELSE) {
Node* Then;
Node* Else;
Node* Then;
Node* Else;
fprintf(f, "if-else\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
fprintf(f, "if-else\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
Then = bn->te.Then;
Else = bn->te.Else;
if (IS_NULL(Then)) {
Indent(f, indent + add);
fprintf(f, "THEN empty\n");
Then = bn->te.Then;
Else = bn->te.Else;
if (IS_NULL(Then)) {
Indent(f, indent + add);
fprintf(f, "THEN empty\n");
}
else
print_indent_tree(f, Then, indent + add);
if (IS_NULL(Else)) {
Indent(f, indent + add);
fprintf(f, "ELSE empty\n");
}
else
print_indent_tree(f, Else, indent + add);
}
else
print_indent_tree(f, Then, indent + add);
if (IS_NULL(Else)) {
Indent(f, indent + add);
fprintf(f, "ELSE empty\n");
}
else
print_indent_tree(f, Else, indent + add);
}
else {
switch (bn->type) {
case BAG_OPTION:
case BAG_OPTION:
fprintf(f, "option:%d", bn->o.options);
break;
case BAG_MEMORY:
break;
case BAG_MEMORY:
fprintf(f, "memory:%d", bn->m.regnum);
if (NODE_IS_CALLED(node)) {
fprintf(f, ", called");
fprintf(f, ", called");
if (NODE_IS_RECURSION(node))
fprintf(f, ", recursion");
}
else if (NODE_IS_REFERENCED(node))
fprintf(f, ", referenced");
else if (NODE_IS_REFERENCED(node))
fprintf(f, ", referenced");
if (NODE_IS_FIXED_ADDR(node))
fprintf(f, ", fixed-addr");
if (NODE_IS_FIXED_ADDR(node))
fprintf(f, ", fixed-addr");
if ((bn->m.called_state & IN_PEEK) != 0)
fprintf(f, ", in-peek");
break;
case BAG_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
default:
break;
}
fprintf(f, "\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
case BAG_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
default:
break;
}
fprintf(f, "\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
}
}
break;

View File

@ -122,7 +122,7 @@ struct PropertyNameCtype {
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
#define USE_UNICODE_WORD_BREAK
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
//~#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII

View File

@ -2200,12 +2200,12 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
goto stack_empty_check_mem_not_empty;\
}\
}\
else {\
ms &= ~((MemStatusType )1 << k->zid);\
break;\
}\
}\
}\
}\
kk++;\
}\
if (ms == 0) break;\
@ -2246,7 +2246,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \
((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\
goto stack_empty_check_mem_rec_not_empty;\
}\
}\
else {\
ms &= ~((MemStatusType )1 << k->zid);\
break;\
@ -2254,10 +2254,10 @@ stack_double(int* is_alloca, char** arg_alloc_base,
}\
else if (kk->type == STK_EMPTY_CHECK_START) {\
if (kk->zid == (sid)) level++;\
}\
}\
else if (kk->type == STK_EMPTY_CHECK_END) {\
if (kk->zid == (sid)) level--;\
}\
}\
kk++;\
}\
level = 0;\
@ -3376,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
}
JUMP_OUT;
@ -3408,7 +3408,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
}
}
@ -3485,15 +3485,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ON_STR_END(s)) {
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
else {
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
== IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
}
else {
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
== IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
}
}
INC_OP;
JUMP_OUT;
@ -3510,15 +3510,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ON_STR_END(s)) {
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
else {
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
!= IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
}
else {
if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
!= IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
}
}
INC_OP;
JUMP_OUT;
@ -3552,13 +3552,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! ON_STR_BEGIN(s)) {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
INC_OP;
JUMP_OUT;
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
INC_OP;
JUMP_OUT;
}
}
}
}
}
goto fail;
#endif
@ -3619,11 +3619,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
if (!IS_CRLF_NEWLINE(encode) || IS_LF_CODE(encode, sprev, end)) {
INC_OP;
JUMP_OUT;
}
INC_OP;
JUMP_OUT;
}
}
}
goto fail;
CASE_OP(END_LINE)
@ -3641,9 +3641,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
if (!IS_CRLF_NEWLINE(encode) || IS_CR_CODE(encode, s, end)) {
INC_OP;
JUMP_OUT;
}
INC_OP;
JUMP_OUT;
}
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
@ -3877,9 +3877,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
if (! backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
case_fold_flag, level, (int )tlen, mems, &s, end)) {
goto fail;
}
}
}
INC_OP;
JUMP_OUT;
@ -4133,8 +4133,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (SubexpCallLimitInSearch != 0) {
msa->subexp_call_in_search_counter++;
#ifdef ONIG_DEBUG_MATCH_COUNTER
if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
subexp_call_counters[p->call.called_mem]++;
if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
subexp_call_counters[p->call.called_mem]++;
if (msa->subexp_call_in_search_counter % 1000 == 0)
MATCH_COUNTER_OUT("CALL");
#endif
@ -4210,7 +4210,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->cut_to_mark.id; /* mem: mark id */
STACK_TO_VOID_TO_MARK(stkp, mem);
if (p->cut_to_mark.restore_pos != 0) {
s = stkp->u.val.v;
s = stkp->u.val.v;
}
INC_OP;
JUMP_OUT;
@ -4370,8 +4370,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fail:
#endif
STACK_POP;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
CHECK_RETRY_LIMIT_IN_MATCH;
JUMP_OUT;
@ -5175,7 +5175,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
if (IS_NOT_NULL(prev)) {
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
goto retry_gate;
goto retry_gate;
} else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) {
goto retry_gate;
}
@ -5206,7 +5206,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
}
if (reg->dist_max == 0) {
*low = p;
*low = p;
*high = p;
}
else {
@ -5277,14 +5277,14 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
prev = onigenc_get_prev_char_head(reg->enc, str, p);
if (IS_NOT_NULL(prev)) {
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
p = prev;
goto retry;
} else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
}
}
}
break;
case ANCR_END_LINE:
@ -5301,7 +5301,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#ifdef USE_CRNL_AS_LINE_TERMINATOR
&& !ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
) {
) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;

View File

@ -3714,15 +3714,15 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_
while (! PEND) {
PFETCH(c);
if (! IS_CODE_POINT_DIVIDE(c)) {
if (c == '}') {
*src = p;
return 1; /* end of sequence */
}
else if (c == '-' && in_cc == TRUE) {
*src = p;
return 2; /* range */
}
PUNFETCH;
if (c == '}') {
*src = p;
return 1; /* end of sequence */
}
else if (c == '-' && in_cc == TRUE) {
*src = p;
return 2; /* range */
}
PUNFETCH;
break;
}
else {
@ -5598,9 +5598,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto end_buf;
}
else {
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = ANCR_SEMI_END_BUF;
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = ANCR_SEMI_END_BUF;
}
break;
@ -8460,7 +8460,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
if (index == 0)
NODE_STATUS_ADD(csnode, IGNORECASE);
else
NODE_STRING_SET_CASE_EXPANDED(csnode);
NODE_STRING_SET_CASE_EXPANDED(csnode);
ns[n++] = csnode;
}

View File

@ -104,28 +104,28 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
#endif
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(code)) {
buk = onigenc_unicode_unfold_key(code);
if (buk != 0) {
if (buk->fold_len == 1) {
buk = onigenc_unicode_unfold_key(code);
if (buk != 0) {
if (buk->fold_len == 1) {
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk->index)))
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
}
else {
OnigCodePoint* addr;
FOLDS_FOLD_ADDR_BUK(buk, addr);
rlen = 0;
for (i = 0; i < buk->fold_len; i++) {
OnigCodePoint c = addr[i];
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
fold += len;
rlen += len;
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
}
else {
OnigCodePoint* addr;
FOLDS_FOLD_ADDR_BUK(buk, addr);
rlen = 0;
for (i = 0; i < buk->fold_len; i++) {
OnigCodePoint c = addr[i];
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
fold += len;
rlen += len;
}
return rlen;
}
return rlen;
}
}
}
for (i = 0; i < len; i++) {
*fold++ = *p++;
@ -467,10 +467,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk1->index))) {
items[0].byte_len = lens[0];
items[0].code_len = 1;
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
n++;
items[0].byte_len = lens[0];
items[0].code_len = 1;
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
n++;
}
un = FOLDS1_UNFOLDS_NUM(buk1->index);
@ -479,14 +479,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
if (unfold != orig_codes[0]) {
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) ||
ONIGENC_IS_ASCII_CODE(unfold)) {
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = unfold;
n++;
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = unfold;
n++;
}
}
}
}
}
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
if (buk1->fold_len == 2) {
m = FOLDS2_UNFOLDS_NUM(buk1->index);
@ -572,10 +572,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (i = 0; i < m; i++) {
code = FOLDS1_UNFOLDS(index)[i];
if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag)||ONIGENC_IS_ASCII_CODE(code)) {
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = code;
n++;
n++;
}
}
}