diff --git a/scintilla/oniguruma/doc/API b/scintilla/oniguruma/doc/API index bd0186196..2ebf9d89b 100644 --- a/scintilla/oniguruma/doc/API +++ b/scintilla/oniguruma/doc/API @@ -330,8 +330,8 @@ Oniguruma API Version 6.9.7 2021/01/18 return value normal: match position offset (i.e. p - str >= 0) - not found: ONIG_MISMATCH (< 0) - error: error code (< 0) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) arguments 1 reg: regex object @@ -371,8 +371,8 @@ Oniguruma API Version 6.9.7 2021/01/18 return value normal: match length (>= 0) - not match: ONIG_MISMATCH (< 0) - error: error code (< 0) + not match: ONIG_MISMATCH (< 0) + error: error code (< 0) arguments 1 reg: regex object @@ -411,8 +411,8 @@ Oniguruma API Version 6.9.7 2021/01/18 return value normal: number of matching times - error: error code - interruption: return value of callback function (!= 0) + error: error code + interruption: return value of callback function (!= 0) arguments 1 reg: regex object @@ -511,8 +511,8 @@ Oniguruma API Version 6.9.7 2021/01/18 return value: normal: index of match regex (zero origin) - not found: ONIG_MISMATCH (< 0) - error: error code (< 0) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) arguments 1 set: regset object @@ -544,8 +544,8 @@ Oniguruma API Version 6.9.7 2021/01/18 return value: normal: index of match regex (zero origin) - not found: ONIG_MISMATCH (< 0) - error: error code (< 0) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) arguments 1 set: regset object @@ -620,7 +620,7 @@ Oniguruma API Version 6.9.7 2021/01/18 return value normal: number of groups for the name. - (ex. /(?..)(?..)/ ==> 2) + (ex. /(?..)(?..)/ ==> 2) name not found: -1 arguments diff --git a/scintilla/oniguruma/src/regcomp.c b/scintilla/oniguruma/src/regcomp.c index 1ace39ed5..307b0cf72 100644 --- a/scintilla/oniguruma/src/regcomp.c +++ b/scintilla/oniguruma/src/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2020 K.Kosako + * Copyright (c) 2002-2021 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -3638,8 +3638,8 @@ node_min_byte_len(Node* node, ScanEnv* env) case NODE_CALL: { Node* t = NODE_BODY(node); - if (NODE_IS_FIXED_MIN(t)) - len = BAG_(t)->min_len; + if (NODE_IS_FIXED_MIN(t)) + len = BAG_(t)->min_len; else len = node_min_byte_len(t, env); } @@ -4452,19 +4452,21 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) BagNode* en = BAG_(node); if (en->type == BAG_MEMORY) { - if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) { + if (NODE_IS_CALLED(node)) { + r = FOUND_CALLED_NODE; + goto check_recursion; + } + else if ((state & IN_RECURSION) != 0) { + check_recursion: if (! NODE_IS_RECURSION(node)) { NODE_STATUS_ADD(node, MARK1); - r = recursive_call_check(NODE_BODY(node)); - if (r != 0) { + ret = recursive_call_check(NODE_BODY(node)); + if (ret != 0) { NODE_STATUS_ADD(node, RECURSION); MEM_STATUS_ON(env->backtrack_mem, en->m.regnum); } NODE_STATUS_REMOVE(node, MARK1); } - - if (NODE_IS_CALLED(node)) - r = FOUND_CALLED_NODE; } } @@ -6277,7 +6279,7 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc) for (j = 0; j < len && p < end; j++) { /* coverity[overrun-local] */ to->s[i++] = *p++; - } + } } to->len = i; @@ -7326,7 +7328,7 @@ static int parse_and_tune(regex_t* reg, const UChar* pattern, ) { int r; - Node* root; + Node* root; root = NULL_NODE; if (IS_NOT_NULL(einfo)) { @@ -7825,6 +7827,128 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) } +#define MANY_REPEAT_OF_ANYCHAR 20 + +typedef enum { + MJ_NO = 0, + MJ_YES = 1, + MJ_IGNORE = 2, +} MJ_RESULT; + +static MJ_RESULT +mostly_just_anychar(Node* node, int in_reluctant) +{ + MJ_RESULT r; + + r = MJ_NO; + switch (NODE_TYPE(node)) { + case NODE_LIST: + { + int found = FALSE; + do { + r = mostly_just_anychar(NODE_CAR(node), in_reluctant); + if (r == MJ_NO) break; + if (r == MJ_YES) found = TRUE; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + if (r == MJ_IGNORE) { + if (found == TRUE) r = MJ_YES; + } + } + break; + + case NODE_ALT: + r = MJ_IGNORE; + do { + r = mostly_just_anychar(NODE_CAR(node), in_reluctant); + if (r == MJ_YES) break; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: + { + QuantNode* qn = QUANT_(node); + + if (qn->upper == 0) + r = MJ_IGNORE; + else { + if (in_reluctant == FALSE) { + if (qn->greedy != 0 && + (! IS_INFINITE_REPEAT(qn->upper) && + qn->upper <= MANY_REPEAT_OF_ANYCHAR)) { + in_reluctant = TRUE; + } + } + r = mostly_just_anychar(NODE_BODY(node), in_reluctant); + } + } + break; + + case NODE_ANCHOR: + switch (ANCHOR_(node)->type) { + case ANCR_PREC_READ: + case ANCR_PREC_READ_NOT: + case ANCR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND_NOT: + case ANCR_TEXT_SEGMENT_BOUNDARY: /* \y */ + r = MJ_IGNORE; + break; + default: + break; + } + break; + + case NODE_BAG: + { + BagNode* en = BAG_(node); + + if (en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = mostly_just_anychar(en->te.Then, in_reluctant); + if (r == MJ_YES) break; + } + if (IS_NOT_NULL(en->te.Else)) { + r = mostly_just_anychar(en->te.Else, in_reluctant); + } + } + else { + r = mostly_just_anychar(NODE_BODY(node), in_reluctant); + } + } + break; + + case NODE_CTYPE: + if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) + r = MJ_YES; + else + r = MJ_NO; + break; + + case NODE_STRING: + if (NODE_STRING_LEN(node) == 0) { + r = MJ_IGNORE; + break; + } + /* fall */ + case NODE_CCLASS: + r = MJ_NO; + break; + +#ifdef USE_CALL + case NODE_CALL: + /* ignore call */ +#endif + case NODE_BACKREF: + case NODE_GIMMICK: + r = MJ_IGNORE; + break; + + default: + break; + } + + return r; +} + #define MAX_CALLS_IN_DETECT 10 typedef struct { @@ -7833,6 +7957,7 @@ typedef struct { int backref; int backref_with_level; int call; + int anychar_reluctant_many; int empty_check_nest_level; int max_empty_check_nest_level; int heavy_element; @@ -7856,17 +7981,28 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[]) case NODE_QUANT: { int prev_heavy_element; + QuantNode* qn; + Node* body; - if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) { + qn = QUANT_(node); + body = NODE_BODY(node); + + if (qn->emptiness != BODY_IS_NOT_EMPTY) { prev_heavy_element = ct->heavy_element; ct->empty_check_nest_level++; if (ct->empty_check_nest_level > ct->max_empty_check_nest_level) ct->max_empty_check_nest_level = ct->empty_check_nest_level; } + else if (IS_INFINITE_REPEAT(qn->upper) || + qn->upper > MANY_REPEAT_OF_ANYCHAR) { + MJ_RESULT mr = mostly_just_anychar(body, (qn->greedy == 0)); + if (mr == MJ_YES) + ct->anychar_reluctant_many++; + } - r = detect_can_be_slow(NODE_BODY(node), ct, ncall, calls); + r = detect_can_be_slow(body, ct, ncall, calls); - if (QUANT_(node)->emptiness != BODY_IS_NOT_EMPTY) { + if (qn->emptiness != BODY_IS_NOT_EMPTY) { if (NODE_IS_INPEEK(node)) { if (ct->empty_check_nest_level > 2) { if (prev_heavy_element == ct->heavy_element) @@ -7933,7 +8069,7 @@ detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[]) int gnum; gnum = CALL_(node)->called_gnum; - ct->call++; + ct->call++; if (NODE_IS_RECURSION(node) && NODE_IS_INPEEK(node) && NODE_IS_IN_REAL_REPEAT(node)) { @@ -8005,24 +8141,26 @@ onig_detect_can_be_slow_pattern(const UChar* pattern, } #endif - count.prec_read = 0; - count.look_behind = 0; - count.backref = 0; - count.backref_with_level = 0; - count.call = 0; + count.prec_read = 0; + count.look_behind = 0; + count.backref = 0; + count.backref_with_level = 0; + count.call = 0; + count.anychar_reluctant_many = 0; count.empty_check_nest_level = 0; count.max_empty_check_nest_level = 0; count.heavy_element = 0; r = detect_can_be_slow(root, &count, 0, calls); - if (r == 0) { - int n = count.prec_read + count.look_behind - + count.backref + count.backref_with_level + count.call; + if (r == 0) { + int n = count.prec_read + count.look_behind + + count.backref + count.backref_with_level + count.call + + count.anychar_reluctant_many; if (count.heavy_element != 0) n += count.heavy_element * 10; - r = n; - } + r = n; + } if (IS_NOT_NULL(scan_env.mem_env_dynamic)) xfree(scan_env.mem_env_dynamic); @@ -8233,71 +8371,71 @@ print_indent_tree(FILE* f, Node* node, int indent) case NODE_QUANT: { fprintf(f, "{%d,%d}%s%s%s", node, - QUANT_(node)->lower, QUANT_(node)->upper, - (QUANT_(node)->greedy ? "" : "?"), + QUANT_(node)->lower, QUANT_(node)->upper, + (QUANT_(node)->greedy ? "" : "?"), QUANT_(node)->include_referred == 0 ? "" : " referred", emptiness_name[QUANT_(node)->emptiness]); if (NODE_IS_INPEEK(node)) fprintf(f, ", in-peek"); fprintf(f, "\n"); - print_indent_tree(f, NODE_BODY(node), indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); } break; case NODE_BAG: { BagNode* bn = BAG_(node); - fprintf(f, " ", node); + fprintf(f, " ", node); if (bn->type == BAG_IF_ELSE) { - Node* Then; - Node* Else; + Node* Then; + Node* Else; - fprintf(f, "if-else\n"); - print_indent_tree(f, NODE_BODY(node), indent + add); + fprintf(f, "if-else\n"); + print_indent_tree(f, NODE_BODY(node), indent + add); - Then = bn->te.Then; - Else = bn->te.Else; - if (IS_NULL(Then)) { - Indent(f, indent + add); - fprintf(f, "THEN empty\n"); + Then = bn->te.Then; + Else = bn->te.Else; + if (IS_NULL(Then)) { + Indent(f, indent + add); + fprintf(f, "THEN empty\n"); + } + else + print_indent_tree(f, Then, indent + add); + + if (IS_NULL(Else)) { + Indent(f, indent + add); + fprintf(f, "ELSE empty\n"); + } + else + print_indent_tree(f, Else, indent + add); } - else - print_indent_tree(f, Then, indent + add); - - if (IS_NULL(Else)) { - Indent(f, indent + add); - fprintf(f, "ELSE empty\n"); - } - else - print_indent_tree(f, Else, indent + add); - } else { switch (bn->type) { - case BAG_OPTION: + case BAG_OPTION: fprintf(f, "option:%d", bn->o.options); - break; - case BAG_MEMORY: + break; + case BAG_MEMORY: fprintf(f, "memory:%d", bn->m.regnum); if (NODE_IS_CALLED(node)) { - fprintf(f, ", called"); + fprintf(f, ", called"); if (NODE_IS_RECURSION(node)) fprintf(f, ", recursion"); } - else if (NODE_IS_REFERENCED(node)) - fprintf(f, ", referenced"); + else if (NODE_IS_REFERENCED(node)) + fprintf(f, ", referenced"); - if (NODE_IS_FIXED_ADDR(node)) - fprintf(f, ", fixed-addr"); + if (NODE_IS_FIXED_ADDR(node)) + fprintf(f, ", fixed-addr"); if ((bn->m.called_state & IN_PEEK) != 0) fprintf(f, ", in-peek"); - break; - case BAG_STOP_BACKTRACK: - fprintf(f, "stop-bt"); - break; - default: - break; - } - fprintf(f, "\n"); - print_indent_tree(f, NODE_BODY(node), indent + add); + break; + case BAG_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NODE_BODY(node), indent + add); } } break; diff --git a/scintilla/oniguruma/src/regenc.h b/scintilla/oniguruma/src/regenc.h index 9d3b3ae15..c98399ebb 100644 --- a/scintilla/oniguruma/src/regenc.h +++ b/scintilla/oniguruma/src/regenc.h @@ -122,7 +122,7 @@ struct PropertyNameCtype { #define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER #define USE_UNICODE_WORD_BREAK /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ //~#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII diff --git a/scintilla/oniguruma/src/regexec.c b/scintilla/oniguruma/src/regexec.c index 4d2a22566..57c751a84 100644 --- a/scintilla/oniguruma/src/regexec.c +++ b/scintilla/oniguruma/src/regexec.c @@ -2200,12 +2200,12 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \ ((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\ goto stack_empty_check_mem_not_empty;\ - }\ + }\ else {\ ms &= ~((MemStatusType )1 << k->zid);\ break;\ - }\ - }\ + }\ + }\ kk++;\ }\ if (ms == 0) break;\ @@ -2246,7 +2246,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (kk->u.mem.prev_end.i == INVALID_STACK_INDEX || \ ((STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr) && (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != k->u.mem.pstr || STACK_AT(kk->u.mem.prev_start.i)->u.mem.pstr != STACK_AT(kk->u.mem.prev_end.i)->u.mem.pstr))) {\ goto stack_empty_check_mem_rec_not_empty;\ - }\ + }\ else {\ ms &= ~((MemStatusType )1 << k->zid);\ break;\ @@ -2254,10 +2254,10 @@ stack_double(int* is_alloca, char** arg_alloc_base, }\ else if (kk->type == STK_EMPTY_CHECK_START) {\ if (kk->zid == (sid)) level++;\ - }\ + }\ else if (kk->type == STK_EMPTY_CHECK_END) {\ if (kk->zid == (sid)) level--;\ - }\ + }\ kk++;\ }\ level = 0;\ @@ -3376,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p, s); n = enclen(encode, s); DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; } JUMP_OUT; @@ -3408,7 +3408,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } n = enclen(encode, s); DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; } } @@ -3485,15 +3485,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else { UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (ON_STR_END(s)) { - if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } - else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } - } } INC_OP; JUMP_OUT; @@ -3510,15 +3510,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else { UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (ON_STR_END(s)) { - if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } - else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } - } } INC_OP; JUMP_OUT; @@ -3552,13 +3552,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! ON_STR_BEGIN(s)) { UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - INC_OP; - JUMP_OUT; + if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + INC_OP; + JUMP_OUT; + } } } } - } goto fail; #endif @@ -3619,11 +3619,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { if (!IS_CRLF_NEWLINE(encode) || IS_LF_CODE(encode, sprev, end)) { - INC_OP; - JUMP_OUT; - } + INC_OP; + JUMP_OUT; } } + } goto fail; CASE_OP(END_LINE) @@ -3641,9 +3641,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { if (!IS_CRLF_NEWLINE(encode) || IS_CR_CODE(encode, s, end)) { - INC_OP; - JUMP_OUT; - } + INC_OP; + JUMP_OUT; + } } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { @@ -3877,9 +3877,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns; if (! backref_match_at_nested_level(reg, stk, stk_base, n, - case_fold_flag, level, (int )tlen, mems, &s, end)) { + case_fold_flag, level, (int )tlen, mems, &s, end)) { goto fail; - } + } } INC_OP; JUMP_OUT; @@ -4133,8 +4133,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (SubexpCallLimitInSearch != 0) { msa->subexp_call_in_search_counter++; #ifdef ONIG_DEBUG_MATCH_COUNTER - if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS) - subexp_call_counters[p->call.called_mem]++; + if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS) + subexp_call_counters[p->call.called_mem]++; if (msa->subexp_call_in_search_counter % 1000 == 0) MATCH_COUNTER_OUT("CALL"); #endif @@ -4210,7 +4210,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->cut_to_mark.id; /* mem: mark id */ STACK_TO_VOID_TO_MARK(stkp, mem); if (p->cut_to_mark.restore_pos != 0) { - s = stkp->u.val.v; + s = stkp->u.val.v; } INC_OP; JUMP_OUT; @@ -4370,8 +4370,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fail: #endif STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; + p = stk->u.state.pcode; + s = stk->u.state.pstr; CHECK_RETRY_LIMIT_IN_MATCH; JUMP_OUT; @@ -5175,7 +5175,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (IS_NOT_NULL(prev)) { if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - goto retry_gate; + goto retry_gate; } else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) { goto retry_gate; } @@ -5206,7 +5206,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, } if (reg->dist_max == 0) { - *low = p; + *low = p; *high = p; } else { @@ -5277,14 +5277,14 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, prev = onigenc_get_prev_char_head(reg->enc, str, p); if (IS_NOT_NULL(prev)) { if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; + p = prev; + goto retry; } else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) { p = prev; goto retry; - } } } + } break; case ANCR_END_LINE: @@ -5301,7 +5301,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #ifdef USE_CRNL_AS_LINE_TERMINATOR && !ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) { + ) { p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; diff --git a/scintilla/oniguruma/src/regparse.c b/scintilla/oniguruma/src/regparse.c index 5da8ef265..0714b8cd1 100644 --- a/scintilla/oniguruma/src/regparse.c +++ b/scintilla/oniguruma/src/regparse.c @@ -3714,15 +3714,15 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_ while (! PEND) { PFETCH(c); if (! IS_CODE_POINT_DIVIDE(c)) { - if (c == '}') { - *src = p; - return 1; /* end of sequence */ - } - else if (c == '-' && in_cc == TRUE) { - *src = p; - return 2; /* range */ - } - PUNFETCH; + if (c == '}') { + *src = p; + return 1; /* end of sequence */ + } + else if (c == '-' && in_cc == TRUE) { + *src = p; + return 2; /* range */ + } + PUNFETCH; break; } else { @@ -5598,9 +5598,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) goto end_buf; } else { - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCR_SEMI_END_BUF; + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCR_SEMI_END_BUF; } break; @@ -8460,7 +8460,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, if (index == 0) NODE_STATUS_ADD(csnode, IGNORECASE); else - NODE_STRING_SET_CASE_EXPANDED(csnode); + NODE_STRING_SET_CASE_EXPANDED(csnode); ns[n++] = csnode; } diff --git a/scintilla/oniguruma/src/unicode.c b/scintilla/oniguruma/src/unicode.c index 276eb5224..ceda0a1c7 100644 --- a/scintilla/oniguruma/src/unicode.c +++ b/scintilla/oniguruma/src/unicode.c @@ -104,28 +104,28 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, #endif if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(code)) { - buk = onigenc_unicode_unfold_key(code); - if (buk != 0) { - if (buk->fold_len == 1) { + buk = onigenc_unicode_unfold_key(code); + if (buk != 0) { + if (buk->fold_len == 1) { if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk->index))) - return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold); - } - else { - OnigCodePoint* addr; - - FOLDS_FOLD_ADDR_BUK(buk, addr); - rlen = 0; - for (i = 0; i < buk->fold_len; i++) { - OnigCodePoint c = addr[i]; - len = ONIGENC_CODE_TO_MBC(enc, c, fold); - fold += len; - rlen += len; + return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold); + } + else { + OnigCodePoint* addr; + + FOLDS_FOLD_ADDR_BUK(buk, addr); + rlen = 0; + for (i = 0; i < buk->fold_len; i++) { + OnigCodePoint c = addr[i]; + len = ONIGENC_CODE_TO_MBC(enc, c, fold); + fold += len; + rlen += len; + } + return rlen; } - return rlen; } } - } for (i = 0; i < len; i++) { *fold++ = *p++; @@ -467,10 +467,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(*FOLDS1_FOLD(buk1->index))) { - items[0].byte_len = lens[0]; - items[0].code_len = 1; - items[0].code[0] = *FOLDS1_FOLD(buk1->index); - n++; + items[0].byte_len = lens[0]; + items[0].code_len = 1; + items[0].code[0] = *FOLDS1_FOLD(buk1->index); + n++; } un = FOLDS1_UNFOLDS_NUM(buk1->index); @@ -479,14 +479,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, if (unfold != orig_codes[0]) { if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag) || ONIGENC_IS_ASCII_CODE(unfold)) { - items[n].byte_len = lens[0]; - items[n].code_len = 1; - items[n].code[0] = unfold; - n++; + items[n].byte_len = lens[0]; + items[n].code_len = 1; + items[n].code[0] = unfold; + n++; + } } } } - } else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { if (buk1->fold_len == 2) { m = FOLDS2_UNFOLDS_NUM(buk1->index); @@ -572,10 +572,10 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (i = 0; i < m; i++) { code = FOLDS1_UNFOLDS(index)[i]; if (CASE_FOLD_IS_NOT_ASCII_ONLY(flag)||ONIGENC_IS_ASCII_CODE(code)) { - items[n].byte_len = lens[0]; - items[n].code_len = 1; + items[n].byte_len = lens[0]; + items[n].code_len = 1; items[n].code[0] = code; - n++; + n++; } } }