From 6d8fa6d5787e7ba87340bf2e12a12e820b288559 Mon Sep 17 00:00:00 2001 From: RaiKoHoff Date: Mon, 21 Sep 2020 17:42:11 +0200 Subject: [PATCH] + upd: Oniguruma current development (bug-fixes) --- scintilla/oniguruma/README.md | 1 + scintilla/oniguruma/src/regcomp.c | 10 +- scintilla/oniguruma/src/regexec.c | 198 ++++++++++------------------- scintilla/oniguruma/src/regparse.c | 52 +++++--- 4 files changed, 107 insertions(+), 154 deletions(-) diff --git a/scintilla/oniguruma/README.md b/scintilla/oniguruma/README.md index 8afe0dcd6..67048fd4d 100644 --- a/scintilla/oniguruma/README.md +++ b/scintilla/oniguruma/README.md @@ -34,6 +34,7 @@ Master branch * NEW API: Limiting the maximum number of calls of subexp-call * NEW API: ONIG_OPTION_NOT_BEGIN_STRING / NOT_END_STRING / NOT_BEGIN_POSITION * Fixed behavior of ONIG_OPTION_NOTBOL / NOTEOL +* Fixed many problems found by OSS-Fuzz Version 6.9.5 revised 1 diff --git a/scintilla/oniguruma/src/regcomp.c b/scintilla/oniguruma/src/regcomp.c index 7ddc9e484..2e42b8b2e 100644 --- a/scintilla/oniguruma/src/regcomp.c +++ b/scintilla/oniguruma/src/regcomp.c @@ -134,6 +134,7 @@ ops_init(regex_t* reg, int init_alloc_size) size = sizeof(Operation) * init_alloc_size; p = (Operation* )xrealloc(reg->ops, size); CHECK_NULL_RETURN_MEMERR(p); + reg->ops = p; #ifdef USE_DIRECT_THREADED_CODE { enum OpCode* cp; @@ -145,13 +146,12 @@ ops_init(regex_t* reg, int init_alloc_size) #endif } else { - p = (Operation* )0; + reg->ops = (Operation* )0; #ifdef USE_DIRECT_THREADED_CODE reg->ocs = (enum OpCode* )0; #endif } - reg->ops = p; reg->ops_curr = 0; /* !!! not yet done ops_new() */ reg->ops_alloc = init_alloc_size; reg->ops_used = 0; @@ -177,6 +177,7 @@ ops_expand(regex_t* reg, int n) size = sizeof(Operation) * n; p = (Operation* )xrealloc(reg->ops, size); CHECK_NULL_RETURN_MEMERR(p); + reg->ops = p; #ifdef USE_DIRECT_THREADED_CODE size = sizeof(enum OpCode) * n; @@ -185,7 +186,6 @@ ops_expand(regex_t* reg, int n) reg->ocs = cp; #endif - reg->ops = p; reg->ops_alloc = n; if (reg->ops_used == 0) reg->ops_curr = 0; @@ -1831,7 +1831,6 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) COP(reg)->memory_end.num = node->m.regnum; if (NODE_IS_CALLED(node)) { - if (r != 0) return r; r = add_op(reg, OP_RETURN); } #else @@ -3989,6 +3988,7 @@ set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env) { BagNode* en = BAG_(node); + r = 0; if (en->type == BAG_MEMORY) { if (NODE_IS_BACKREF(node)) { if (IS_NOT_NULL(empty)) @@ -6258,7 +6258,7 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc) for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; + if (i + len >= OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; } diff --git a/scintilla/oniguruma/src/regexec.c b/scintilla/oniguruma/src/regexec.c index df4d71b53..31d98093e 100644 --- a/scintilla/oniguruma/src/regexec.c +++ b/scintilla/oniguruma/src/regexec.c @@ -1116,7 +1116,6 @@ typedef struct _StackType { struct { Operation* pcode; /* byte code position */ UChar* pstr; /* string position */ - UChar* pstr_prev; /* previous char position of pstr */ } state; struct { int count; @@ -1726,22 +1725,20 @@ stack_double(int* is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ +#define STACK_PUSH(stack_type,pat,s) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) -#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\ +#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->zid = (int )(id);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) @@ -1756,7 +1753,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = s;\ - stk->u.state.pstr_prev = sprev;\ STACK_INC;\ } while (0) #else @@ -1767,10 +1763,9 @@ stack_double(int* is_alloca, char** arg_alloc_base, } while (0) #endif -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \ - STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id) +#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s) +#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s) +#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id) #if 0 #define STACK_PUSH_REPEAT(sid, pat) do {\ @@ -1893,12 +1888,11 @@ stack_double(int* is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) -#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\ +#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MARK;\ stk->zid = (sid);\ stk->u.val.v = (UChar* )(s);\ - stk->u.val.v2 = (sprev);\ STACK_INC;\ } while(0) @@ -1917,7 +1911,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ - stk->u.val.v2 = sprev;\ STACK_INC;\ } while(0) @@ -1964,7 +1957,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ - sprev = k->u.val.v2;\ break;\ }\ }\ @@ -2668,9 +2660,9 @@ typedef struct { #define BYTECODE_INTERPRETER_START GOTO_OP; #define BYTECODE_INTERPRETER_END -#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0) +#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0) #define DEFAULT_OP /* L_DEFAULT: */ -#define NEXT_OP sprev = sbegin; JUMP_OP +#define NEXT_OP JUMP_OP #define JUMP_OP GOTO_OP #ifdef USE_DIRECT_THREADED_CODE #define GOTO_OP goto *(p->opaddr) @@ -2684,9 +2676,8 @@ typedef struct { #define BYTECODE_INTERPRETER_START \ while (1) {\ MATCH_DEBUG_OUT(0)\ - sbegin = s;\ switch (p->opcode) { -#define BYTECODE_INTERPRETER_END } sprev = sbegin; } +#define BYTECODE_INTERPRETER_END } } #define CASE_OP(x) case OP_##x: SOP_IN(OP_##x); #define DEFAULT_OP default: #define NEXT_OP break @@ -2767,10 +2758,9 @@ typedef struct { /* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, - const UChar* in_right_range, const UChar* sstart, UChar* sprev, + const UChar* in_right_range, const UChar* sstart, MatchArg* msa) { @@ -2889,7 +2879,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - UChar *s, *ps, *sbegin; + UChar *s, *ps; UChar *right_range; int is_alloca; char *alloc_base; @@ -2968,8 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #ifdef ONIG_DEBUG_MATCH - fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", - str, end, sstart, sprev); + fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart); fprintf(DBGFP, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); #endif @@ -3101,7 +3090,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3114,7 +3102,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3129,7 +3116,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3146,7 +3132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3158,7 +3143,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (tlen-- > 0) { if (*ps++ != *s++) goto fail; } - sprev = s - 1; INC_OP; JUMP_OUT; @@ -3179,7 +3163,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3198,7 +3181,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3216,7 +3198,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 2; INC_OP; JUMP_OUT; @@ -3232,7 +3213,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 3; INC_OP; JUMP_OUT; @@ -3246,7 +3226,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - tlen; INC_OP; JUMP_OUT; @@ -3362,11 +3341,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } JUMP_OUT; @@ -3374,15 +3352,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_ML_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3396,12 +3372,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } } @@ -3415,16 +3390,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3477,7 +3450,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { + else { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) goto fail; } @@ -3487,6 +3462,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } + } INC_OP; JUMP_OUT; @@ -3499,7 +3475,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { + else { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) goto fail; } @@ -3509,6 +3487,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } + } INC_OP; JUMP_OUT; @@ -3519,7 +3498,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mode = p->word_boundary.mode; if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + UChar* sprev; + if (ON_STR_BEGIN(s)) { + INC_OP; + JUMP_OUT; + } + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { INC_OP; JUMP_OUT; } @@ -3532,19 +3517,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ModeType mode; mode = p->word_boundary.mode; - if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + if (! ON_STR_BEGIN(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { INC_OP; JUMP_OUT; } } } + } goto fail; #endif CASE_OP(TEXT_SEGMENT_BOUNDARY) { int is_break; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); switch (p->text_segment_boundary.type) { case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: @@ -3594,15 +3583,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; JUMP_OUT; } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + else if (! ON_STR_END(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { INC_OP; JUMP_OUT; } + } goto fail; CASE_OP(END_LINE) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; @@ -3627,6 +3620,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(SEMI_END_BUF) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; @@ -3738,7 +3732,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->backref_n.n1; backref: { - int len; UChar *pstart, *pend; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -3749,10 +3742,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP(s, pstart, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3761,7 +3751,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_N_IC) mem = p->backref_n.n1; { - int len; UChar *pstart, *pend; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -3772,10 +3761,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3783,7 +3769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; @@ -3798,13 +3784,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE(swork, pstart, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3815,7 +3798,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI_IC) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; @@ -3830,13 +3813,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3851,10 +3831,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto backref_with_level; CASE_OP(BACKREF_WITH_LEVEL) { - int len; int level; MemNumType* mems; - UChar* ssave; n = 0; backref_with_level: @@ -3862,18 +3840,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, tlen = p->backref_general.num; mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns; - ssave = s; - if (backref_match_at_nested_level(reg, stk, stk_base, n, + if (! backref_match_at_nested_level(reg, stk, stk_base, n, case_fold_flag, level, (int )tlen, mems, &s, end)) { - if (ssave != s) { - sprev = ssave; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - } - } - else goto fail; } + } INC_OP; JUMP_OUT; #endif @@ -4005,13 +3976,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(PUSH) addr = p->push.addr; - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; CASE_OP(PUSH_SUPER) addr = p->push.addr; - STACK_PUSH_SUPER_ALT(p + addr, s, sprev); + STACK_PUSH_SUPER_ALT(p + addr, s); INC_OP; JUMP_OUT; @@ -4033,7 +4004,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_or_jump_exact1.addr; c = p->push_or_jump_exact1.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; } @@ -4049,7 +4020,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_if_peek_next.addr; c = p->push_if_peek_next.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); } } INC_OP; @@ -4061,7 +4032,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); } INC_OP; JUMP_OUT; @@ -4072,7 +4043,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + 1, s, sprev); + STACK_PUSH_ALT(p + 1, s); p += addr; } else @@ -4089,7 +4060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (n >= reg->repeat_range[mem].lower) { INC_OP; - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); p = reg->repeat_range[mem].u.pcode; } else { @@ -4108,7 +4079,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else { if (n >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev); + STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s); INC_OP; } else { @@ -4160,7 +4131,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, for (tlen = p->move.n; tlen > 0; tlen--) { len = enclen(encode, s); - sprev = s; s += len; if (s > end) goto fail; if (s == end) { @@ -4169,7 +4139,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); INC_OP; JUMP_OUT; @@ -4178,10 +4147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != 0) { s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } if (p->step_back_start.remaining != 0) { - STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining); + STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining); p += p->step_back_start.addr; } else @@ -4193,9 +4161,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != INFINITE_LEN) tlen--; s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (tlen != 0) { - STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen); + STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen); } INC_OP; JUMP_OUT; @@ -4205,7 +4172,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_TO_VOID_TO_MARK(stkp, mem); if (p->cut_to_mark.restore_pos != 0) { s = stkp->u.val.v; - sprev = stkp->u.val.v2; } INC_OP; JUMP_OUT; @@ -4213,7 +4179,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MARK) mem = p->mark.id; /* mem: mark id */ if (p->mark.save_pos != 0) - STACK_PUSH_MARK_WITH_POS(mem, s, sprev); + STACK_PUSH_MARK_WITH_POS(mem, s); else STACK_PUSH_MARK(mem); @@ -4367,7 +4333,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; CHECK_RETRY_LIMIT_IN_MATCH; JUMP_OUT; @@ -4419,12 +4384,11 @@ typedef struct { int state; /* value of enum SearchRangeStatus */ UChar* low; UChar* high; - UChar* low_prev; UChar* sch_range; } SearchRange; #define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \ + r = match_at(reg, str, end, (upper_range), s, msas + i); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ @@ -4440,7 +4404,7 @@ regset_search_body_position_lead(OnigRegSet* set, OnigOptionType option, MatchArg* msas, int* rmatch_pos) { int r, n, i; - UChar *s, *prev; + UChar *s; UChar *low, *high, *low_prev; UChar* sch_range; regex_t* reg; @@ -4449,12 +4413,7 @@ regset_search_body_position_lead(OnigRegSet* set, n = set->n; enc = set->enc; - s = (UChar* )start; - if (s > str) - prev = onigenc_get_prev_char_head(enc, str, s); - else - prev = (UChar* )NULL; sr = (SearchRange* )xmalloc(sizeof(*sr) * n); CHECK_NULL_RETURN_MEMERR(sr); @@ -4474,7 +4433,6 @@ regset_search_body_position_lead(OnigRegSet* set, sr[i].state = SRS_LOW_HIGH; sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; sr[i].sch_range = sch_range; } } @@ -4491,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set, sr[i].state = SRS_ALL_RANGE; sr[i].low = s; sr[i].high = (UChar* )range; - sr[i].low_prev = prev; } } @@ -4510,7 +4467,6 @@ regset_search_body_position_lead(OnigRegSet* set, &low, &high, &low_prev) != 0) { sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; if (s < low) continue; } else { @@ -4531,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set, for (i = 0; i < n; i++) { if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) { low = sr[i].low; - low_prev = sr[i].low_prev; } } if (low == range) break; s = low; - prev = low_prev; } else { - prev = s; s += enclen(enc, s); } } while (1); @@ -4578,7 +4531,6 @@ regset_search_body_position_lead(OnigRegSet* set, if (set->anychar_inf != 0) prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end); - prev = s; s += enclen(enc, s); } while (1); } @@ -4647,7 +4599,7 @@ onig_regset_search_with_param(OnigRegSet* set, { int r; int i; - UChar *s, *prev; + UChar *s; regex_t* reg; OnigEncoding enc; OnigRegion* region; @@ -4749,7 +4701,6 @@ onig_regset_search_with_param(OnigRegSet* set, else if (str == end) { /* empty string */ start = end = str; s = (UChar* )start; - prev = (UChar* )NULL; msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n); CHECK_NULL_RETURN_MEMERR(msas); @@ -4764,7 +4715,7 @@ onig_regset_search_with_param(OnigRegSet* set, /* Can't use REGSET_MATCH_AND_RETURN_CHECK() because r must be set regex index (i) */ - r = match_at(reg, str, end, end, s, prev, msas + i); + r = match_at(reg, str, end, end, s, msas + i); if (r != ONIG_MISMATCH) { if (r >= 0) { r = i; @@ -5059,7 +5010,6 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, OnigMatchParam* mp) { int r; - UChar *prev; MatchArg msa; #ifndef USE_POSIX_API @@ -5086,8 +5036,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, } } - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, end, at, prev, &msa); + r = match_at(reg, str, end, end, at, &msa); #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) { if (msa.best_len >= 0) { @@ -5370,7 +5319,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, OnigOptionType option, OnigMatchParam* mp) { int r; - UChar *s, *prev; + UChar *s; MatchArg msa; const UChar *orig_start = start; @@ -5409,7 +5358,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, #define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + r = match_at(reg, str, end, (upper_range), s, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ @@ -5523,7 +5472,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (reg->threshold_len == 0) { start = end = str = address_for_empty_string; s = (UChar* )start; - prev = (UChar* )NULL; MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); @@ -5541,11 +5489,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - if (reg->optimize != OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; @@ -5572,11 +5515,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, &low_prev)) goto mismatch; if (s < low) { s = low; - prev = low_prev; } while (s <= high) { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } } while (s < range); @@ -5589,6 +5530,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 && (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { do { + UChar* prev; + MATCH_AND_RETURN_CHECK(data_range); prev = s; s += enclen(reg->enc, s); @@ -5605,7 +5548,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } while (s < range); @@ -5651,9 +5593,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, s = high; while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK(orig_start); - s = prev; + s = onigenc_get_prev_char_head(reg->enc, str, s); } } while (s >= range); goto mismatch; @@ -5667,9 +5608,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } do { - prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK(orig_start); - s = prev; + s = onigenc_get_prev_char_head(reg->enc, str, s); } while (s >= range); } @@ -6053,7 +5993,7 @@ extern int onig_init_for_match_at(regex_t* reg) { return match_at(reg, (const UChar* )NULL, (const UChar* )NULL, - (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL, + (const UChar* )NULL, (const UChar* )NULL, (MatchArg* )NULL); } #endif diff --git a/scintilla/oniguruma/src/regparse.c b/scintilla/oniguruma/src/regparse.c index 1ce1114b9..b23601b75 100644 --- a/scintilla/oniguruma/src/regparse.c +++ b/scintilla/oniguruma/src/regparse.c @@ -291,7 +291,7 @@ bbuf_clone(BBuf** rto, BBuf* from) CHECK_NULL_RETURN_MEMERR(to); r = BB_INIT(to, from->alloc); if (r != 0) { - xfree(to->p); + bbuf_free(to); *rto = 0; return r; } @@ -3141,7 +3141,6 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua lower = QUANT_(quant)->lower; upper = QUANT_(quant)->upper; - onig_node_free(quant); r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); if (r != 0) goto err; @@ -3208,9 +3207,9 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, simple: r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant, body, possessive, env); + onig_node_free(quant); if (r != 0) { ns[4] = NULL_NODE; - onig_node_free(quant); onig_node_free(body); goto err; } @@ -3714,11 +3713,7 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_ while (! PEND) { PFETCH(c); - if (! IS_CODE_POINT_DIVIDE(c)) break; - } - if (IS_CODE_POINT_DIVIDE(c)) - return ONIGERR_INVALID_CODE_POINT_VALUE; - + if (! IS_CODE_POINT_DIVIDE(c)) { if (c == '}') { *src = p; return 1; /* end of sequence */ @@ -3727,8 +3722,15 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_ *src = p; return 2; /* range */ } - PUNFETCH; + break; + } + else { + if (PEND) + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + } + r = scan_number_of_base(&p, end, 1, enc, rcode, base); if (r != 0) return r; @@ -3879,13 +3881,17 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) to = data[i*2+1]; if (pre <= from - 1) { r = add_code_range_to_buf(pbuf, pre, from - 1); - if (r != 0) return r; + if (r != 0) { + bbuf_free(*pbuf); + return r; + } } if (to == ~((OnigCodePoint )0)) break; pre = to + 1; } if (to < ~((OnigCodePoint )0)) { r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + if (r != 0) bbuf_free(*pbuf); } return r; } @@ -6736,6 +6742,7 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) p = psave; for (i = 1; i < len; i++) { r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE); + if (r < 0) goto err; } fetched = 0; } @@ -7099,18 +7106,18 @@ prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* contents = onigenc_strdup(enc, code_start, code_end); CHECK_NULL_RETURN_MEMERR(contents); - r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); - if (r != 0) { - xfree(contents); - return r; - } - e = onig_reg_callout_list_at(env->reg, num); if (IS_NULL(e)) { xfree(contents); return ONIGERR_MEMORY; } + r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); + if (r != 0) { + xfree(contents); + return r; + } + e->of = ONIG_CALLOUT_OF_CONTENTS; e->in = in; e->name_id = ONIG_NON_NAME_ID; @@ -7472,14 +7479,14 @@ prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) if (r != ONIG_NORMAL) goto err_clear; } - r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); - if (r != ONIG_NORMAL) goto err_clear; - e = onig_reg_callout_list_at(env->reg, num); if (IS_NULL(e)) { r = ONIGERR_MEMORY; goto err_clear; } + r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); + if (r != ONIG_NORMAL) goto err_clear; + e->of = ONIG_CALLOUT_OF_NAME; e->in = in; e->name_id = name_id; @@ -8546,7 +8553,12 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = CCLASS_(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); + r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); + if (r != 0) { + onig_node_free(*np); + *np = NULL_NODE; + return r; + } if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); } break;