+ upd: Oniguruma current development (bug-fixes)

This commit is contained in:
RaiKoHoff 2020-09-21 17:42:11 +02:00
parent 848bc363ee
commit 6d8fa6d578
4 changed files with 107 additions and 154 deletions

View File

@ -34,6 +34,7 @@ Master branch
* NEW API: Limiting the maximum number of calls of subexp-call
* NEW API: ONIG_OPTION_NOT_BEGIN_STRING / NOT_END_STRING / NOT_BEGIN_POSITION
* Fixed behavior of ONIG_OPTION_NOTBOL / NOTEOL
* Fixed many problems found by OSS-Fuzz
Version 6.9.5 revised 1

View File

@ -134,6 +134,7 @@ ops_init(regex_t* reg, int init_alloc_size)
size = sizeof(Operation) * init_alloc_size;
p = (Operation* )xrealloc(reg->ops, size);
CHECK_NULL_RETURN_MEMERR(p);
reg->ops = p;
#ifdef USE_DIRECT_THREADED_CODE
{
enum OpCode* cp;
@ -145,13 +146,12 @@ ops_init(regex_t* reg, int init_alloc_size)
#endif
}
else {
p = (Operation* )0;
reg->ops = (Operation* )0;
#ifdef USE_DIRECT_THREADED_CODE
reg->ocs = (enum OpCode* )0;
#endif
}
reg->ops = p;
reg->ops_curr = 0; /* !!! not yet done ops_new() */
reg->ops_alloc = init_alloc_size;
reg->ops_used = 0;
@ -177,6 +177,7 @@ ops_expand(regex_t* reg, int n)
size = sizeof(Operation) * n;
p = (Operation* )xrealloc(reg->ops, size);
CHECK_NULL_RETURN_MEMERR(p);
reg->ops = p;
#ifdef USE_DIRECT_THREADED_CODE
size = sizeof(enum OpCode) * n;
@ -185,7 +186,6 @@ ops_expand(regex_t* reg, int n)
reg->ocs = cp;
#endif
reg->ops = p;
reg->ops_alloc = n;
if (reg->ops_used == 0)
reg->ops_curr = 0;
@ -1831,7 +1831,6 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
COP(reg)->memory_end.num = node->m.regnum;
if (NODE_IS_CALLED(node)) {
if (r != 0) return r;
r = add_op(reg, OP_RETURN);
}
#else
@ -3989,6 +3988,7 @@ set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env)
{
BagNode* en = BAG_(node);
r = 0;
if (en->type == BAG_MEMORY) {
if (NODE_IS_BACKREF(node)) {
if (IS_NOT_NULL(empty))
@ -6258,7 +6258,7 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
len = enclen(enc, p);
if (i + len > OPT_EXACT_MAXLEN) break;
if (i + len >= OPT_EXACT_MAXLEN) break;
for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
}

View File

@ -1116,7 +1116,6 @@ typedef struct _StackType {
struct {
Operation* pcode; /* byte code position */
UChar* pstr; /* string position */
UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
int count;
@ -1726,22 +1725,20 @@ stack_double(int* is_alloca, char** arg_alloc_base,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
#define STACK_PUSH(stack_type,pat,s) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\
#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->zid = (int )(id);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
@ -1756,7 +1753,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = s;\
stk->u.state.pstr_prev = sprev;\
STACK_INC;\
} while (0)
#else
@ -1767,10 +1763,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,
} while (0)
#endif
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \
STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id)
#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s)
#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s)
#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
@ -1893,12 +1888,11 @@ stack_double(int* is_alloca, char** arg_alloc_base,
STACK_INC;\
} while(0)
#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\
#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MARK;\
stk->zid = (sid);\
stk->u.val.v = (UChar* )(s);\
stk->u.val.v2 = (sprev);\
STACK_INC;\
} while(0)
@ -1917,7 +1911,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
stk->u.val.v2 = sprev;\
STACK_INC;\
} while(0)
@ -1964,7 +1957,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
&& k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
sprev = k->u.val.v2;\
break;\
}\
}\
@ -2668,9 +2660,9 @@ typedef struct {
#define BYTECODE_INTERPRETER_START GOTO_OP;
#define BYTECODE_INTERPRETER_END
#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP /* L_DEFAULT: */
#define NEXT_OP sprev = sbegin; JUMP_OP
#define NEXT_OP JUMP_OP
#define JUMP_OP GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP goto *(p->opaddr)
@ -2684,9 +2676,8 @@ typedef struct {
#define BYTECODE_INTERPRETER_START \
while (1) {\
MATCH_DEBUG_OUT(0)\
sbegin = s;\
switch (p->opcode) {
#define BYTECODE_INTERPRETER_END } sprev = sbegin; }
#define BYTECODE_INTERPRETER_END } }
#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP default:
#define NEXT_OP break
@ -2767,10 +2758,9 @@ typedef struct {
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
const UChar* in_right_range, const UChar* sstart, UChar* sprev,
const UChar* in_right_range, const UChar* sstart,
MatchArg* msa)
{
@ -2889,7 +2879,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
LengthType tlen, tlen2;
MemNumType mem;
RelAddrType addr;
UChar *s, *ps, *sbegin;
UChar *s, *ps;
UChar *right_range;
int is_alloca;
char *alloc_base;
@ -2968,8 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
#ifdef ONIG_DEBUG_MATCH
fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
str, end, sstart, sprev);
fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
fprintf(DBGFP, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@ -3101,7 +3090,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
sprev = s;
s++;
INC_OP;
JUMP_OUT;
@ -3114,7 +3102,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
sprev = s;
s++;
INC_OP;
JUMP_OUT;
@ -3129,7 +3116,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
sprev = s;
s++;
INC_OP;
JUMP_OUT;
@ -3146,7 +3132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
sprev = s;
s++;
INC_OP;
JUMP_OUT;
@ -3158,7 +3143,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (tlen-- > 0) {
if (*ps++ != *s++) goto fail;
}
sprev = s - 1;
INC_OP;
JUMP_OUT;
@ -3179,7 +3163,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@ -3198,7 +3181,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@ -3216,7 +3198,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
sprev = s - 2;
INC_OP;
JUMP_OUT;
@ -3232,7 +3213,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
sprev = s - 3;
INC_OP;
JUMP_OUT;
@ -3246,7 +3226,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
sprev = s - tlen;
INC_OP;
JUMP_OUT;
@ -3362,11 +3341,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
s += n;
}
JUMP_OUT;
@ -3374,15 +3352,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_ML_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
s += n;
}
else {
sprev = s;
s++;
}
}
@ -3396,12 +3372,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
STACK_PUSH_ALT(p, s, sprev);
STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
s += n;
}
}
@ -3415,16 +3390,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
STACK_PUSH_ALT(p, s, sprev);
STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
s += n;
}
else {
sprev = s;
s++;
}
}
@ -3477,7 +3450,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
else if (ON_STR_END(s)) {
else {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ON_STR_END(s)) {
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
@ -3487,6 +3462,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
}
INC_OP;
JUMP_OUT;
@ -3499,7 +3475,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
else if (ON_STR_END(s)) {
else {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ON_STR_END(s)) {
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
goto fail;
}
@ -3509,6 +3487,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
}
INC_OP;
JUMP_OUT;
@ -3519,7 +3498,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mode = p->word_boundary.mode;
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
UChar* sprev;
if (ON_STR_BEGIN(s)) {
INC_OP;
JUMP_OUT;
}
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
INC_OP;
JUMP_OUT;
}
@ -3532,19 +3517,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ModeType mode;
mode = p->word_boundary.mode;
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (! ON_STR_BEGIN(s)) {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
INC_OP;
JUMP_OUT;
}
}
}
}
goto fail;
#endif
CASE_OP(TEXT_SEGMENT_BOUNDARY)
{
int is_break;
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
switch (p->text_segment_boundary.type) {
case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
@ -3594,15 +3583,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
else if (! ON_STR_END(s)) {
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
INC_OP;
JUMP_OUT;
}
}
goto fail;
CASE_OP(END_LINE)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
@ -3627,6 +3620,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(SEMI_END_BUF)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
@ -3738,7 +3732,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->backref_n.n1;
backref:
{
int len;
UChar *pstart, *pend;
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
@ -3749,10 +3742,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
sprev = s;
STRING_CMP(s, pstart, n);
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
}
INC_OP;
@ -3761,7 +3751,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_N_IC)
mem = p->backref_n.n1;
{
int len;
UChar *pstart, *pend;
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
@ -3772,10 +3761,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n);
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
}
INC_OP;
@ -3783,7 +3769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI)
{
int len, is_fail;
int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
@ -3798,13 +3784,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
sprev = s;
swork = s;
STRING_CMP_VALUE(swork, pstart, n, is_fail);
if (is_fail) continue;
s = swork;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
break; /* success */
}
@ -3815,7 +3798,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI_IC)
{
int len, is_fail;
int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
@ -3830,13 +3813,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
sprev = s;
swork = s;
STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
break; /* success */
}
@ -3851,10 +3831,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto backref_with_level;
CASE_OP(BACKREF_WITH_LEVEL)
{
int len;
int level;
MemNumType* mems;
UChar* ssave;
n = 0;
backref_with_level:
@ -3862,18 +3840,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
ssave = s;
if (backref_match_at_nested_level(reg, stk, stk_base, n,
if (! backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
if (ssave != s) {
sprev = ssave;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
}
else
goto fail;
}
}
INC_OP;
JUMP_OUT;
#endif
@ -4005,13 +3976,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(PUSH)
addr = p->push.addr;
STACK_PUSH_ALT(p + addr, s, sprev);
STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
CASE_OP(PUSH_SUPER)
addr = p->push.addr;
STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
STACK_PUSH_SUPER_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
@ -4033,7 +4004,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_or_jump_exact1.addr;
c = p->push_or_jump_exact1.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
STACK_PUSH_ALT(p + addr, s, sprev);
STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
}
@ -4049,7 +4020,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_if_peek_next.addr;
c = p->push_if_peek_next.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
STACK_PUSH_ALT(p + addr, s, sprev);
STACK_PUSH_ALT(p + addr, s);
}
}
INC_OP;
@ -4061,7 +4032,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + addr, s, sprev);
STACK_PUSH_ALT(p + addr, s);
}
INC_OP;
JUMP_OUT;
@ -4072,7 +4043,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + 1, s, sprev);
STACK_PUSH_ALT(p + 1, s);
p += addr;
}
else
@ -4089,7 +4060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
STACK_PUSH_ALT(p, s, sprev);
STACK_PUSH_ALT(p, s);
p = reg->repeat_range[mem].u.pcode;
}
else {
@ -4108,7 +4079,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else {
if (n >= reg->repeat_range[mem].lower) {
STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
INC_OP;
}
else {
@ -4160,7 +4131,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
for (tlen = p->move.n; tlen > 0; tlen--) {
len = enclen(encode, s);
sprev = s;
s += len;
if (s > end) goto fail;
if (s == end) {
@ -4169,7 +4139,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
INC_OP;
JUMP_OUT;
@ -4178,10 +4147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != 0) {
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
if (p->step_back_start.remaining != 0) {
STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining);
STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
p += p->step_back_start.addr;
}
else
@ -4193,9 +4161,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != INFINITE_LEN) tlen--;
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (tlen != 0) {
STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen);
STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
}
INC_OP;
JUMP_OUT;
@ -4205,7 +4172,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_TO_VOID_TO_MARK(stkp, mem);
if (p->cut_to_mark.restore_pos != 0) {
s = stkp->u.val.v;
sprev = stkp->u.val.v2;
}
INC_OP;
JUMP_OUT;
@ -4213,7 +4179,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MARK)
mem = p->mark.id; /* mem: mark id */
if (p->mark.save_pos != 0)
STACK_PUSH_MARK_WITH_POS(mem, s, sprev);
STACK_PUSH_MARK_WITH_POS(mem, s);
else
STACK_PUSH_MARK(mem);
@ -4367,7 +4333,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_POP;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
CHECK_RETRY_LIMIT_IN_MATCH;
JUMP_OUT;
@ -4419,12 +4384,11 @@ typedef struct {
int state; /* value of enum SearchRangeStatus */
UChar* low;
UChar* high;
UChar* low_prev;
UChar* sch_range;
} SearchRange;
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
r = match_at(reg, str, end, (upper_range), s, msas + i); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
@ -4440,7 +4404,7 @@ regset_search_body_position_lead(OnigRegSet* set,
OnigOptionType option, MatchArg* msas, int* rmatch_pos)
{
int r, n, i;
UChar *s, *prev;
UChar *s;
UChar *low, *high, *low_prev;
UChar* sch_range;
regex_t* reg;
@ -4449,12 +4413,7 @@ regset_search_body_position_lead(OnigRegSet* set,
n = set->n;
enc = set->enc;
s = (UChar* )start;
if (s > str)
prev = onigenc_get_prev_char_head(enc, str, s);
else
prev = (UChar* )NULL;
sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
CHECK_NULL_RETURN_MEMERR(sr);
@ -4474,7 +4433,6 @@ regset_search_body_position_lead(OnigRegSet* set,
sr[i].state = SRS_LOW_HIGH;
sr[i].low = low;
sr[i].high = high;
sr[i].low_prev = low_prev;
sr[i].sch_range = sch_range;
}
}
@ -4491,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set,
sr[i].state = SRS_ALL_RANGE;
sr[i].low = s;
sr[i].high = (UChar* )range;
sr[i].low_prev = prev;
}
}
@ -4510,7 +4467,6 @@ regset_search_body_position_lead(OnigRegSet* set,
&low, &high, &low_prev) != 0) {
sr[i].low = low;
sr[i].high = high;
sr[i].low_prev = low_prev;
if (s < low) continue;
}
else {
@ -4531,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set,
for (i = 0; i < n; i++) {
if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
low = sr[i].low;
low_prev = sr[i].low_prev;
}
}
if (low == range) break;
s = low;
prev = low_prev;
}
else {
prev = s;
s += enclen(enc, s);
}
} while (1);
@ -4578,7 +4531,6 @@ regset_search_body_position_lead(OnigRegSet* set,
if (set->anychar_inf != 0)
prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
prev = s;
s += enclen(enc, s);
} while (1);
}
@ -4647,7 +4599,7 @@ onig_regset_search_with_param(OnigRegSet* set,
{
int r;
int i;
UChar *s, *prev;
UChar *s;
regex_t* reg;
OnigEncoding enc;
OnigRegion* region;
@ -4749,7 +4701,6 @@ onig_regset_search_with_param(OnigRegSet* set,
else if (str == end) { /* empty string */
start = end = str;
s = (UChar* )start;
prev = (UChar* )NULL;
msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
CHECK_NULL_RETURN_MEMERR(msas);
@ -4764,7 +4715,7 @@ onig_regset_search_with_param(OnigRegSet* set,
/* Can't use REGSET_MATCH_AND_RETURN_CHECK()
because r must be set regex index (i)
*/
r = match_at(reg, str, end, end, s, prev, msas + i);
r = match_at(reg, str, end, end, s, msas + i);
if (r != ONIG_MISMATCH) {
if (r >= 0) {
r = i;
@ -5059,7 +5010,6 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
OnigMatchParam* mp)
{
int r;
UChar *prev;
MatchArg msa;
#ifndef USE_POSIX_API
@ -5086,8 +5036,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
}
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end, end, at, prev, &msa);
r = match_at(reg, str, end, end, at, &msa);
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
if (msa.best_len >= 0) {
@ -5370,7 +5319,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
OnigOptionType option, OnigMatchParam* mp)
{
int r;
UChar *s, *prev;
UChar *s;
MatchArg msa;
const UChar *orig_start = start;
@ -5409,7 +5358,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
#define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
r = match_at(reg, str, end, (upper_range), s, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
@ -5523,7 +5472,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (reg->threshold_len == 0) {
start = end = str = address_for_empty_string;
s = (UChar* )start;
prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, reg, option, region, start, mp);
MATCH_AND_RETURN_CHECK(end);
@ -5541,11 +5489,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
if (range > start) { /* forward search */
if (s > str)
prev = onigenc_get_prev_char_head(reg->enc, str, s);
else
prev = (UChar* )NULL;
if (reg->optimize != OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
@ -5572,11 +5515,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
&low_prev)) goto mismatch;
if (s < low) {
s = low;
prev = low_prev;
}
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
}
} while (s < range);
@ -5589,6 +5530,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
(reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
do {
UChar* prev;
MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
@ -5605,7 +5548,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
do {
MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
} while (s < range);
@ -5651,9 +5593,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
s = high;
while (s >= low) {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
s = onigenc_get_prev_char_head(reg->enc, str, s);
}
} while (s >= range);
goto mismatch;
@ -5667,9 +5608,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
do {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
s = onigenc_get_prev_char_head(reg->enc, str, s);
} while (s >= range);
}
@ -6053,7 +5993,7 @@ extern int
onig_init_for_match_at(regex_t* reg)
{
return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
(const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
(const UChar* )NULL, (const UChar* )NULL,
(MatchArg* )NULL);
}
#endif

View File

@ -291,7 +291,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
CHECK_NULL_RETURN_MEMERR(to);
r = BB_INIT(to, from->alloc);
if (r != 0) {
xfree(to->p);
bbuf_free(to);
*rto = 0;
return r;
}
@ -3141,7 +3141,6 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
lower = QUANT_(quant)->lower;
upper = QUANT_(quant)->upper;
onig_node_free(quant);
r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
if (r != 0) goto err;
@ -3208,9 +3207,9 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
simple:
r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
body, possessive, env);
onig_node_free(quant);
if (r != 0) {
ns[4] = NULL_NODE;
onig_node_free(quant);
onig_node_free(body);
goto err;
}
@ -3714,11 +3713,7 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_
while (! PEND) {
PFETCH(c);
if (! IS_CODE_POINT_DIVIDE(c)) break;
}
if (IS_CODE_POINT_DIVIDE(c))
return ONIGERR_INVALID_CODE_POINT_VALUE;
if (! IS_CODE_POINT_DIVIDE(c)) {
if (c == '}') {
*src = p;
return 1; /* end of sequence */
@ -3727,8 +3722,15 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_
*src = p;
return 2; /* range */
}
PUNFETCH;
break;
}
else {
if (PEND)
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
}
r = scan_number_of_base(&p, end, 1, enc, rcode, base);
if (r != 0) return r;
@ -3879,13 +3881,17 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
to = data[i*2+1];
if (pre <= from - 1) {
r = add_code_range_to_buf(pbuf, pre, from - 1);
if (r != 0) return r;
if (r != 0) {
bbuf_free(*pbuf);
return r;
}
}
if (to == ~((OnigCodePoint )0)) break;
pre = to + 1;
}
if (to < ~((OnigCodePoint )0)) {
r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
if (r != 0) bbuf_free(*pbuf);
}
return r;
}
@ -6736,6 +6742,7 @@ prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
p = psave;
for (i = 1; i < len; i++) {
r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
if (r < 0) goto err;
}
fetched = 0;
}
@ -7099,18 +7106,18 @@ prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv*
contents = onigenc_strdup(enc, code_start, code_end);
CHECK_NULL_RETURN_MEMERR(contents);
r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
if (r != 0) {
xfree(contents);
return r;
}
e = onig_reg_callout_list_at(env->reg, num);
if (IS_NULL(e)) {
xfree(contents);
return ONIGERR_MEMORY;
}
r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
if (r != 0) {
xfree(contents);
return r;
}
e->of = ONIG_CALLOUT_OF_CONTENTS;
e->in = in;
e->name_id = ONIG_NON_NAME_ID;
@ -7472,14 +7479,14 @@ prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
if (r != ONIG_NORMAL) goto err_clear;
}
r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
if (r != ONIG_NORMAL) goto err_clear;
e = onig_reg_callout_list_at(env->reg, num);
if (IS_NULL(e)) {
r = ONIGERR_MEMORY; goto err_clear;
}
r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
if (r != ONIG_NORMAL) goto err_clear;
e->of = ONIG_CALLOUT_OF_NAME;
e->in = in;
e->name_id = name_id;
@ -8546,7 +8553,12 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
if (r != 0) {
onig_node_free(*np);
*np = NULL_NODE;
return r;
}
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
break;