+ update Oniguruma to current (2020-01-08) development version (6.9.4 R3)

This commit is contained in:
RaiKoHoff 2020-01-08 12:42:49 +01:00
parent c2b9aa7526
commit f591ebfe7f
17 changed files with 1265 additions and 1242 deletions

View File

@ -47,6 +47,7 @@ m4/*.m4
/test/testcu
/test/testp
/test/test_regset
/test/test_syntax
/test/kofu-utf8.txt
# sample/
@ -67,8 +68,8 @@ m4/*.m4
/sample/log*
/harnesses/utf16*.dict
/harnesses/*-libfuzzer
/harnesses/main-*
/harnesses/fuzzer-*
/harnesses/read-*
/harnesses/libfuzzer-onig
/harnesses/libfuzzer-onig-full
/harnesses/slow-unit-*

View File

@ -1,13 +1,15 @@
History
2019/MM/DD: Version 6.9.4
2019/11/29: Version 6.9.4
2019/11/22: Release Candidate 3 for Version 6.9.4
2019/11/20: fix a problem found by libFuzzer test
2019/11/14: Release Candidate 2 for Version 6.9.4
2019/11/12: fix integer overflow by nested quantifier
2019/11/11: fix #164: Integer overflow related to reg->dmax in search_in_range()
2019/11/07: fix #163: heap-buffer-overflow in gb18030_mbc_enc_len()
2019/11/06: fix #162: heap-buffer-overflow in fetch_interval_quantifier()
2019/11/11: fix CVE-2019-19012: Integer overflow related to reg->dmax in search_in_range()
2019/11/07: fix CVE-2019-19203: heap-buffer-overflow in gb18030_mbc_enc_len()
2019/11/06: fix CVE-2019-19204: heap-buffer-overflow in fetch_interval_quantifier()
2019/11/06: add HAVE_INTTYPES_H into config.h.windows.in and config.h.win{32,64}
2019/11/06: add HAVE_STDINT_H into config.h.win{32,64}
2019/11/05: Release Candidate 1 for Version 6.9.4

View File

@ -27,11 +27,20 @@ Supported character encodings:
* doc/SYNTAX.md: contributed by seanofw
Master branch
-------------
* Fixed behavior of isolated options in Perl and Java syntaxes. /...(?i).../
Version 6.9.4
-------------
* NEW API: RegSet (set of regexes)
* Fixed CVE-2019-19012 (Issue #164)
* Fixed CVE-2019-19012
* Fixed CVE-2019-19203 (Does not affect UTF-8, UTF-16 and UTF-32 encodings)
* Fixed CVE-2019-19204 (Affects only PosixBasic, Emacs and Grep syntaxes)
* Fixed CVE-2019-19246
* Fixed some problems (found by libFuzzer test)

View File

@ -1,7 +1,7 @@
# Oniguruma syntax (operator) configuration
_Documented for Oniguruma 6.9.3 (2019/08/08)_
_Documented for Oniguruma 6.9.5 (2019/12/16)_
----------
@ -910,6 +910,13 @@ If this flag is set, then intervals of a fixed size will ignore a lazy (non-gree
little as possible" is meaningless for a fixed-size interval. If this flag is clear,
then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded.
### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`)
_Set in: Perl, Java_
If this flag is set, then an isolated option doesn't break the branch and remains in effect until the end of the group (or the end of the pattern).
If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/
### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`)
_Set in: Grep_

View File

@ -55,6 +55,11 @@ def form3bytes(x):
x2 = (x>>16) & 0xff
return "\\x%02x\\x%02x\\x%02x" % (x2, x1, x0)
def enc_len(code, encode):
    """Return the length of code point `code` once encoded with codec `encode`."""
    return len(unichr(code).encode(encode))
def check_version_info(s):
m = VERSION_REG.match(s)
if m is not None:
@ -324,6 +329,138 @@ def output_gperf_source():
with open(GPERF_FOLD_KEY_FILES[i-1], 'w') as f:
output_gperf_fold_key(f, i)
def unfolds_byte_length_check(encode):
    """Report unfold keys whose encoding is longer than that of their fold.

    Walks UNFOLDS ordered by (fold_len, index) and, for each entry whose key
    encodes (in codec `encode`) to more units than the sum of its fold code
    points, writes one diagnostic line to stderr.
    """
    ordered = sorted(UNFOLDS.items(),
                     key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, e in ordered:
        key_len = enc_len(unfold, encode)
        fold_len = sum([enc_len(c, encode) for c in e.fold])
        if key_len <= fold_len:
            continue
        sfolds = ' '.join(["0x%06x" % c for c in e.fold])
        s = "%s byte length: %d > %d: 0x%06x => %s" % (encode, key_len, fold_len, unfold, sfolds)
        sys.stderr.write(s + "\n")
def double_fold_check():
    """Report fold code points that are themselves keys of UNFOLDS.

    Entries are visited ordered by (fold_len, index); each offending
    (unfold, fold code point) pair produces one diagnostic line on stderr.
    """
    ordered = sorted(UNFOLDS.items(),
                     key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, e in ordered:
        for f in e.fold:
            e2 = UNFOLDS.get(f)
            if e2 is None:
                continue
            s = "double folds: 0x%06x => %s, 0x%06x => %s" % (unfold, e.fold, f, e2.fold)
            sys.stderr.write(s + "\n")
def unfold_is_multi_code_folds_head_check():
    """Report unfold keys that are also the first code point of a multi-code fold.

    For every key of UNFOLDS (ordered by (fold_len, index)) the entries with
    fold_len 2 are checked first, then those with fold_len 3; each match
    writes one diagnostic line to stderr.
    """
    items = UNFOLDS.items()
    # Two-code folds first, three-code folds second: keeps the report order.
    multi_folds = [e for _, e in items if e.fold_len == 2]
    multi_folds += [e for _, e in items if e.fold_len == 3]
    ordered = sorted(items, key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, _ in ordered:
        for e in multi_folds:
            if e.fold[0] != unfold:
                continue
            s = "unfold 0x%06x is multi-code fold head in %s" % (unfold, e.fold)
            sys.stderr.write(s + "\n")
def make_one_folds(l):
    """Group the unfold keys of single-code folds by their fold code point.

    `l` is an iterable of (unfold, entry) pairs. Entries whose fold_len is
    not 1 are ignored. Returns a dict mapping fold code point -> list of
    unfold keys, in the order they were encountered.
    """
    grouped = {}
    for unfold, e in l:
        if e.fold_len == 1:
            grouped.setdefault(e.fold[0], []).append(unfold)
    return grouped
def make_foldn_heads(l, fold_len, one_folds):
    """Index the folds of length `fold_len` by their first code point.

    `l` is an iterable of (unfold, entry) pairs. Returns a dict mapping the
    first fold code point -> (entry, one_folds.get(first code point)). When
    several entries share a head, the last one seen wins.
    """
    heads = {}
    for _, entry in l:
        if entry.fold_len == fold_len:
            heads[entry.fold[0]] = (entry, one_folds.get(entry.fold[0]))
    return heads
def fold2_expansion_num(e, one_folds):
    """Count the expansions of a two-code fold entry.

    The result is len(e.unfolds) plus the product, over both fold code
    points, of (1 + number of single-code unfolds listed in `one_folds`).
    """
    total = len(e.unfolds)
    combos = 1
    for i in range(2):
        variants = one_folds.get(e.fold[i])
        choices = 1  # the fold code point itself
        if variants is not None:
            choices += len(variants)
        combos *= choices
    return total + combos
def fold3_expansion_num(e, one_folds):
    """Count the expansions of a three-code fold entry.

    The result is len(e.unfolds) plus the product, over the three fold code
    points, of (1 + number of single-code unfolds listed in `one_folds`).
    """
    total = len(e.unfolds)
    combos = 1
    for i in range(3):
        variants = one_folds.get(e.fold[i])
        choices = 1  # the fold code point itself
        if variants is not None:
            choices += len(variants)
        combos *= choices
    return total + combos
def get_all_folds_expansion_num(x, one_folds, fold2_heads, fold3_heads):
    """Return the expansion count for unfold key `x`.

    Multi-code entries (fold_len 2 or 3) delegate to the matching
    foldN_expansion_num helper. For a single-code entry the result is the
    largest of: its own unfold count plus one (the fold itself), and the
    expansion counts of any two-/three-code fold headed by its fold code.

    Raises RuntimeError for any other fold_len.
    """
    e = UNFOLDS[x]
    if e.fold_len == 2:
        return fold2_expansion_num(e, one_folds)
    if e.fold_len == 3:
        return fold3_expansion_num(e, one_folds)
    if e.fold_len != 1:
        raise RuntimeError("Invalid fold_len %d" % (e.fold_len))

    own = len(e.unfolds) + 1  # +1: fold
    head = e.fold[0]

    as_fold2_head = 0
    hit = fold2_heads.get(head)
    if hit is not None:
        as_fold2_head = fold2_expansion_num(hit[0], one_folds)

    as_fold3_head = 0
    hit = fold3_heads.get(head)
    if hit is not None:
        as_fold3_head = fold3_expansion_num(hit[0], one_folds)

    return max(own, as_fold2_head, as_fold3_head)
def get_all_folds_expansion_max_num():
    """Find the unfold key with the largest expansion count.

    Returns a tuple (max count, unfold key). Keys are scanned ordered by
    (fold_len, index) and the first key reaching the maximum is kept;
    the result is (0, None) when no key has a positive count.
    """
    items = UNFOLDS.items()
    one_folds = make_one_folds(items)
    fold2_heads = make_foldn_heads(items, 2, one_folds)
    fold3_heads = make_foldn_heads(items, 3, one_folds)

    best = (0, None)
    ordered = sorted(items, key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, _ in ordered:
        count = get_all_folds_expansion_num(unfold, one_folds, fold2_heads, fold3_heads)
        if count > best[0]:  # strict: earlier keys win ties
            best = (count, unfold)
    return best
## main ##
with open(SOURCE_FILE, 'r') as f:
@ -335,3 +472,12 @@ out_comment = True
output_fold_source(sys.stdout, out_comment)
output_gperf_source()
#unfolds_byte_length_check('utf-8')
#unfolds_byte_length_check('utf-16')
double_fold_check()
unfold_is_multi_code_folds_head_check()
#max_num, max_code = get_all_folds_expansion_max_num()
#max_num -= 1 # remove self
#print >> sys.stderr, "max expansion: 0x%06x: %d" % (max_code, max_num)

View File

@ -1103,7 +1103,7 @@ static int IsAscii(int enc ARG_UNUSED, int c)
static int IsNewline(int enc ARG_UNUSED, int c)
{
if (c == 0x0a) return 1;
if (c == NEWLINE_CODE) return 1;
return 0;
}

View File

@ -531,6 +531,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
#define ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (1U<<10) /* ..(?i)...|... */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,9 @@
#include "regint.h"
#define LARGE_S 0x53
#define SMALL_S 0x73
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
#define INITED_LIST_SIZE 20
@ -550,7 +553,7 @@ static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
{
static OnigCodePoint ss[] = { 0x73, 0x73 };
static OnigCodePoint ss[] = { SMALL_S, SMALL_S };
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
}
@ -589,35 +592,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
int i, j, n;
static OnigUChar sa[] = { LARGE_S, SMALL_S };
if (0x41 <= *p && *p <= 0x5a) { /* A - Z */
if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */
ss_combination:
items[0].byte_len = 2;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )0xdf;
n = 1;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (sa[i] == *p && sa[j] == *(p+1))
continue;
items[n].byte_len = 2;
items[n].code_len = 2;
items[n].code[0] = (OnigCodePoint )sa[i];
items[n].code[1] = (OnigCodePoint )sa[j];
n++;
}
}
return 4;
}
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
/* SS */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
return 1;
}
else if (0x61 <= *p && *p <= 0x7a) {
else if (0x61 <= *p && *p <= 0x7a) { /* a - z */
if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) {
goto ss_combination;
}
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
/* ss */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
return 1;
}
else if (*p == 0xdf && ess_tsett_flag != 0) {
items[0].byte_len = 1;
@ -677,7 +693,7 @@ extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
if (p < end) {
if (*p == 0x0a) return 1;
if (*p == NEWLINE_CODE) return 1;
}
return 0;
}
@ -906,7 +922,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
{
OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
if (code > 127) return 0;
if (code > ASCII_LIMIT) return 0;
return ONIGENC_IS_ASCII_CODE_WORD(code);
}

View File

@ -75,6 +75,8 @@ typedef struct {
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#define MAX_CODE_POINT (~((OnigCodePoint )0))
#define ASCII_LIMIT 127
#define NEWLINE_CODE 0x0a
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)

View File

@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2019 K.Kosako
* Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -177,8 +177,6 @@ static OpInfoType OpInfo[] = {
{ OP_STR_MB2N, "str_mb2-n"},
{ OP_STR_MB3N, "str_mb3n"},
{ OP_STR_MBN, "str_mbn"},
{ OP_STR_1_IC, "str_1-ic"},
{ OP_STR_N_IC, "str_n-ic"},
{ OP_CCLASS, "cclass"},
{ OP_CCLASS_MB, "cclass-mb"},
{ OP_CCLASS_MIX, "cclass-mix"},
@ -254,7 +252,7 @@ static OpInfoType OpInfo[] = {
{ OP_LOOK_BEHIND, "look-behind"},
{ OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
{ OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"},
{ OP_PUSH_SAVE_VAL, "push-save-val"},
{ OP_SAVE_VAL, "save-val"},
{ OP_UPDATE_VAR, "update-var"},
#ifdef USE_CALL
{ OP_CALL, "call"},
@ -377,14 +375,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
while (n-- > 0) { fputc(*q++, f); }
}
break;
case OP_STR_1_IC:
len = enclen(enc, p->exact.s);
p_string(f, len, p->exact.s);
break;
case OP_STR_N_IC:
len = p->exact_n.n;
p_len_string(f, len, 1, p->exact_n.s);
break;
case OP_CCLASS:
case OP_CCLASS_NOT:
@ -564,12 +554,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
#endif
case OP_PUSH_SAVE_VAL:
case OP_SAVE_VAL:
{
SaveType type;
type = p->push_save_val.type;
mem = p->push_save_val.id;
type = p->save_val.type;
mem = p->save_val.id;
fprintf(f, ":%d:%d", type, mem);
}
break;
@ -638,7 +628,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
default:
fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
break;
}
}
@ -1808,26 +1798,6 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while (0)
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
int level = 0;\
StackType *k = (stk_from);\
while (k > stk_base) {\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
&& k->u.val.id == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
break;\
}\
}\
else if (k->type == STK_CALL_FRAME)\
level--;\
else if (k->type == STK_RETURN)\
level++;\
k--;\
}\
} while (0)
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
STACK_ENSURE(1);\
stk->type = STK_CALLOUT;\
@ -1849,7 +1819,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
fprintf(DBGFP, "at %s\n", at);\
MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
}
#else
@ -2544,7 +2514,7 @@ typedef struct {
int len, spos;\
spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
xp = p - (offset);\
fprintf(stderr, "%7u: %7ld: %4d> \"",\
fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
counter, GET_STACK_INDEX(stk), spos);\
counter++;\
bp = buf;\
@ -2560,21 +2530,23 @@ typedef struct {
xmemcpy(bp, "\"", 1); bp += 1;\
}\
*bp = 0;\
fputs((char* )buf, stderr);\
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
fputs((char* )buf, DBGFP);\
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
if (xp == FinishCode)\
fprintf(stderr, "----: finish");\
fprintf(DBGFP, "----: finish");\
else {\
fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
fprintf(DBGFP, "%4d: ", (int )(xp - reg->ops));\
print_compiled_byte_code(DBGFP, reg, (int )(xp - reg->ops), reg->ops, encode); \
}\
fprintf(stderr, "\n");\
fprintf(DBGFP, "\n");\
} while(0);
#else
#define MATCH_DEBUG_OUT(offset)
#endif
#define MATCH_AT_ERROR_RETURN(err_code) best_len = err_code; goto match_at_end
#define MATCH_AT_ERROR_RETURN(err_code) do {\
best_len = err_code; goto match_at_end;\
} while(0)
/* match data(str - end) from position (sstart). */
@ -2607,8 +2579,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_STR_MB2N,
&&L_STR_MB3N,
&&L_STR_MBN,
&&L_STR_1_IC,
&&L_STR_N_IC,
&&L_CCLASS,
&&L_CCLASS_MB,
&&L_CCLASS_MIX,
@ -2684,7 +2654,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_LOOK_BEHIND,
&&L_LOOK_BEHIND_NOT_START,
&&L_LOOK_BEHIND_NOT_END,
&&L_PUSH_SAVE_VAL,
&&L_SAVE_VAL,
&&L_UPDATE_VAR,
#ifdef USE_CALL
&&L_CALL,
@ -2760,9 +2730,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
str, end, sstart, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
fprintf(DBGFP, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@ -2781,7 +2751,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (n > best_len) {
OnigRegion* region;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
if (IS_FIND_LONGEST(option)) {
if (OPTON_FIND_LONGEST(option)) {
if (n > msa->best_len) {
msa->best_len = n;
msa->best_s = (UChar* )sstart;
@ -2796,7 +2766,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (keep > s) keep = s;
#ifdef USE_POSIX_API_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
if (OPTON_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
rmt[0].rm_so = (regoff_t )(keep - str);
@ -2850,7 +2820,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API_REGION_OPTION
} /* else IS_POSIX_REGION() */
} /* else OPTON_POSIX_REGION() */
#endif
} /* if (region) */
} /* n > best_len */
@ -2860,12 +2830,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
SOP_OUT;
if (IS_FIND_CONDITION(option)) {
if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
if (OPTON_FIND_CONDITION(option)) {
if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) {
best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
goto fail; /* for retry */
}
}
@ -2881,27 +2851,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
CASE_OP(STR_1_IC)
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
DATA_ENSURE(1);
len = ONIGENC_MBC_CASE_FOLD(encode,
/* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
case_fold_flag,
&s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
ps = p->exact.s;
while (len-- > 0) {
if (*ps != *q) goto fail;
ps++; q++;
}
}
INC_OP;
NEXT_OUT;
CASE_OP(STR_2)
DATA_ENSURE(2);
ps = p->exact.s;
@ -2969,34 +2918,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
CASE_OP(STR_N_IC)
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
tlen = p->exact_n.n;
ps = p->exact_n.s;
endp = ps + tlen;
while (ps < endp) {
sprev = s;
DATA_ENSURE(1);
len = ONIGENC_MBC_CASE_FOLD(encode,
/* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
case_fold_flag,
&s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
if (ps >= endp) goto fail;
if (*ps != *q) goto fail;
ps++; q++;
}
}
}
INC_OP;
JUMP_OUT;
CASE_OP(STR_MB2N1)
DATA_ENSURE(2);
ps = p->exact.s;
@ -3420,7 +3341,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BEGIN_LINE)
if (ON_STR_BEGIN(s)) {
if (IS_NOTBOL(msa->options)) goto fail;
if (OPTON_NOTBOL(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
}
@ -3435,7 +3356,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
if (OPTON_NOTEOL(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
@ -3459,7 +3380,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
if (OPTON_NOTEOL(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
@ -3746,7 +3667,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
fprintf(DBGFP, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
#endif
empty_check_found:
/* empty loop founded, skip next instruction */
@ -3779,7 +3700,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
#endif
if (is_empty == -1) goto fail;
goto empty_check_found;
@ -3802,7 +3723,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
(int )mem, s);
#endif
if (is_empty == -1) goto fail;
@ -4010,12 +3931,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP_OUT;
#endif
CASE_OP(PUSH_SAVE_VAL)
CASE_OP(SAVE_VAL)
{
SaveType type;
type = p->push_save_val.type;
mem = p->push_save_val.id; /* mem: save id */
type = p->save_val.type;
mem = p->save_val.id; /* mem: save id */
switch ((enum SaveType )type) {
case SAVE_KEEP:
STACK_PUSH_SAVE_VAL(mem, type, s);
@ -4167,6 +4088,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
return best_len;
}
#ifdef USE_REGSET
typedef struct {
regex_t* reg;
OnigRegion* region;
@ -4433,7 +4357,7 @@ onig_regset_search_with_param(OnigRegSet* set,
if (set->n == 0)
return ONIG_MISMATCH;
if (IS_POSIX_REGION(option))
if (OPTON_POSIX_REGION(option))
return ONIGERR_INVALID_ARGUMENT;
r = 0;
@ -4457,7 +4381,7 @@ onig_regset_search_with_param(OnigRegSet* set,
return ONIGERR_INVALID_ARGUMENT;
}
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
goto finish_no_msa;
@ -4567,7 +4491,7 @@ onig_regset_search_with_param(OnigRegSet* set,
for (i = 0; i < set->n; i++) {
if (IS_NOT_NULL(msas))
MATCH_ARG_FREE(msas[i]);
if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
IS_NOT_NULL(set->rs[i].region)) {
onig_region_clear(set->rs[i].region);
}
@ -4586,7 +4510,7 @@ onig_regset_search_with_param(OnigRegSet* set,
for (i = 0; i < set->n; i++) {
if (IS_NOT_NULL(msas))
MATCH_ARG_FREE(msas[i]);
if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
IS_NOT_NULL(set->rs[i].region)) {
onig_region_clear(set->rs[i].region);
}
@ -4625,6 +4549,9 @@ onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
return r;
}
#endif /* USE_REGSET */
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
@ -4656,48 +4583,6 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
return (UChar* )NULL;
}
static int
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
const UChar* t, const UChar* tend,
const UChar* p, const UChar* end)
{
int lowlen;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
while (t < tend) {
if (p >= end) return 0;
lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
if (t >= tend) return 0;
if (*t++ != *q++) return 0;
lowlen--;
}
}
return 1;
}
static UChar*
slow_search_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
{
UChar *s;
s = (UChar* )text;
while (s < text_range) {
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
s, text_end))
return s;
s += enclen(enc, s);
}
return (UChar* )NULL;
}
static UChar*
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
const UChar* text, const UChar* adjust_text,
@ -4730,33 +4615,6 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
return (UChar* )NULL;
}
static UChar*
slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* adjust_text,
const UChar* text_end, const UChar* text_start)
{
UChar *s;
s = (UChar* )text_end;
s -= (target_end - target);
if (s > text_start)
s = (UChar* )text_start;
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
if (str_lower_case_match(enc, case_fold_flag,
target, target_end, s, text_end))
return s;
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
sunday_quick_search_step_forward(regex_t* reg,
const UChar* target, const UChar* target_end,
@ -4770,8 +4628,9 @@ sunday_quick_search_step_forward(regex_t* reg,
OnigEncoding enc;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
fprintf(DBGFP,
"sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
text, text_end, text_range);
#endif
enc = reg->enc;
@ -4894,7 +4753,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
MATCH_ARG_INIT(msa, reg, option, region, at, mp);
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
&& !OPTON_POSIX_REGION(option)
#endif
) {
r = onig_region_resize_clear(region, reg->num_mem + 1);
@ -4903,7 +4762,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
r = 0;
if (r == 0) {
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
goto end;
@ -4926,7 +4785,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
UChar *p, *pprev = (UChar* )NULL;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
str, end, start, range);
#endif
@ -4949,10 +4808,6 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
case OPTIMIZE_STR:
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
case OPTIMIZE_STR_CASE_FOLD:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
case OPTIMIZE_STR_FAST:
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
@ -5047,7 +4902,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
}
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
fprintf(DBGFP,
"forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
(int )(*low - str), (int )(*high - str),
reg->dist_min, reg->dist_max);
@ -5075,12 +4930,6 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
range, adjrange, end, p);
break;
case OPTIMIZE_STR_CASE_FOLD:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
case OPTIMIZE_STR_FAST:
case OPTIMIZE_STR_FAST_STEP_FORWARD:
goto exact_method;
@ -5150,7 +4999,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "backward_search: low: %d, high: %d\n",
fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
(int )(*low - str), (int )(*high - str));
#endif
return 1; /* success */
@ -5158,7 +5007,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
fail:
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "backward_search: fail.\n");
fprintf(DBGFP, "backward_search: fail.\n");
#endif
return 0; /* fail */
}
@ -5202,7 +5051,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
const UChar *orig_start = start;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
fprintf(DBGFP,
"onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
@ -5211,7 +5060,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
&& ! OPTON_POSIX_REGION(option)
#endif
) {
r = onig_region_resize_clear(region, reg->num_mem + 1);
@ -5220,7 +5069,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
goto finish_no_msa;
@ -5233,7 +5082,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
if (! IS_FIND_LONGEST(reg->options)) {\
if (! OPTON_FIND_LONGEST(reg->options)) {\
goto match;\
}\
}\
@ -5350,7 +5199,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
static const UChar* address_for_empty_string = (UChar* )"";
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search: empty string.\n");
fprintf(DBGFP, "onig_search: empty string.\n");
#endif
if (reg->threshold_len == 0) {
@ -5366,7 +5215,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
@ -5509,7 +5358,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
if (IS_FIND_LONGEST(reg->options)) {
if (OPTON_FIND_LONGEST(reg->options)) {
if (msa.best_len >= 0) {
s = msa.best_s;
goto match;
@ -5523,9 +5372,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not set in match_at(). */
if (IS_FIND_NOT_EMPTY(reg->options) && region
if (OPTON_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
&& !OPTON_POSIX_REGION(option)
#endif
) {
onig_region_clear(region);
@ -5533,7 +5382,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
return r;
@ -5542,7 +5391,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
finish_no_msa:
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
fprintf(DBGFP, "onig_search: error %d\n", r);
#endif
return r;
@ -5578,7 +5427,7 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
int rs;
const UChar* start;
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
@ -5669,6 +5518,8 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
*to = *from;
}
#ifdef USE_REGSET
extern int
onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
{
@ -5759,7 +5610,7 @@ onig_regset_add(OnigRegSet* set, regex_t* reg)
{
OnigRegion* region;
if (IS_FIND_LONGEST(reg->options))
if (OPTON_FIND_LONGEST(reg->options))
return ONIGERR_INVALID_ARGUMENT;
if (set->n != 0 && reg->enc != set->enc)
@ -5805,7 +5656,7 @@ onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
set->n--;
}
else {
if (IS_FIND_LONGEST(reg->options))
if (OPTON_FIND_LONGEST(reg->options))
return ONIGERR_INVALID_ARGUMENT;
if (set->n > 1 && reg->enc != set->enc)
@ -5864,6 +5715,8 @@ onig_regset_get_region(OnigRegSet* set, int at)
return set->rs[at].region;
}
#endif /* USE_REGSET */
#ifdef USE_DIRECT_THREADED_CODE
extern int
@ -6385,6 +6238,8 @@ onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
}
#ifndef ONIGURUMA_UNSUPPORTED_PRINT
#include <stdio.h>
static FILE* OutFp;
@ -6483,4 +6338,6 @@ onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
return ONIG_NORMAL;
}
#endif /* ONIGURUMA_UNSUPPORTED_PRINT */
#endif /* USE_CALLOUT */

View File

@ -5,7 +5,7 @@
encoding: UTF-8
**********************************************************************/
/*-
* Copyright (c) 2002-2019 K.Kosako
* Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -45,6 +45,7 @@
defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#define DBGFP stderr
#endif
#endif
@ -56,6 +57,7 @@
/* config */
/* spec. config */
#define USE_REGSET
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
@ -119,6 +121,9 @@
/* */
#define onig_st_is_member st_is_member
#ifndef ONIGURUMA_SYS_UEFI
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
@ -176,6 +181,19 @@ typedef unsigned int uintptr_t;
#endif
#endif
/* strend hash */
typedef void hash_table_type;
#ifdef _WIN32
# include <windows.h>
typedef ULONG_PTR hash_data_type;
#else
typedef unsigned long hash_data_type;
#endif
#endif /* ONIGURUMA_SYS_UEFI */
#ifdef MIN
#undef MIN
#endif
@ -237,7 +255,6 @@ enum OptimizeType {
OPTIMIZE_STR, /* Slow Search */
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
OPTIMIZE_MAP /* char map */
};
@ -290,32 +307,20 @@ typedef unsigned int MemStatusType;
(IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_FIND_CONDITION(option) ((option) & \
#define OPTON_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define OPTON_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define OPTON_FIND_CONDITION(option) ((option) & \
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define IS_WORD_ASCII(option) \
((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define IS_DIGIT_ASCII(option) \
((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define IS_SPACE_ASCII(option) \
((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define IS_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII)
#define IS_ASCII_MODE_CTYPE_OPTION(ctype, options) \
((ctype) >= 0 && \
(((ctype) < ONIGENC_CTYPE_ASCII && IS_POSIX_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_WORD && IS_WORD_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_DIGIT && IS_DIGIT_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_SPACE && IS_SPACE_ASCII(options))))
#define OPTON_NEGATE_SINGLELINE(option) ((option) & \
ONIG_OPTION_NEGATE_SINGLELINE)
#define OPTON_DONT_CAPTURE_GROUP(option) ((option) & \
ONIG_OPTION_DONT_CAPTURE_GROUP)
#define OPTON_CAPTURE_GROUP(option) ((option) & ONIG_OPTION_CAPTURE_GROUP)
#define OPTON_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define OPTON_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \
ONIG_OPTION_CHECK_VALIDITY_OF_STRING)
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
@ -327,17 +332,17 @@ typedef unsigned int MemStatusType;
#define BITS_PER_BYTE 8
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
#define BITSET_REAL_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
typedef uint32_t Bits;
typedef Bits BitSet[BITSET_SIZE];
typedef Bits BitSet[BITSET_REAL_SIZE];
typedef Bits* BitSetRef;
#define SIZE_BITSET sizeof(BitSet)
#define BITSET_CLEAR(bs) do {\
int i;\
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { (bs)[i] = 0; } \
} while (0)
#define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5]
@ -475,8 +480,6 @@ enum OpCode {
OP_STR_MB2N, /* mb-length = 2 */
OP_STR_MB3N, /* mb-length = 3 */
OP_STR_MBN, /* other length */
OP_STR_1_IC, /* single byte, N = 1, ignore case */
OP_STR_N_IC, /* single byte, ignore case */
OP_CCLASS,
OP_CCLASS_MB,
OP_CCLASS_MIX,
@ -552,7 +555,7 @@ enum OpCode {
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
OP_PUSH_SAVE_VAL,
OP_SAVE_VAL,
OP_UPDATE_VAR,
#ifdef USE_CALL
OP_CALL, /* \g<name> */
@ -650,7 +653,7 @@ typedef int ModeType;
#define OPSIZE_LOOK_BEHIND_NOT_END 1
#define OPSIZE_CALL 1
#define OPSIZE_RETURN 1
#define OPSIZE_PUSH_SAVE_VAL 1
#define OPSIZE_SAVE_VAL 1
#define OPSIZE_UPDATE_VAR 1
#ifdef USE_CALLOUT
@ -810,7 +813,7 @@ typedef struct {
struct {
SaveType type;
MemNumType id;
} push_save_val;
} save_val;
struct {
UpdateVarType type;
MemNumType id;
@ -999,16 +1002,6 @@ extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num
#endif /* USE_CALLOUT */
/* strend hash */
typedef void hash_table_type;
#ifdef _WIN32
# include <windows.h>
typedef ULONG_PTR hash_data_type;
#else
typedef unsigned long hash_data_type;
#endif
extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));

View File

@ -3,7 +3,7 @@
encoding: UTF-8
**********************************************************************/
/*-
* Copyright (c) 2002-2019 K.Kosako
* Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -46,6 +46,26 @@
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')
/* Option-bit tests used by the parser.  Each macro masks `option` with the
   corresponding ONIG_OPTION_* bit(s) and therefore evaluates to non-zero
   when at least one of those bits is set, and to zero otherwise. */
#define OPTON_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define OPTON_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define OPTON_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define OPTON_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
/* The WORD/DIGIT/SPACE tests also accept ONIG_OPTION_POSIX_IS_ASCII, so the
   POSIX-wide ASCII option switches each of them on as well. */
#define OPTON_WORD_ASCII(option) \
((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_DIGIT_ASCII(option) \
((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_SPACE_ASCII(option) \
((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII)
#define OPTON_TEXT_SEGMENT_WORD(option) ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)
/* True when character type `ctype` must be evaluated in ASCII mode under
   `options`: `ctype` must be non-negative, and either it is below
   ONIGENC_CTYPE_ASCII while the POSIX-ASCII option is on, or it is
   WORD / DIGIT / SPACE while the matching *_ASCII test above is non-zero.
   Note that both arguments are expanded more than once. */
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
((ctype) >= 0 && \
(((ctype) < ONIGENC_CTYPE_ASCII && OPTON_POSIX_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_WORD && OPTON_WORD_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
OnigSyntaxType OnigSyntaxOniguruma = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
@ -296,7 +316,7 @@ backref_rel_to_abs(int rel_no, ScanEnv* env)
#define BITSET_IS_EMPTY(bs,empty) do {\
int i;\
empty = 1;\
for (i = 0; i < (int )BITSET_SIZE; i++) {\
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
if ((bs)[i] != 0) {\
empty = 0; break;\
}\
@ -316,35 +336,35 @@ static void
bitset_invert(BitSetRef bs)
{
int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
}
static void
bitset_invert_to(BitSetRef from, BitSetRef to)
{
int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
}
static void
bitset_and(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
}
static void
bitset_or(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
}
static void
bitset_copy(BitSetRef dest, BitSetRef bs)
{
int i;
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
}
extern int
@ -776,7 +796,7 @@ onig_foreach_name(regex_t* reg,
}
static int
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
{
int i;
@ -793,7 +813,7 @@ i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
}
extern int
onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
{
NameTable* t = (NameTable* )reg->name_table;
@ -1143,12 +1163,12 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
extern int
onig_noname_group_capture_is_active(regex_t* reg)
{
if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
if (OPTON_DONT_CAPTURE_GROUP(reg->options))
return 0;
if (onig_number_of_names(reg) > 0 &&
IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
!ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
! OPTON_CAPTURE_GROUP(reg->options)) {
return 0;
}
@ -1604,12 +1624,11 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
fe->arg_types[i] = arg_types[i];
}
for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
if (fe->arg_types[i] == ONIG_TYPE_STRING) {
OnigValue* val;
UChar* ds;
if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
val = opt_defaults + j;
ds = onigenc_strdup(enc, val->s.start, val->s.end);
CHECK_NULL_RETURN_MEMERR(ds);
@ -2138,6 +2157,18 @@ node_new(void)
return node;
}
/*
 * Allocate a fresh node and fill it with a bytewise copy of `from`.
 *
 * Returns the new node, or bails out through CHECK_NULL_RETURN when
 * node_new() fails.
 *
 * The copy is shallow: every pointer stored inside `from` is duplicated as a
 * pointer value, so the result refers to the same targets as the source
 * node.  NOTE(review): callers presumably re-link or re-own such members
 * before either node is freed -- confirm at each call site.
 */
extern Node*
onig_node_copy(Node* from)
{
  Node* dup = node_new();

  CHECK_NULL_RETURN(dup);

  xmemcpy(dup, from, sizeof(*dup));
  return dup;
}
static void
initialize_cclass(CClassNode* cc)
@ -2167,30 +2198,20 @@ node_new_ctype(int type, int not, OnigOptionType options)
NODE_SET_TYPE(node, NODE_CTYPE);
CTYPE_(node)->ctype = type;
CTYPE_(node)->not = not;
CTYPE_(node)->options = options;
CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
return node;
}
static Node*
node_new_anychar(void)
node_new_anychar(OnigOptionType options)
{
Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);
return node;
}
static Node*
node_new_anychar_with_fixed_option(OnigOptionType option)
{
CtypeNode* ct;
Node* node;
node = node_new_anychar();
node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
CHECK_NULL_RETURN(node);
ct = CTYPE_(node);
ct->options = option;
NODE_STATUS_ADD(node, FIXED_OPTION);
if (OPTON_MULTILINE(options))
NODE_STATUS_ADD(node, MULTILINE);
return node;
}
@ -2199,18 +2220,18 @@ node_new_no_newline(Node** node, ScanEnv* env)
{
Node* n;
n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
n = node_new_anychar(ONIG_OPTION_NONE);
CHECK_NULL_RETURN_MEMERR(n);
*node = n;
return 0;
}
static int
node_new_true_anychar(Node** node, ScanEnv* env)
node_new_true_anychar(Node** node)
{
Node* n;
n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
n = node_new_anychar(ONIG_OPTION_MULTILINE);
CHECK_NULL_RETURN_MEMERR(n);
*node = n;
return 0;
@ -2292,16 +2313,39 @@ make_alt(int n, Node* ns[])
return make_list_or_alt(NODE_ALT, n, ns);
}
extern Node*
onig_node_new_anchor(int type, int ascii_mode)
static Node*
node_new_anchor(int type)
{
Node* node = node_new();
Node* node;
node = node_new();
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_ANCHOR);
ANCHOR_(node)->type = type;
ANCHOR_(node)->char_len = -1;
ANCHOR_(node)->char_len = INFINITE_LEN;
ANCHOR_(node)->ascii_mode = 0;
return node;
}
/*
 * Build an anchor node of kind `type` and derive its option-dependent
 * attributes from `options`:
 *   - ascii_mode is 1 only when the word-ASCII option test is non-zero and
 *     `type` is a word anchor type, otherwise 0;
 *   - the TEXT_SEGMENT_WORD status is added for the two text-segment
 *     boundary anchor types when the text-segment-word option is on.
 *
 * Returns the new node, or bails out through CHECK_NULL_RETURN when
 * node_new_anchor() fails.
 */
static Node*
node_new_anchor_with_options(int type, OnigOptionType options)
{
  Node* anchor = node_new_anchor(type);

  CHECK_NULL_RETURN(anchor);

  if (OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type))
    ANCHOR_(anchor)->ascii_mode = 1;
  else
    ANCHOR_(anchor)->ascii_mode = 0;

  if ((type == ANCR_TEXT_SEGMENT_BOUNDARY ||
       type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) &&
      OPTON_TEXT_SEGMENT_WORD(options)) {
    NODE_STATUS_ADD(anchor, TEXT_SEGMENT_WORD);
  }

  return anchor;
}
@ -2313,8 +2357,9 @@ node_new_backref(int back_num, int* backrefs, int by_name,
ScanEnv* env)
{
int i;
Node* node = node_new();
Node* node;
node = node_new();
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_BACKREF);
@ -2323,6 +2368,9 @@ node_new_backref(int back_num, int* backrefs, int by_name,
if (by_name != 0)
NODE_STATUS_ADD(node, BY_NAME);
if (OPTON_IGNORECASE(env->options))
NODE_STATUS_ADD(node, IGNORECASE);
#ifdef USE_BACKREF_WITH_LEVEL
if (exist_level != 0) {
NODE_STATUS_ADD(node, NEST_LEVEL);
@ -2693,10 +2741,10 @@ make_text_segment(Node** node, ScanEnv* env)
ns[1] = NULL_NODE;
r = ONIGERR_MEMORY;
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);
ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
if (IS_NULL(ns[0])) goto err;
r = node_new_true_anychar(&ns[1], env);
r = node_new_true_anychar(&ns[1]);
if (r != 0) goto err1;
x = make_list(2, ns);
@ -2711,7 +2759,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[0] = NULL_NODE;
ns[1] = x;
r = node_new_true_anychar(&ns[0], env);
r = node_new_true_anychar(&ns[0]);
if (r != 0) goto err1;
x = make_list(2, ns);
@ -3060,7 +3108,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
r = node_new_true_anychar(&body);
if (r != 0) {
onig_node_free(quant);
goto err;
@ -3095,7 +3143,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
id2 = GIMMICK_(ns[1])->id;
r = node_new_true_anychar(&ns[3], env);
r = node_new_true_anychar(&ns[3]);
if (r != 0) goto err;
possessive = 1;
@ -3195,7 +3243,6 @@ onig_node_str_clear(Node* node)
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
STR_(node)->capacity = 0;
STR_(node)->case_min_len = 0;
}
static Node*
@ -3209,7 +3256,6 @@ node_new_str(const UChar* s, const UChar* end)
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
STR_(node)->capacity = 0;
STR_(node)->case_min_len = 0;
if (onig_node_str_cat(node, s, end)) {
onig_node_free(node);
@ -3225,9 +3271,22 @@ onig_node_new_str(const UChar* s, const UChar* end)
}
static Node*
node_new_str_crude(UChar* s, UChar* end)
node_new_str_with_options(const UChar* s, const UChar* end,
OnigOptionType options)
{
Node* node = node_new_str(s, end);
Node* node;
node = node_new_str(s, end);
if (OPTON_IGNORECASE(options))
NODE_STATUS_ADD(node, IGNORECASE);
return node;
}
/*
 * Create a string node for the bytes [s, end) via
 * node_new_str_with_options() and flag it as a crude string
 * (NODE_STRING_SET_CRUDE).
 *
 * Returns the new node, or bails out through CHECK_NULL_RETURN when the
 * underlying constructor yields NULL.
 */
static Node*
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
{
  Node* crude;

  crude = node_new_str_with_options(s, end, options);
  CHECK_NULL_RETURN(crude);

  NODE_STRING_SET_CRUDE(crude);
  return crude;
}
}
static Node*
node_new_str_crude_char(UChar c)
node_new_str_crude_char(UChar c, OnigOptionType options)
{
int i;
UChar p[1];
Node* node;
p[0] = c;
node = node_new_str_crude(p, p + 1);
node = node_new_str_crude(p, p + 1, options);
/* clear buf tail */
for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
@ -3270,12 +3329,13 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
if (p && p > sn->s) { /* can be split. */
rn = node_new_str(p, sn->end);
CHECK_NULL_RETURN(rn);
if (NODE_STRING_IS_CRUDE(node))
NODE_STRING_SET_CRUDE(rn);
sn->end = (UChar* )p;
STR_(rn)->flag = sn->flag;
NODE_STATUS(rn) = NODE_STATUS(node);
}
}
return rn;
}
@ -4001,10 +4061,10 @@ node_new_general_newline(Node** node, ScanEnv* env)
dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
if (dlen < 0) return dlen;
alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
if (alen < 0) return alen;
crnl = node_new_str_crude(buf, buf + dlen + alen);
crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
CHECK_NULL_RETURN_MEMERR(crnl);
ncc = node_new_cclass();
@ -4012,10 +4072,10 @@ node_new_general_newline(Node** node, ScanEnv* env)
cc = CCLASS_(ncc);
if (dlen == 1) {
bitset_set_range(cc->bs, 0x0a, 0x0d);
bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
}
else {
r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
if (r != 0) {
err1:
onig_node_free(ncc);
@ -5485,7 +5545,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (c == MC_ANYCHAR(syn))
goto any_char;
else if (c == MC_ANYTIME(syn))
goto anytime;
goto any_time;
else if (c == MC_ZERO_OR_ONE_TIME(syn))
goto zero_or_one_time;
else if (c == MC_ONE_OR_MORE_TIME(syn))
@ -5509,7 +5569,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '*':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
#ifdef USE_VARIABLE_META_CHARS
anytime:
any_time:
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
@ -5665,14 +5725,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '^':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->options)
tok->u.subtype = (OPTON_SINGLELINE(env->options)
? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
break;
case '$':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->options)
tok->u.subtype = (OPTON_SINGLELINE(env->options)
? ANCR_SEMI_END_BUF : ANCR_END_LINE);
break;
@ -5687,7 +5747,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '#':
if (IS_EXTEND(env->options)) {
if (OPTON_EXTEND(env->options)) {
while (!PEND) {
PFETCH(c);
if (ONIGENC_IS_CODE_NEWLINE(enc, c))
@ -5699,7 +5759,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case ' ': case '\t': case '\n': case '\r': case '\f':
if (IS_EXTEND(env->options))
if (OPTON_EXTEND(env->options))
goto start;
break;
@ -5885,8 +5945,6 @@ add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
#define ASCII_LIMIT 127
int c, r;
int ascii_mode;
int is_single;
@ -5895,7 +5953,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
@ -6579,8 +6637,6 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
BITSET_IS_EMPTY(cc->bs, is_empty);
if (is_empty == 0) {
#define NEWLINE_CODE 0x0a
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
@ -7096,10 +7152,10 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
break;
case '=':
*np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);
*np = node_new_anchor(ANCR_PREC_READ);
break;
case '!': /* preceding read */
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);
*np = node_new_anchor(ANCR_PREC_READ_NOT);
break;
case '>': /* (?>...) stop backtrack */
*np = node_new_bag(BAG_STOP_BACKTRACK);
@ -7117,9 +7173,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);
*np = node_new_anchor(ANCR_LOOK_BEHIND);
else if (c == '!')
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);
*np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
else {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
@ -7132,7 +7188,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
named_group1:
list_capture = 0;
#ifdef USE_CAPTURE_HISTORY
named_group2:
#endif
name = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
&num_type, FALSE);
@ -7613,7 +7671,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
#endif
else {
if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
if (OPTON_DONT_CAPTURE_GROUP(env->options))
goto group;
*np = node_new_memory(0);
@ -7884,7 +7942,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
else {
len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) {
csnode = onig_node_new_str(buf, buf + len);
csnode = node_new_str(buf, buf + len);
if (IS_NULL(csnode)) goto err_free_ns;
NODE_STRING_SET_CASE_EXPANDED(csnode);
@ -7923,6 +7981,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
Node** tp;
unsigned int parse_depth;
retry:
group = 0;
*np = NULL;
if (tok->type == (enum TokenSyms )term)
@ -7956,19 +8015,28 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
}
else if (r == 2) { /* option only */
Node* target;
OnigOptionType prev = env->options;
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_alts(&target, tok, term, src, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
return r;
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
onig_node_free(*np);
goto retry;
}
else {
Node* target;
OnigOptionType prev = env->options;
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_alts(&target, tok, term, src, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
return r;
}
NODE_BODY(*np) = target;
}
NODE_BODY(*np) = target;
return tok->type;
}
break;
@ -7984,7 +8052,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_STRING:
tk_byte:
{
*np = node_new_str(tok->backp, *src);
*np = node_new_str_with_options(tok->backp, *src, env->options);
CHECK_NULL_RETURN_MEMERR(*np);
while (1) {
@ -8005,7 +8073,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_CRUDE_BYTE:
tk_crude_byte:
{
*np = node_new_str_crude_char(tok->u.byte);
*np = node_new_str_crude_char(tok->u.byte, env->options);
CHECK_NULL_RETURN_MEMERR(*np);
len = 1;
while (1) {
@ -8042,9 +8110,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
if (len < 0) return len;
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
*np = node_new_str_crude(buf, buf + len);
*np = node_new_str_crude(buf, buf + len, env->options);
#else
*np = node_new_str(buf, buf + len);
*np = node_new_str_with_options(buf, buf + len, env->options);
#endif
CHECK_NULL_RETURN_MEMERR(*np);
}
@ -8062,7 +8130,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (IS_NULL(qend)) {
nextp = qend = end;
}
*np = node_new_str(qstart, qend);
*np = node_new_str_with_options(qstart, qend, env->options);
CHECK_NULL_RETURN_MEMERR(*np);
*src = nextp;
}
@ -8110,7 +8178,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r != 0) return r;
cc = CCLASS_(*np);
if (IS_IGNORECASE(env->options)) {
if (OPTON_IGNORECASE(env->options)) {
IApplyCaseFoldArg iarg;
iarg.env = env;
@ -8137,12 +8205,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
break;
case TK_ANYCHAR:
*np = node_new_anychar();
*np = node_new_anychar(env->options);
CHECK_NULL_RETURN_MEMERR(*np);
break;
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
*np = node_new_anychar(env->options);
CHECK_NULL_RETURN_MEMERR(*np);
qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
CHECK_NULL_RETURN_MEMERR(qn);
@ -8180,12 +8248,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
#endif
case TK_ANCHOR:
{
int ascii_mode =
IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
*np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
CHECK_NULL_RETURN_MEMERR(*np);
}
*np = node_new_anchor_with_options(tok->u.anchor, env->options);
CHECK_NULL_RETURN_MEMERR(*np);
break;
case TK_REPEAT:
@ -8219,7 +8283,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
break;
case TK_TRUE_ANYCHAR:
r = node_new_true_anychar(np, env);
r = node_new_true_anychar(np);
if (r < 0) return r;
break;
@ -8365,9 +8429,11 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
{
int r;
Node *node, **headp;
OnigOptionType save_options;
*top = NULL;
INC_PARSE_DEPTH(env->parse_depth);
save_options = env->options;
r = parse_branch(&node, tok, term, src, end, env, group_head);
if (r < 0) {
@ -8416,6 +8482,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_PARSER_BUG;
}
env->options = save_options;
DEC_PARSE_DEPTH(env->parse_depth);
return r;
}

View File

@ -33,7 +33,7 @@
#include "regint.h"
#define NODE_STRING_MARGIN 16
#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* node type */
@ -68,10 +68,10 @@ enum GimmickType {
};
enum BodyEmptyType {
BODY_IS_NOT_EMPTY = 0,
BODY_IS_EMPTY_POSSIBILITY = 1,
BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
BODY_IS_EMPTY_POSSIBILITY_REC = 3
BODY_IS_NOT_EMPTY = 0,
BODY_MAY_BE_EMPTY = 1,
BODY_MAY_BE_EMPTY_MEM = 2,
BODY_MAY_BE_EMPTY_REC = 3
};
struct _Node;
@ -86,7 +86,6 @@ typedef struct {
unsigned int flag;
UChar buf[NODE_STRING_BUF_SIZE];
int capacity; /* (allocated size - 1) or 0: use buf[] */
int case_min_len;
} StrNode;
typedef struct {
@ -140,7 +139,8 @@ typedef struct {
/* for multiple call reference */
OnigLen min_len; /* min length (byte) */
OnigLen max_len; /* max length (byte) */
int char_len; /* character length */
OnigLen min_char_len;
OnigLen max_char_len;
int opt_count; /* referenced count in optimize_nodes() */
} BagNode;
@ -190,7 +190,7 @@ typedef struct {
struct _Node* body;
int type;
int char_len;
OnigLen char_len;
int ascii_mode;
} AnchorNode;
@ -210,7 +210,6 @@ typedef struct {
int ctype;
int not;
OnigOptionType options;
int ascii_mode;
} CtypeNode;
@ -288,42 +287,35 @@ typedef struct _Node {
#define NODE_IS_ANYCHAR(node) \
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
#define CTYPE_OPTION(node, reg) \
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
#define NODE_STRING_CRUDE (1<<0)
#define NODE_STRING_CASE_EXPANDED (1<<1)
#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
#define NODE_STRING_IS_CRUDE(node) \
(((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
#define NODE_STRING_IS_CASE_EXPANDED(node) \
(((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
(((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
/* node status bits */
#define NODE_ST_MIN_FIXED (1<<0)
#define NODE_ST_MAX_FIXED (1<<1)
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_FIXED_MIN (1<<0)
#define NODE_ST_FIXED_MAX (1<<1)
#define NODE_ST_FIXED_CLEN (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
#define NODE_ST_FIXED_ADDR (1<<8)
#define NODE_ST_NAMED_GROUP (1<<9)
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
@ -333,10 +325,12 @@ typedef struct _Node {
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
#define NODE_ST_BACKREF (1<<16)
#define NODE_ST_CHECKER (1<<17)
#define NODE_ST_FIXED_OPTION (1<<18)
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
#define NODE_ST_SUPER (1<<20)
#define NODE_ST_EMPTY_STATUS_CHECK (1<<21)
#define NODE_ST_PROHIBIT_RECURSION (1<<18)
#define NODE_ST_SUPER (1<<19)
#define NODE_ST_EMPTY_STATUS_CHECK (1<<20)
#define NODE_ST_IGNORECASE (1<<21)
#define NODE_ST_MULTILINE (1<<22)
#define NODE_ST_TEXT_SEGMENT_WORD (1<<23)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
@ -350,17 +344,16 @@ typedef struct _Node {
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
#define NODE_IS_FIXED_ADDR(node) ((NODE_STATUS(node) & NODE_ST_FIXED_ADDR) != 0)
#define NODE_IS_FIXED_CLEN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN) != 0)
#define NODE_IS_FIXED_MIN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MIN) != 0)
#define NODE_IS_FIXED_MAX(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MAX) != 0)
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
@ -368,6 +361,9 @@ typedef struct _Node {
((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
#define NODE_IS_EMPTY_STATUS_CHECK(node) \
((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0)
#define NODE_IS_IGNORECASE(node) ((NODE_STATUS(node) & NODE_ST_IGNORECASE) != 0)
#define NODE_IS_MULTILINE(node) ((NODE_STATUS(node) & NODE_ST_MULTILINE) != 0)
#define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0)
#define NODE_PARENT(node) ((node)->u.base.parent)
#define NODE_BODY(node) ((node)->u.base.body)
@ -431,19 +427,19 @@ typedef struct {
typedef struct {
int new_val;
} GroupNumRemap;
} GroupNumMap;
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumMap* map));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_reduce_nested_quantifier P_((Node* pnode));
extern Node* onig_node_copy(Node* from);
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_bag P_((enum BagType type));
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern Node* onig_node_new_alt P_((Node* left, Node* right));

View File

@ -153,7 +153,8 @@ OnigSyntaxType OnigSyntaxJava = {
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ONIG_OPTION_SINGLELINE
,
{
@ -186,7 +187,7 @@ OnigSyntaxType OnigSyntaxPerl = {
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )
, SYN_GNU_REGEX_BV
, SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH
, ONIG_OPTION_SINGLELINE
,
{
@ -224,7 +225,7 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL )
, ( SYN_GNU_REGEX_BV |
, ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
, ONIG_OPTION_SINGLELINE

View File

@ -6,12 +6,17 @@
#define ST_INCLUDED
/* st_data_t selection.  The typedef is provided here only when
   ONIGURUMA_SYS_UEFI is not defined.
   NOTE(review): for UEFI builds the type is presumably supplied by another
   header -- confirm. */
#ifndef ONIGURUMA_SYS_UEFI
#ifdef _WIN32
/* On Windows use ULONG_PTR from <windows.h>, a pointer-sized unsigned
   integer; `unsigned long` stays 32 bits on 64-bit Windows. */
# include <windows.h>
typedef ULONG_PTR st_data_t;
#else
typedef unsigned long st_data_t;
#endif
#endif /* ONIGURUMA_SYS_UEFI */
/* Defined unconditionally, including when the typedef above was skipped
   because ONIGURUMA_SYS_UEFI is set. */
#define ST_DATA_T_DEFINED
/* Lets the rest of the header refer to `struct st_table` as `st_table`. */
typedef struct st_table st_table;

View File

@ -279,9 +279,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[])
{
int n, m, i, j, k, len;
OnigCodePoint code, codes[3];
const struct ByUnfoldKey* buk;
int n, m, i, j, k, len, lens[3];
int index;
int fn, ncs[3];
OnigCodePoint cs[3][4];
OnigCodePoint code, codes[3], orig_codes[3];
const struct ByUnfoldKey* buk1;
n = 0;
@ -317,38 +320,161 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
#endif
buk = onigenc_unicode_unfold_key(code);
if (buk != 0) {
if (buk->fold_len == 1) {
orig_codes[0] = code;
lens[0] = len;
p += len;
buk1 = onigenc_unicode_unfold_key(orig_codes[0]);
if (buk1 != 0 && buk1->fold_len == 1) {
codes[0] = *FOLDS1_FOLD(buk1->index);
}
else
codes[0] = orig_codes[0];
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
goto fold1;
if (p < end) {
const struct ByUnfoldKey* buk;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
orig_codes[1] = code;
len = enclen(enc, p);
lens[1] = lens[0] + len;
buk = onigenc_unicode_unfold_key(orig_codes[1]);
if (buk != 0 && buk->fold_len == 1) {
codes[1] = *FOLDS1_FOLD(buk->index);
}
else
codes[1] = orig_codes[1];
p += len;
if (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
orig_codes[2] = code;
len = enclen(enc, p);
lens[2] = lens[1] + len;
buk = onigenc_unicode_unfold_key(orig_codes[2]);
if (buk != 0 && buk->fold_len == 1) {
codes[2] = *FOLDS1_FOLD(buk->index);
}
else
codes[2] = orig_codes[2];
index = onigenc_unicode_fold3_key(codes);
if (index >= 0) {
m = FOLDS3_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
items[n].byte_len = lens[2];
items[n].code_len = 1;
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
n++;
}
for (fn = 0; fn < 3; fn++) {
int sindex;
cs[fn][0] = FOLDS3_FOLD(index)[fn];
ncs[fn] = 1;
sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
if (sindex >= 0) {
int m = FOLDS1_UNFOLDS_NUM(sindex);
for (i = 0; i < m; i++) {
cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
}
ncs[fn] += m;
}
}
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
for (k = 0; k < ncs[2]; k++) {
items[n].byte_len = lens[2];
items[n].code_len = 3;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
items[n].code[2] = cs[2][k];
if (items[n].code[0] == orig_codes[0] &&
items[n].code[1] == orig_codes[1] &&
items[n].code[2] == orig_codes[2])
continue;
n++;
}
}
}
return n;
}
}
index = onigenc_unicode_fold2_key(codes);
if (index >= 0) {
m = FOLDS2_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
items[n].byte_len = lens[1];
items[n].code_len = 1;
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
n++;
}
for (fn = 0; fn < 2; fn++) {
int sindex;
cs[fn][0] = FOLDS2_FOLD(index)[fn];
ncs[fn] = 1;
sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
if (sindex >= 0) {
int m = FOLDS1_UNFOLDS_NUM(sindex);
for (i = 0; i < m; i++) {
cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
}
ncs[fn] += m;
}
}
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
items[n].byte_len = lens[1];
items[n].code_len = 2;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
if (items[n].code[0] == orig_codes[0] &&
items[n].code[1] == orig_codes[1])
continue;
n++;
}
}
return n;
}
}
fold1:
if (buk1 != 0) {
if (buk1->fold_len == 1) {
int un;
items[0].byte_len = len;
items[0].byte_len = lens[0];
items[0].code_len = 1;
items[0].code[0] = *FOLDS1_FOLD(buk->index);
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
n++;
un = FOLDS1_UNFOLDS_NUM(buk->index);
un = FOLDS1_UNFOLDS_NUM(buk1->index);
for (i = 0; i < un; i++) {
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
if (unfold != code) {
items[n].byte_len = len;
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk1->index)[i];
if (unfold != orig_codes[0]) {
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = unfold;
n++;
}
}
code = items[0].code[0]; /* for multi-code to unfold search. */
}
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
OnigCodePoint cs[3][4];
int fn, ncs[3];
if (buk->fold_len == 2) {
m = FOLDS2_UNFOLDS_NUM(buk->index);
if (buk1->fold_len == 2) {
m = FOLDS2_UNFOLDS_NUM(buk1->index);
for (i = 0; i < m; i++) {
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
if (unfold == code) continue;
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk1->index)[i];
if (unfold == orig_codes[0]) continue;
items[n].byte_len = len;
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = unfold;
n++;
@ -356,7 +482,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 2; fn++) {
int index;
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
cs[fn][0] = FOLDS2_FOLD(buk1->index)[fn];
ncs[fn] = 1;
index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
@ -370,7 +496,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
items[n].byte_len = len;
items[n].byte_len = lens[0];
items[n].code_len = 2;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
@ -379,12 +505,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
}
else { /* fold_len == 3 */
m = FOLDS3_UNFOLDS_NUM(buk->index);
m = FOLDS3_UNFOLDS_NUM(buk1->index);
for (i = 0; i < m; i++) {
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
if (unfold == code) continue;
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk1->index)[i];
if (unfold == orig_codes[0]) continue;
items[n].byte_len = len;
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = unfold;
n++;
@ -392,7 +518,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 3; fn++) {
int index;
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
cs[fn][0] = FOLDS3_FOLD(buk1->index)[fn];
ncs[fn] = 1;
index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
@ -407,7 +533,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
for (k = 0; k < ncs[2]; k++) {
items[n].byte_len = len;
items[n].byte_len = lens[0];
items[n].code_len = 3;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
@ -417,17 +543,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
}
}
/* multi char folded code is not head of another folded multi char */
return n;
}
}
else {
int index = onigenc_unicode_fold1_key(&code);
int index = onigenc_unicode_fold1_key(orig_codes);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
items[n].byte_len = len;
items[n].byte_len = lens[0];
items[n].code_len = 1;
items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
n++;
@ -435,64 +558,6 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
}
}
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
return n;
p += len;
if (p < end) {
int clen;
int index;
codes[0] = code;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
buk = onigenc_unicode_unfold_key(code);
if (buk != 0 && buk->fold_len == 1) {
codes[1] = *FOLDS1_FOLD(buk->index);
}
else
codes[1] = code;
clen = enclen(enc, p);
len += clen;
index = onigenc_unicode_fold2_key(codes);
if (index >= 0) {
m = FOLDS2_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
n++;
}
}
p += clen;
if (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
buk = onigenc_unicode_unfold_key(code);
if (buk != 0 && buk->fold_len == 1) {
codes[2] = *FOLDS1_FOLD(buk->index);
}
else
codes[2] = code;
clen = enclen(enc, p);
len += clen;
index = onigenc_unicode_fold3_key(codes);
if (index >= 0) {
m = FOLDS3_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
items[n].byte_len = len;
items[n].code_len = 1;
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
n++;
}
}
}
}
return n;
}
@ -931,7 +996,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
return from != 0x000d || to != 0x000a;
return from != 0x000d || to != NEWLINE_CODE;
}
btype = unicode_egcb_is_break_2code(from, to);
@ -974,7 +1039,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
return 1;
#else
return from != 0x000d || to != 0x000a;
return from != 0x000d || to != NEWLINE_CODE;
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
}