mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+ update Oniguruma to current (2020-01-08) development version (6.9.4 R3)
This commit is contained in:
parent
c2b9aa7526
commit
f591ebfe7f
5
oniguruma/.gitignore
vendored
5
oniguruma/.gitignore
vendored
@ -47,6 +47,7 @@ m4/*.m4
|
||||
/test/testcu
|
||||
/test/testp
|
||||
/test/test_regset
|
||||
/test/test_syntax
|
||||
/test/kofu-utf8.txt
|
||||
|
||||
# sample/
|
||||
@ -67,8 +68,8 @@ m4/*.m4
|
||||
/sample/log*
|
||||
|
||||
/harnesses/utf16*.dict
|
||||
/harnesses/*-libfuzzer
|
||||
/harnesses/main-*
|
||||
/harnesses/fuzzer-*
|
||||
/harnesses/read-*
|
||||
/harnesses/libfuzzer-onig
|
||||
/harnesses/libfuzzer-onig-full
|
||||
/harnesses/slow-unit-*
|
||||
|
||||
@ -1,13 +1,15 @@
|
||||
History
|
||||
|
||||
2019/MM/DD: Version 6.9.4
|
||||
2019/11/29: Version 6.9.4
|
||||
|
||||
2019/11/22: Release Candidate 3 for Version 6.9.4
|
||||
|
||||
2019/11/20: fix a problem found by libFuzzer test
|
||||
2019/11/14: Release Candidate 2 for Version 6.9.4
|
||||
|
||||
2019/11/12: fix integer overflow by nested quantifier
|
||||
2019/11/11: fix #164: Integer overflow related to reg->dmax in search_in_range()
|
||||
2019/11/07: fix #163: heap-buffer-overflow in gb18030_mbc_enc_len()
|
||||
2019/11/06: fix #162: heap-buffer-overflow in fetch_interval_quantifier()
|
||||
2019/11/11: fix CVE-2019-19012: Integer overflow related to reg->dmax in search_in_range()
|
||||
2019/11/07: fix CVE-2019-19203: heap-buffer-overflow in gb18030_mbc_enc_len()
|
||||
2019/11/06: fix CVE-2019-19204: heap-buffer-overflow in fetch_interval_quantifier()
|
||||
2019/11/06: add HAVE_INTTYPES_H into config.h.windows.in and config.h.win{32,64}
|
||||
2019/11/06: add HAVE_STDINT_H into config.h.win{32,64}
|
||||
2019/11/05: Release Candidate 1 for Version 6.9.4
|
||||
|
||||
@ -27,11 +27,20 @@ Supported character encodings:
|
||||
* doc/SYNTAX.md: contributed by seanofw
|
||||
|
||||
|
||||
Master branch
|
||||
-------------
|
||||
|
||||
* Fixed behavior of isolated options in Perl and Java syntaxes. /...(?i).../
|
||||
|
||||
|
||||
Version 6.9.4
|
||||
-------------
|
||||
|
||||
* NEW API: RegSet (set of regexes)
|
||||
* Fixed CVE-2019-19012 (Issue #164)
|
||||
* Fixed CVE-2019-19012
|
||||
* Fixed CVE-2019-19203 (Does not affect UTF-8, UTF-16 and UTF-32 encodings)
|
||||
* Fixed CVE-2019-19204 (Affects only PosixBasic, Emacs and Grep syntaxes)
|
||||
* Fixed CVE-2019-19246
|
||||
* Fixed some problems (found by libFuzzer test)
|
||||
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
|
||||
# Oniguruma syntax (operator) configuration
|
||||
|
||||
_Documented for Oniguruma 6.9.3 (2019/08/08)_
|
||||
_Documented for Oniguruma 6.9.5 (2019/12/16)_
|
||||
|
||||
|
||||
----------
|
||||
@ -910,6 +910,13 @@ If this flag is set, then intervals of a fixed size will ignore a lazy (non-gree
|
||||
little as possible" is meaningless for a fixed-size interval. If this flag is clear,
|
||||
then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded.
|
||||
|
||||
### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`)
|
||||
|
||||
_Set in: Perl, Java_
|
||||
|
||||
If this flag is set, then an isolated option does not break the branch; it stays in effect until the end of the group (or the end of the pattern).
|
||||
If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/
|
||||
|
||||
### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`)
|
||||
|
||||
_Set in: Grep_
|
||||
|
||||
@ -55,6 +55,11 @@ def form3bytes(x):
|
||||
x2 = (x>>16) & 0xff
|
||||
return "\\x%02x\\x%02x\\x%02x" % (x2, x1, x0)
|
||||
|
||||
def enc_len(code, encode):
    """Return the number of bytes used to encode one code point.

    code:   integer code point.
    encode: codec name handed to the string's encode() method.

    The length is simply len() of the encoded result, so anything the
    codec itself adds to the output (for example a byte-order mark) is
    counted as well.
    """
    # `unichr` only exists on Python 2; on Python 3 the same job is done
    # by `chr`.  Resolve the converter at call time so this helper works
    # on either interpreter.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    return len(to_char(code).encode(encode))
|
||||
|
||||
def check_version_info(s):
|
||||
m = VERSION_REG.match(s)
|
||||
if m is not None:
|
||||
@ -324,6 +329,138 @@ def output_gperf_source():
|
||||
with open(GPERF_FOLD_KEY_FILES[i-1], 'w') as f:
|
||||
output_gperf_fold_key(f, i)
|
||||
|
||||
def unfolds_byte_length_check(encode):
    """Report unfold code points that encode longer than their fold.

    Every entry of UNFOLDS is visited in (fold_len, index) order.  The
    byte length of the unfold code point in `encode` is compared with the
    summed byte length of the code points in its fold sequence, and each
    case where the unfold is longer is written to stderr as one line.

    encode: codec name passed through to enc_len().
    """
    # An index-based sort key and an explicit stderr write replace the
    # tuple-parameter lambda and `print >>` statement, which are both
    # Python 2-only syntax; the output is unchanged.
    entries = sorted(UNFOLDS.items(),
                     key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, e in entries:
        key_len = enc_len(unfold, encode)
        fold_len = sum(enc_len(c, encode) for c in e.fold)
        if key_len > fold_len:
            sfolds = ' '.join("0x%06x" % c for c in e.fold)
            s = "%s byte length: %d > %d: 0x%06x => %s" % (
                encode, key_len, fold_len, unfold, sfolds)
            sys.stderr.write(s + "\n")
|
||||
|
||||
def double_fold_check():
    """Report fold results that are themselves keys of UNFOLDS.

    Every entry of UNFOLDS is visited in (fold_len, index) order.  For
    each code point in an entry's fold sequence that is also a key of
    UNFOLDS, one "double folds" line naming both mappings is written to
    stderr.
    """
    # An index-based sort key and an explicit stderr write replace the
    # tuple-parameter lambda and `print >>` statement, which are both
    # Python 2-only syntax; the output is unchanged.
    entries = sorted(UNFOLDS.items(),
                     key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, e in entries:
        for f in e.fold:
            e2 = UNFOLDS.get(f)
            if e2 is not None:
                s = "double folds: 0x%06x => %s, 0x%06x => %s" % (
                    unfold, e.fold, f, e2.fold)
                sys.stderr.write(s + "\n")
|
||||
|
||||
def unfold_is_multi_code_folds_head_check():
    """Report unfold code points that begin a multi-code fold sequence.

    Every key of UNFOLDS is visited in (fold_len, index) order.  For each
    key that equals the first code point of some entry whose fold_len is
    2 or 3, one line is written to stderr.  For a given key, matches from
    the length-2 entries are reported before those from the length-3
    entries.
    """
    items = list(UNFOLDS.items())

    # Group the multi-code entries by their first fold code point once,
    # rather than rescanning every entry for every key.  Length-2 entries
    # are collected before length-3 ones so the report order is the same
    # as with two separate scans.  This also avoids re-iterating filter()
    # results, which are one-shot iterators on Python 3.
    heads = {}
    for wanted_len in (2, 3):
        for _, e in items:
            if e.fold_len == wanted_len:
                heads.setdefault(e.fold[0], []).append(e)

    # Index-based key: tuple-parameter lambdas are Python 2-only syntax.
    ordered = sorted(items,
                     key=lambda item: (item[1].fold_len, item[1].index))
    for unfold, _ in ordered:
        for e in heads.get(unfold, ()):
            s = "unfold 0x%06x is multi-code fold head in %s" % (unfold, e.fold)
            sys.stderr.write(s + "\n")
|
||||
|
||||
def make_one_folds(l):
    """Group unfold code points by their single-code fold.

    l: iterable of (unfold, entry) pairs; each entry exposes `fold_len`
       and `fold`.

    Returns a dict mapping a fold code point to the list of unfold code
    points whose entry has fold_len == 1 and folds to it, in the order
    they appear in `l`.  Entries with any other fold_len are ignored.
    """
    grouped = {}
    for source, entry in l:
        if entry.fold_len == 1:
            grouped.setdefault(entry.fold[0], []).append(source)
    return grouped
|
||||
|
||||
def make_foldn_heads(l, fold_len, one_folds):
    """Index the entries of a given fold length by their first fold code.

    l:         iterable of (unfold, entry) pairs.
    fold_len:  only entries whose `fold_len` equals this value are used.
    one_folds: dict as built by make_one_folds().

    Returns a dict mapping the first code point of each selected entry's
    fold to a tuple (entry, one_folds.get(first_code)).  When several
    entries share the same first code point, the last one in `l` wins.
    """
    return dict(
        (entry.fold[0], (entry, one_folds.get(entry.fold[0])))
        for _, entry in l
        if entry.fold_len == fold_len
    )
|
||||
|
||||
def fold2_expansion_num(e, one_folds):
    """Count the expansions of an entry whose fold has two code points.

    e:         entry exposing `unfolds` and a `fold` of at least two codes.
    one_folds: dict mapping a fold code point to a list of code points.

    Each of the two fold positions contributes 1 plus the number of
    codes listed for it in `one_folds`; the result is the product of the
    two factors added to len(e.unfolds).
    """
    total = len(e.unfolds)
    combos = 1
    for pos in range(2):
        alternatives = one_folds.get(e.fold[pos])
        combos *= 1 if alternatives is None else 1 + len(alternatives)
    return total + combos
|
||||
|
||||
def fold3_expansion_num(e, one_folds):
    """Count the expansions of an entry whose fold has three code points.

    e:         entry exposing `unfolds` and a `fold` of at least three codes.
    one_folds: dict mapping a fold code point to a list of code points.

    Each of the three fold positions contributes 1 plus the number of
    codes listed for it in `one_folds`; the result is the product of the
    three factors added to len(e.unfolds).
    """
    total = len(e.unfolds)
    combos = 1
    for pos in range(3):
        alternatives = one_folds.get(e.fold[pos])
        combos *= 1 if alternatives is None else 1 + len(alternatives)
    return total + combos
|
||||
|
||||
def get_all_folds_expansion_num(x, one_folds, fold2_heads, fold3_heads):
    """Return the expansion count for the UNFOLDS entry keyed by `x`.

    x:           key into UNFOLDS.
    one_folds:   dict as built by make_one_folds().
    fold2_heads: dict as built by make_foldn_heads(..., 2, ...).
    fold3_heads: dict as built by make_foldn_heads(..., 3, ...).

    Entries of fold length 2 or 3 are counted directly by the matching
    foldN_expansion_num() helper.  For a length-1 entry the result is the
    largest of: its own unfold count plus one (for the fold itself), and
    the counts of the 2- and 3-code entries headed by its fold code.

    Raises RuntimeError for any other fold length, and KeyError when `x`
    is not in UNFOLDS.
    """
    entry = UNFOLDS[x]

    if entry.fold_len == 2:
        return fold2_expansion_num(entry, one_folds)
    if entry.fold_len == 3:
        return fold3_expansion_num(entry, one_folds)
    if entry.fold_len != 1:
        raise RuntimeError("Invalid fold_len %d" % (entry.fold_len))

    own_count = len(entry.unfolds) + 1  # +1: fold
    head = entry.fold[0]
    pair_count = triple_count = 0

    hit = fold2_heads.get(head)
    if hit is not None:
        pair_entry, _ = hit
        pair_count = fold2_expansion_num(pair_entry, one_folds)

    hit = fold3_heads.get(head)
    if hit is not None:
        triple_entry, _ = hit
        triple_count = fold3_expansion_num(triple_entry, one_folds)

    return max(own_count, pair_count, triple_count)
|
||||
|
||||
def get_all_folds_expansion_max_num():
    """Find the UNFOLDS key with the largest expansion count.

    Keys are examined in (fold_len, index) order and the first key that
    reaches the maximum is kept.

    Returns a tuple (nmax, max_unfold).  The result is (0, None) when no
    key has a count greater than zero, which includes an empty UNFOLDS.
    """
    items = list(UNFOLDS.items())
    one_folds = make_one_folds(items)
    fold2_heads = make_foldn_heads(items, 2, one_folds)
    fold3_heads = make_foldn_heads(items, 3, one_folds)

    # Index-based key: tuple-parameter lambdas are Python 2-only syntax.
    ordered = sorted(items,
                     key=lambda item: (item[1].fold_len, item[1].index))

    nmax = 0
    max_unfold = None
    for unfold, _ in ordered:
        n = get_all_folds_expansion_num(unfold, one_folds,
                                        fold2_heads, fold3_heads)
        # Strict comparison keeps the earliest key on ties.
        if nmax < n:
            nmax = n
            max_unfold = unfold

    return (nmax, max_unfold)
|
||||
|
||||
## main ##
|
||||
with open(SOURCE_FILE, 'r') as f:
|
||||
@ -335,3 +472,12 @@ out_comment = True
|
||||
output_fold_source(sys.stdout, out_comment)
|
||||
|
||||
output_gperf_source()
|
||||
|
||||
#unfolds_byte_length_check('utf-8')
|
||||
#unfolds_byte_length_check('utf-16')
|
||||
double_fold_check()
|
||||
unfold_is_multi_code_folds_head_check()
|
||||
|
||||
#max_num, max_code = get_all_folds_expansion_max_num()
|
||||
#max_num -= 1 # remove self
|
||||
#print >> sys.stderr, "max expansion: 0x%06x: %d" % (max_code, max_num)
|
||||
|
||||
@ -1103,7 +1103,7 @@ static int IsAscii(int enc ARG_UNUSED, int c)
|
||||
|
||||
static int IsNewline(int enc ARG_UNUSED, int c)
|
||||
{
|
||||
if (c == 0x0a) return 1;
|
||||
if (c == NEWLINE_CODE) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -531,6 +531,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
||||
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
|
||||
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
|
||||
#define ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (1U<<10) /* ..(?i)...|... */
|
||||
|
||||
/* syntax (behavior) in char class [...] */
|
||||
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -30,6 +30,9 @@
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#define LARGE_S 0x53
|
||||
#define SMALL_S 0x73
|
||||
|
||||
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
|
||||
|
||||
#define INITED_LIST_SIZE 20
|
||||
@ -550,7 +553,7 @@ static int
|
||||
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
static OnigCodePoint ss[] = { 0x73, 0x73 };
|
||||
static OnigCodePoint ss[] = { SMALL_S, SMALL_S };
|
||||
|
||||
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
|
||||
}
|
||||
@ -589,35 +592,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
||||
int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
int i, j, n;
|
||||
static OnigUChar sa[] = { LARGE_S, SMALL_S };
|
||||
|
||||
if (0x41 <= *p && *p <= 0x5a) { /* A - Z */
|
||||
if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */
|
||||
ss_combination:
|
||||
items[0].byte_len = 2;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )0xdf;
|
||||
|
||||
n = 1;
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (sa[i] == *p && sa[j] == *(p+1))
|
||||
continue;
|
||||
|
||||
items[n].byte_len = 2;
|
||||
items[n].code_len = 2;
|
||||
items[n].code[0] = (OnigCodePoint )sa[i];
|
||||
items[n].code[1] = (OnigCodePoint )sa[j];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
|
||||
/* SS */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
else if (0x61 <= *p && *p <= 0x7a) {
|
||||
else if (0x61 <= *p && *p <= 0x7a) { /* a - z */
|
||||
if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) {
|
||||
goto ss_combination;
|
||||
}
|
||||
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
|
||||
/* ss */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
else if (*p == 0xdf && ess_tsett_flag != 0) {
|
||||
items[0].byte_len = 1;
|
||||
@ -677,7 +693,7 @@ extern int
|
||||
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p < end) {
|
||||
if (*p == 0x0a) return 1;
|
||||
if (*p == NEWLINE_CODE) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -906,7 +922,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
|
||||
{
|
||||
OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
|
||||
|
||||
if (code > 127) return 0;
|
||||
if (code > ASCII_LIMIT) return 0;
|
||||
|
||||
return ONIGENC_IS_ASCII_CODE_WORD(code);
|
||||
}
|
||||
|
||||
@ -75,6 +75,8 @@ typedef struct {
|
||||
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
|
||||
|
||||
#define MAX_CODE_POINT (~((OnigCodePoint )0))
|
||||
#define ASCII_LIMIT 127
|
||||
#define NEWLINE_CODE 0x0a
|
||||
|
||||
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
regexec.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
* Copyright (c) 2002-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -177,8 +177,6 @@ static OpInfoType OpInfo[] = {
|
||||
{ OP_STR_MB2N, "str_mb2-n"},
|
||||
{ OP_STR_MB3N, "str_mb3n"},
|
||||
{ OP_STR_MBN, "str_mbn"},
|
||||
{ OP_STR_1_IC, "str_1-ic"},
|
||||
{ OP_STR_N_IC, "str_n-ic"},
|
||||
{ OP_CCLASS, "cclass"},
|
||||
{ OP_CCLASS_MB, "cclass-mb"},
|
||||
{ OP_CCLASS_MIX, "cclass-mix"},
|
||||
@ -254,7 +252,7 @@ static OpInfoType OpInfo[] = {
|
||||
{ OP_LOOK_BEHIND, "look-behind"},
|
||||
{ OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
|
||||
{ OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"},
|
||||
{ OP_PUSH_SAVE_VAL, "push-save-val"},
|
||||
{ OP_SAVE_VAL, "save-val"},
|
||||
{ OP_UPDATE_VAR, "update-var"},
|
||||
#ifdef USE_CALL
|
||||
{ OP_CALL, "call"},
|
||||
@ -377,14 +375,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
while (n-- > 0) { fputc(*q++, f); }
|
||||
}
|
||||
break;
|
||||
case OP_STR_1_IC:
|
||||
len = enclen(enc, p->exact.s);
|
||||
p_string(f, len, p->exact.s);
|
||||
break;
|
||||
case OP_STR_N_IC:
|
||||
len = p->exact_n.n;
|
||||
p_len_string(f, len, 1, p->exact_n.s);
|
||||
break;
|
||||
|
||||
case OP_CCLASS:
|
||||
case OP_CCLASS_NOT:
|
||||
@ -564,12 +554,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_PUSH_SAVE_VAL:
|
||||
case OP_SAVE_VAL:
|
||||
{
|
||||
SaveType type;
|
||||
|
||||
type = p->push_save_val.type;
|
||||
mem = p->push_save_val.id;
|
||||
type = p->save_val.type;
|
||||
mem = p->save_val.id;
|
||||
fprintf(f, ":%d:%d", type, mem);
|
||||
}
|
||||
break;
|
||||
@ -638,7 +628,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
|
||||
fprintf(DBGFP, "print_compiled_byte_code: undefined code %d\n", opcode);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1808,26 +1798,6 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
|
||||
int level = 0;\
|
||||
StackType *k = (stk_from);\
|
||||
while (k > stk_base) {\
|
||||
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
|
||||
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
|
||||
&& k->u.val.id == (sid)) {\
|
||||
if (level == 0) {\
|
||||
(sval) = k->u.val.v;\
|
||||
break;\
|
||||
}\
|
||||
}\
|
||||
else if (k->type == STK_CALL_FRAME)\
|
||||
level--;\
|
||||
else if (k->type == STK_RETURN)\
|
||||
level++;\
|
||||
k--;\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = STK_CALLOUT;\
|
||||
@ -1849,7 +1819,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
#ifdef ONIG_DEBUG
|
||||
#define STACK_BASE_CHECK(p, at) \
|
||||
if ((p) < stk_base) {\
|
||||
fprintf(stderr, "at %s\n", at);\
|
||||
fprintf(DBGFP, "at %s\n", at);\
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
|
||||
}
|
||||
#else
|
||||
@ -2544,7 +2514,7 @@ typedef struct {
|
||||
int len, spos;\
|
||||
spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
|
||||
xp = p - (offset);\
|
||||
fprintf(stderr, "%7u: %7ld: %4d> \"",\
|
||||
fprintf(DBGFP, "%7u: %7ld: %4d> \"",\
|
||||
counter, GET_STACK_INDEX(stk), spos);\
|
||||
counter++;\
|
||||
bp = buf;\
|
||||
@ -2560,21 +2530,23 @@ typedef struct {
|
||||
xmemcpy(bp, "\"", 1); bp += 1;\
|
||||
}\
|
||||
*bp = 0;\
|
||||
fputs((char* )buf, stderr);\
|
||||
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
|
||||
fputs((char* )buf, DBGFP);\
|
||||
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', DBGFP);\
|
||||
if (xp == FinishCode)\
|
||||
fprintf(stderr, "----: finish");\
|
||||
fprintf(DBGFP, "----: finish");\
|
||||
else {\
|
||||
fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
|
||||
print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
|
||||
fprintf(DBGFP, "%4d: ", (int )(xp - reg->ops));\
|
||||
print_compiled_byte_code(DBGFP, reg, (int )(xp - reg->ops), reg->ops, encode); \
|
||||
}\
|
||||
fprintf(stderr, "\n");\
|
||||
fprintf(DBGFP, "\n");\
|
||||
} while(0);
|
||||
#else
|
||||
#define MATCH_DEBUG_OUT(offset)
|
||||
#endif
|
||||
|
||||
#define MATCH_AT_ERROR_RETURN(err_code) best_len = err_code; goto match_at_end
|
||||
#define MATCH_AT_ERROR_RETURN(err_code) do {\
|
||||
best_len = err_code; goto match_at_end;\
|
||||
} while(0)
|
||||
|
||||
|
||||
/* match data(str - end) from position (sstart). */
|
||||
@ -2607,8 +2579,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
&&L_STR_MB2N,
|
||||
&&L_STR_MB3N,
|
||||
&&L_STR_MBN,
|
||||
&&L_STR_1_IC,
|
||||
&&L_STR_N_IC,
|
||||
&&L_CCLASS,
|
||||
&&L_CCLASS_MB,
|
||||
&&L_CCLASS_MIX,
|
||||
@ -2684,7 +2654,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
&&L_LOOK_BEHIND,
|
||||
&&L_LOOK_BEHIND_NOT_START,
|
||||
&&L_LOOK_BEHIND_NOT_END,
|
||||
&&L_PUSH_SAVE_VAL,
|
||||
&&L_SAVE_VAL,
|
||||
&&L_UPDATE_VAR,
|
||||
#ifdef USE_CALL
|
||||
&&L_CALL,
|
||||
@ -2760,9 +2730,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
|
||||
#ifdef ONIG_DEBUG_MATCH
|
||||
fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
|
||||
fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
|
||||
str, end, sstart, sprev);
|
||||
fprintf(stderr, "size: %d, start offset: %d\n",
|
||||
fprintf(DBGFP, "size: %d, start offset: %d\n",
|
||||
(int )(end - str), (int )(sstart - str));
|
||||
#endif
|
||||
|
||||
@ -2781,7 +2751,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
if (n > best_len) {
|
||||
OnigRegion* region;
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
if (IS_FIND_LONGEST(option)) {
|
||||
if (OPTON_FIND_LONGEST(option)) {
|
||||
if (n > msa->best_len) {
|
||||
msa->best_len = n;
|
||||
msa->best_s = (UChar* )sstart;
|
||||
@ -2796,7 +2766,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
if (keep > s) keep = s;
|
||||
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
if (IS_POSIX_REGION(msa->options)) {
|
||||
if (OPTON_POSIX_REGION(msa->options)) {
|
||||
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
|
||||
|
||||
rmt[0].rm_so = (regoff_t )(keep - str);
|
||||
@ -2850,7 +2820,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
#endif /* USE_CAPTURE_HISTORY */
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
} /* else IS_POSIX_REGION() */
|
||||
} /* else OPTON_POSIX_REGION() */
|
||||
#endif
|
||||
} /* if (region) */
|
||||
} /* n > best_len */
|
||||
@ -2860,12 +2830,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
#endif
|
||||
SOP_OUT;
|
||||
|
||||
if (IS_FIND_CONDITION(option)) {
|
||||
if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
|
||||
if (OPTON_FIND_CONDITION(option)) {
|
||||
if (OPTON_FIND_NOT_EMPTY(option) && s == sstart) {
|
||||
best_len = ONIG_MISMATCH;
|
||||
goto fail; /* for retry */
|
||||
}
|
||||
if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
|
||||
if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
|
||||
goto fail; /* for retry */
|
||||
}
|
||||
}
|
||||
@ -2881,27 +2851,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
INC_OP;
|
||||
NEXT_OUT;
|
||||
|
||||
CASE_OP(STR_1_IC)
|
||||
{
|
||||
int len;
|
||||
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
||||
|
||||
DATA_ENSURE(1);
|
||||
len = ONIGENC_MBC_CASE_FOLD(encode,
|
||||
/* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
|
||||
case_fold_flag,
|
||||
&s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
ps = p->exact.s;
|
||||
while (len-- > 0) {
|
||||
if (*ps != *q) goto fail;
|
||||
ps++; q++;
|
||||
}
|
||||
}
|
||||
INC_OP;
|
||||
NEXT_OUT;
|
||||
|
||||
CASE_OP(STR_2)
|
||||
DATA_ENSURE(2);
|
||||
ps = p->exact.s;
|
||||
@ -2969,34 +2918,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
|
||||
CASE_OP(STR_N_IC)
|
||||
{
|
||||
int len;
|
||||
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
||||
|
||||
tlen = p->exact_n.n;
|
||||
ps = p->exact_n.s;
|
||||
endp = ps + tlen;
|
||||
while (ps < endp) {
|
||||
sprev = s;
|
||||
DATA_ENSURE(1);
|
||||
len = ONIGENC_MBC_CASE_FOLD(encode,
|
||||
/* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
|
||||
case_fold_flag,
|
||||
&s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
while (len-- > 0) {
|
||||
if (ps >= endp) goto fail;
|
||||
if (*ps != *q) goto fail;
|
||||
ps++; q++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
|
||||
CASE_OP(STR_MB2N1)
|
||||
DATA_ENSURE(2);
|
||||
ps = p->exact.s;
|
||||
@ -3420,7 +3341,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
CASE_OP(BEGIN_LINE)
|
||||
if (ON_STR_BEGIN(s)) {
|
||||
if (IS_NOTBOL(msa->options)) goto fail;
|
||||
if (OPTON_NOTBOL(msa->options)) goto fail;
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
@ -3435,7 +3356,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
|
||||
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
|
||||
#endif
|
||||
if (IS_NOTEOL(msa->options)) goto fail;
|
||||
if (OPTON_NOTEOL(msa->options)) goto fail;
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
|
||||
@ -3459,7 +3380,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
|
||||
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
|
||||
#endif
|
||||
if (IS_NOTEOL(msa->options)) goto fail;
|
||||
if (OPTON_NOTEOL(msa->options)) goto fail;
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
|
||||
@ -3746,7 +3667,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
INC_OP;
|
||||
if (is_empty) {
|
||||
#ifdef ONIG_DEBUG_MATCH
|
||||
fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
|
||||
fprintf(DBGFP, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
|
||||
#endif
|
||||
empty_check_found:
|
||||
/* empty loop found, skip next instruction */
|
||||
@ -3779,7 +3700,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
INC_OP;
|
||||
if (is_empty) {
|
||||
#ifdef ONIG_DEBUG_MATCH
|
||||
fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
|
||||
fprintf(DBGFP, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
|
||||
#endif
|
||||
if (is_empty == -1) goto fail;
|
||||
goto empty_check_found;
|
||||
@ -3802,7 +3723,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
INC_OP;
|
||||
if (is_empty) {
|
||||
#ifdef ONIG_DEBUG_MATCH
|
||||
fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
|
||||
fprintf(DBGFP, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
|
||||
(int )mem, s);
|
||||
#endif
|
||||
if (is_empty == -1) goto fail;
|
||||
@ -4010,12 +3931,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
JUMP_OUT;
|
||||
#endif
|
||||
|
||||
CASE_OP(PUSH_SAVE_VAL)
|
||||
CASE_OP(SAVE_VAL)
|
||||
{
|
||||
SaveType type;
|
||||
|
||||
type = p->push_save_val.type;
|
||||
mem = p->push_save_val.id; /* mem: save id */
|
||||
type = p->save_val.type;
|
||||
mem = p->save_val.id; /* mem: save id */
|
||||
switch ((enum SaveType )type) {
|
||||
case SAVE_KEEP:
|
||||
STACK_PUSH_SAVE_VAL(mem, type, s);
|
||||
@ -4167,6 +4088,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
return best_len;
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_REGSET
|
||||
|
||||
typedef struct {
|
||||
regex_t* reg;
|
||||
OnigRegion* region;
|
||||
@ -4433,7 +4357,7 @@ onig_regset_search_with_param(OnigRegSet* set,
|
||||
if (set->n == 0)
|
||||
return ONIG_MISMATCH;
|
||||
|
||||
if (IS_POSIX_REGION(option))
|
||||
if (OPTON_POSIX_REGION(option))
|
||||
return ONIGERR_INVALID_ARGUMENT;
|
||||
|
||||
r = 0;
|
||||
@ -4457,7 +4381,7 @@ onig_regset_search_with_param(OnigRegSet* set,
|
||||
return ONIGERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
|
||||
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
|
||||
if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
|
||||
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
goto finish_no_msa;
|
||||
@ -4567,7 +4491,7 @@ onig_regset_search_with_param(OnigRegSet* set,
|
||||
for (i = 0; i < set->n; i++) {
|
||||
if (IS_NOT_NULL(msas))
|
||||
MATCH_ARG_FREE(msas[i]);
|
||||
if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
|
||||
if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
|
||||
IS_NOT_NULL(set->rs[i].region)) {
|
||||
onig_region_clear(set->rs[i].region);
|
||||
}
|
||||
@ -4586,7 +4510,7 @@ onig_regset_search_with_param(OnigRegSet* set,
|
||||
for (i = 0; i < set->n; i++) {
|
||||
if (IS_NOT_NULL(msas))
|
||||
MATCH_ARG_FREE(msas[i]);
|
||||
if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
|
||||
if (OPTON_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
|
||||
IS_NOT_NULL(set->rs[i].region)) {
|
||||
onig_region_clear(set->rs[i].region);
|
||||
}
|
||||
@ -4625,6 +4549,9 @@ onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
|
||||
return r;
|
||||
}
|
||||
|
||||
#endif /* USE_REGSET */
|
||||
|
||||
|
||||
static UChar*
|
||||
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
|
||||
const UChar* text, const UChar* text_end, UChar* text_range)
|
||||
@ -4656,48 +4583,6 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
|
||||
const UChar* t, const UChar* tend,
|
||||
const UChar* p, const UChar* end)
|
||||
{
|
||||
int lowlen;
|
||||
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
||||
|
||||
while (t < tend) {
|
||||
if (p >= end) return 0;
|
||||
lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
|
||||
q = lowbuf;
|
||||
while (lowlen > 0) {
|
||||
if (t >= tend) return 0;
|
||||
if (*t++ != *q++) return 0;
|
||||
lowlen--;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
slow_search_ic(OnigEncoding enc, int case_fold_flag,
|
||||
UChar* target, UChar* target_end,
|
||||
const UChar* text, const UChar* text_end, UChar* text_range)
|
||||
{
|
||||
UChar *s;
|
||||
|
||||
s = (UChar* )text;
|
||||
|
||||
while (s < text_range) {
|
||||
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
|
||||
s, text_end))
|
||||
return s;
|
||||
|
||||
s += enclen(enc, s);
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
|
||||
const UChar* text, const UChar* adjust_text,
|
||||
@ -4730,33 +4615,6 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
|
||||
UChar* target, UChar* target_end,
|
||||
const UChar* text, const UChar* adjust_text,
|
||||
const UChar* text_end, const UChar* text_start)
|
||||
{
|
||||
UChar *s;
|
||||
|
||||
s = (UChar* )text_end;
|
||||
s -= (target_end - target);
|
||||
if (s > text_start)
|
||||
s = (UChar* )text_start;
|
||||
else
|
||||
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
|
||||
|
||||
while (s >= text) {
|
||||
if (str_lower_case_match(enc, case_fold_flag,
|
||||
target, target_end, s, text_end))
|
||||
return s;
|
||||
|
||||
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
|
||||
static UChar*
|
||||
sunday_quick_search_step_forward(regex_t* reg,
|
||||
const UChar* target, const UChar* target_end,
|
||||
@ -4770,8 +4628,9 @@ sunday_quick_search_step_forward(regex_t* reg,
|
||||
OnigEncoding enc;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
"sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
|
||||
fprintf(DBGFP,
|
||||
"sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n",
|
||||
text, text_end, text_range);
|
||||
#endif
|
||||
|
||||
enc = reg->enc;
|
||||
@ -4894,7 +4753,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
|
||||
MATCH_ARG_INIT(msa, reg, option, region, at, mp);
|
||||
if (region
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
&& !IS_POSIX_REGION(option)
|
||||
&& !OPTON_POSIX_REGION(option)
|
||||
#endif
|
||||
) {
|
||||
r = onig_region_resize_clear(region, reg->num_mem + 1);
|
||||
@ -4903,7 +4762,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
|
||||
r = 0;
|
||||
|
||||
if (r == 0) {
|
||||
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
|
||||
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
|
||||
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
|
||||
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
goto end;
|
||||
@ -4926,7 +4785,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
UChar *p, *pprev = (UChar* )NULL;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
|
||||
fprintf(DBGFP, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
|
||||
str, end, start, range);
|
||||
#endif
|
||||
|
||||
@ -4949,10 +4808,6 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
case OPTIMIZE_STR:
|
||||
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
|
||||
break;
|
||||
case OPTIMIZE_STR_CASE_FOLD:
|
||||
p = slow_search_ic(reg->enc, reg->case_fold_flag,
|
||||
reg->exact, reg->exact_end, p, end, range);
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_FAST:
|
||||
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
|
||||
@ -5047,7 +4902,7 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
}
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
fprintf(DBGFP,
|
||||
"forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
|
||||
(int )(*low - str), (int )(*high - str),
|
||||
reg->dist_min, reg->dist_max);
|
||||
@ -5075,12 +4930,6 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
range, adjrange, end, p);
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_CASE_FOLD:
|
||||
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
|
||||
reg->exact, reg->exact_end,
|
||||
range, adjrange, end, p);
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_FAST:
|
||||
case OPTIMIZE_STR_FAST_STEP_FORWARD:
|
||||
goto exact_method;
|
||||
@ -5150,7 +4999,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
}
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "backward_search: low: %d, high: %d\n",
|
||||
fprintf(DBGFP, "backward_search: low: %d, high: %d\n",
|
||||
(int )(*low - str), (int )(*high - str));
|
||||
#endif
|
||||
return 1; /* success */
|
||||
@ -5158,7 +5007,7 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
|
||||
fail:
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "backward_search: fail.\n");
|
||||
fprintf(DBGFP, "backward_search: fail.\n");
|
||||
#endif
|
||||
return 0; /* fail */
|
||||
}
|
||||
@ -5202,7 +5051,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar *orig_start = start;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
fprintf(DBGFP,
|
||||
"onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
|
||||
str, (int )(end - str), (int )(start - str), (int )(range - str));
|
||||
#endif
|
||||
@ -5211,7 +5060,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
if (region
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
&& !IS_POSIX_REGION(option)
|
||||
&& ! OPTON_POSIX_REGION(option)
|
||||
#endif
|
||||
) {
|
||||
r = onig_region_resize_clear(region, reg->num_mem + 1);
|
||||
@ -5220,7 +5069,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
if (start > end || start < str) goto mismatch_no_msa;
|
||||
|
||||
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
|
||||
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
|
||||
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
|
||||
r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
goto finish_no_msa;
|
||||
@ -5233,7 +5082,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
|
||||
if (r != ONIG_MISMATCH) {\
|
||||
if (r >= 0) {\
|
||||
if (! IS_FIND_LONGEST(reg->options)) {\
|
||||
if (! OPTON_FIND_LONGEST(reg->options)) {\
|
||||
goto match;\
|
||||
}\
|
||||
}\
|
||||
@ -5350,7 +5199,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
static const UChar* address_for_empty_string = (UChar* )"";
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "onig_search: empty string.\n");
|
||||
fprintf(DBGFP, "onig_search: empty string.\n");
|
||||
#endif
|
||||
|
||||
if (reg->threshold_len == 0) {
|
||||
@ -5366,7 +5215,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
|
||||
fprintf(DBGFP, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
|
||||
(int )(end - str), (int )(start - str), (int )(range - str));
|
||||
#endif
|
||||
|
||||
@ -5509,7 +5358,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
mismatch:
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
if (IS_FIND_LONGEST(reg->options)) {
|
||||
if (OPTON_FIND_LONGEST(reg->options)) {
|
||||
if (msa.best_len >= 0) {
|
||||
s = msa.best_s;
|
||||
goto match;
|
||||
@ -5523,9 +5372,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
/* If result is mismatch and no FIND_NOT_EMPTY option,
|
||||
then the region is not set in match_at(). */
|
||||
if (IS_FIND_NOT_EMPTY(reg->options) && region
|
||||
if (OPTON_FIND_NOT_EMPTY(reg->options) && region
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
&& !IS_POSIX_REGION(option)
|
||||
&& !OPTON_POSIX_REGION(option)
|
||||
#endif
|
||||
) {
|
||||
onig_region_clear(region);
|
||||
@ -5533,7 +5382,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
if (r != ONIG_MISMATCH)
|
||||
fprintf(stderr, "onig_search: error %d\n", r);
|
||||
fprintf(DBGFP, "onig_search: error %d\n", r);
|
||||
#endif
|
||||
return r;
|
||||
|
||||
@ -5542,7 +5391,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
|
||||
finish_no_msa:
|
||||
#ifdef ONIG_DEBUG
|
||||
if (r != ONIG_MISMATCH)
|
||||
fprintf(stderr, "onig_search: error %d\n", r);
|
||||
fprintf(DBGFP, "onig_search: error %d\n", r);
|
||||
#endif
|
||||
return r;
|
||||
|
||||
@ -5578,7 +5427,7 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
|
||||
int rs;
|
||||
const UChar* start;
|
||||
|
||||
if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
|
||||
if (OPTON_CHECK_VALIDITY_OF_STRING(option)) {
|
||||
if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
|
||||
@ -5669,6 +5518,8 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
|
||||
*to = *from;
|
||||
}
|
||||
|
||||
#ifdef USE_REGSET
|
||||
|
||||
extern int
|
||||
onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
|
||||
{
|
||||
@ -5759,7 +5610,7 @@ onig_regset_add(OnigRegSet* set, regex_t* reg)
|
||||
{
|
||||
OnigRegion* region;
|
||||
|
||||
if (IS_FIND_LONGEST(reg->options))
|
||||
if (OPTON_FIND_LONGEST(reg->options))
|
||||
return ONIGERR_INVALID_ARGUMENT;
|
||||
|
||||
if (set->n != 0 && reg->enc != set->enc)
|
||||
@ -5805,7 +5656,7 @@ onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
|
||||
set->n--;
|
||||
}
|
||||
else {
|
||||
if (IS_FIND_LONGEST(reg->options))
|
||||
if (OPTON_FIND_LONGEST(reg->options))
|
||||
return ONIGERR_INVALID_ARGUMENT;
|
||||
|
||||
if (set->n > 1 && reg->enc != set->enc)
|
||||
@ -5864,6 +5715,8 @@ onig_regset_get_region(OnigRegSet* set, int at)
|
||||
return set->rs[at].region;
|
||||
}
|
||||
|
||||
#endif /* USE_REGSET */
|
||||
|
||||
|
||||
#ifdef USE_DIRECT_THREADED_CODE
|
||||
extern int
|
||||
@ -6385,6 +6238,8 @@ onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
|
||||
}
|
||||
|
||||
|
||||
#ifndef ONIGURUMA_UNSUPPORTED_PRINT
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static FILE* OutFp;
|
||||
@ -6483,4 +6338,6 @@ onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
#endif /* ONIGURUMA_UNSUPPORTED_PRINT */
|
||||
|
||||
#endif /* USE_CALLOUT */
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
encoding: UTF-8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
* Copyright (c) 2002-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -45,6 +45,7 @@
|
||||
defined(ONIG_DEBUG_STATISTICS)
|
||||
#ifndef ONIG_DEBUG
|
||||
#define ONIG_DEBUG
|
||||
#define DBGFP stderr
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -56,6 +57,7 @@
|
||||
|
||||
/* config */
|
||||
/* spec. config */
|
||||
#define USE_REGSET
|
||||
#define USE_CALL
|
||||
#define USE_CALLOUT
|
||||
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
|
||||
@ -119,6 +121,9 @@
|
||||
/* */
|
||||
#define onig_st_is_member st_is_member
|
||||
|
||||
|
||||
#ifndef ONIGURUMA_SYS_UEFI
|
||||
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
#define xmemmove memmove
|
||||
@ -176,6 +181,19 @@ typedef unsigned int uintptr_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* strend hash */
|
||||
typedef void hash_table_type;
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
typedef ULONG_PTR hash_data_type;
|
||||
#else
|
||||
typedef unsigned long hash_data_type;
|
||||
#endif
|
||||
|
||||
#endif /* ONIGURUMA_SYS_UEFI */
|
||||
|
||||
|
||||
#ifdef MIN
|
||||
#undef MIN
|
||||
#endif
|
||||
@ -237,7 +255,6 @@ enum OptimizeType {
|
||||
OPTIMIZE_STR, /* Slow Search */
|
||||
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
|
||||
OPTIMIZE_MAP /* char map */
|
||||
};
|
||||
|
||||
@ -290,32 +307,20 @@ typedef unsigned int MemStatusType;
|
||||
(IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \
|
||||
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
|
||||
|
||||
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
|
||||
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
|
||||
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
|
||||
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
|
||||
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
|
||||
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
|
||||
#define IS_FIND_CONDITION(option) ((option) & \
|
||||
#define OPTON_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
|
||||
#define OPTON_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
|
||||
#define OPTON_FIND_CONDITION(option) ((option) & \
|
||||
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
|
||||
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
|
||||
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
|
||||
#define IS_WORD_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define IS_DIGIT_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define IS_SPACE_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define IS_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII)
|
||||
|
||||
#define IS_ASCII_MODE_CTYPE_OPTION(ctype, options) \
|
||||
((ctype) >= 0 && \
|
||||
(((ctype) < ONIGENC_CTYPE_ASCII && IS_POSIX_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_WORD && IS_WORD_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_DIGIT && IS_DIGIT_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_SPACE && IS_SPACE_ASCII(options))))
|
||||
#define OPTON_NEGATE_SINGLELINE(option) ((option) & \
|
||||
ONIG_OPTION_NEGATE_SINGLELINE)
|
||||
#define OPTON_DONT_CAPTURE_GROUP(option) ((option) & \
|
||||
ONIG_OPTION_DONT_CAPTURE_GROUP)
|
||||
#define OPTON_CAPTURE_GROUP(option) ((option) & ONIG_OPTION_CAPTURE_GROUP)
|
||||
#define OPTON_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
|
||||
#define OPTON_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
#define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \
|
||||
ONIG_OPTION_CHECK_VALIDITY_OF_STRING)
|
||||
|
||||
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
|
||||
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
|
||||
@ -327,17 +332,17 @@ typedef unsigned int MemStatusType;
|
||||
#define BITS_PER_BYTE 8
|
||||
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
|
||||
#define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */
|
||||
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
|
||||
#define BITSET_REAL_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
|
||||
|
||||
typedef uint32_t Bits;
|
||||
typedef Bits BitSet[BITSET_SIZE];
|
||||
typedef Bits BitSet[BITSET_REAL_SIZE];
|
||||
typedef Bits* BitSetRef;
|
||||
|
||||
#define SIZE_BITSET sizeof(BitSet)
|
||||
|
||||
#define BITSET_CLEAR(bs) do {\
|
||||
int i;\
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { (bs)[i] = 0; } \
|
||||
} while (0)
|
||||
|
||||
#define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5]
|
||||
@ -475,8 +480,6 @@ enum OpCode {
|
||||
OP_STR_MB2N, /* mb-length = 2 */
|
||||
OP_STR_MB3N, /* mb-length = 3 */
|
||||
OP_STR_MBN, /* other length */
|
||||
OP_STR_1_IC, /* single byte, N = 1, ignore case */
|
||||
OP_STR_N_IC, /* single byte, ignore case */
|
||||
OP_CCLASS,
|
||||
OP_CCLASS_MB,
|
||||
OP_CCLASS_MIX,
|
||||
@ -552,7 +555,7 @@ enum OpCode {
|
||||
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
|
||||
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
|
||||
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
|
||||
OP_PUSH_SAVE_VAL,
|
||||
OP_SAVE_VAL,
|
||||
OP_UPDATE_VAR,
|
||||
#ifdef USE_CALL
|
||||
OP_CALL, /* \g<name> */
|
||||
@ -650,7 +653,7 @@ typedef int ModeType;
|
||||
#define OPSIZE_LOOK_BEHIND_NOT_END 1
|
||||
#define OPSIZE_CALL 1
|
||||
#define OPSIZE_RETURN 1
|
||||
#define OPSIZE_PUSH_SAVE_VAL 1
|
||||
#define OPSIZE_SAVE_VAL 1
|
||||
#define OPSIZE_UPDATE_VAR 1
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
@ -810,7 +813,7 @@ typedef struct {
|
||||
struct {
|
||||
SaveType type;
|
||||
MemNumType id;
|
||||
} push_save_val;
|
||||
} save_val;
|
||||
struct {
|
||||
UpdateVarType type;
|
||||
MemNumType id;
|
||||
@ -999,16 +1002,6 @@ extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num
|
||||
#endif /* USE_CALLOUT */
|
||||
|
||||
|
||||
/* strend hash */
|
||||
typedef void hash_table_type;
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
typedef ULONG_PTR hash_data_type;
|
||||
#else
|
||||
typedef unsigned long hash_data_type;
|
||||
#endif
|
||||
|
||||
extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
|
||||
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
|
||||
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
encoding: UTF-8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
* Copyright (c) 2002-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -46,6 +46,26 @@
|
||||
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
|
||||
((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')
|
||||
|
||||
#define OPTON_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
|
||||
#define OPTON_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
|
||||
#define OPTON_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
|
||||
#define OPTON_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
|
||||
#define OPTON_WORD_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define OPTON_DIGIT_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define OPTON_SPACE_ASCII(option) \
|
||||
((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
|
||||
#define OPTON_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII)
|
||||
#define OPTON_TEXT_SEGMENT_WORD(option) ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)
|
||||
|
||||
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
|
||||
((ctype) >= 0 && \
|
||||
(((ctype) < ONIGENC_CTYPE_ASCII && OPTON_POSIX_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_WORD && OPTON_WORD_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
|
||||
((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
|
||||
|
||||
|
||||
OnigSyntaxType OnigSyntaxOniguruma = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
@ -296,7 +316,7 @@ backref_rel_to_abs(int rel_no, ScanEnv* env)
|
||||
#define BITSET_IS_EMPTY(bs,empty) do {\
|
||||
int i;\
|
||||
empty = 1;\
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) {\
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
|
||||
if ((bs)[i] != 0) {\
|
||||
empty = 0; break;\
|
||||
}\
|
||||
@ -316,35 +336,35 @@ static void
|
||||
bitset_invert(BitSetRef bs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
|
||||
}
|
||||
|
||||
static void
|
||||
bitset_invert_to(BitSetRef from, BitSetRef to)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
|
||||
}
|
||||
|
||||
static void
|
||||
bitset_and(BitSetRef dest, BitSetRef bs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
|
||||
}
|
||||
|
||||
static void
|
||||
bitset_or(BitSetRef dest, BitSetRef bs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
|
||||
}
|
||||
|
||||
static void
|
||||
bitset_copy(BitSetRef dest, BitSetRef bs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
|
||||
for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
|
||||
}
|
||||
|
||||
extern int
|
||||
@ -776,7 +796,7 @@ onig_foreach_name(regex_t* reg,
|
||||
}
|
||||
|
||||
static int
|
||||
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
|
||||
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -793,7 +813,7 @@ i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
|
||||
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
|
||||
{
|
||||
NameTable* t = (NameTable* )reg->name_table;
|
||||
|
||||
@ -1143,12 +1163,12 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
|
||||
extern int
|
||||
onig_noname_group_capture_is_active(regex_t* reg)
|
||||
{
|
||||
if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
|
||||
if (OPTON_DONT_CAPTURE_GROUP(reg->options))
|
||||
return 0;
|
||||
|
||||
if (onig_number_of_names(reg) > 0 &&
|
||||
IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
||||
!ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
||||
! OPTON_CAPTURE_GROUP(reg->options)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1604,12 +1624,11 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
|
||||
fe->arg_types[i] = arg_types[i];
|
||||
}
|
||||
for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
|
||||
if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
|
||||
if (fe->arg_types[i] == ONIG_TYPE_STRING) {
|
||||
OnigValue* val;
|
||||
UChar* ds;
|
||||
|
||||
if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
|
||||
|
||||
val = opt_defaults + j;
|
||||
ds = onigenc_strdup(enc, val->s.start, val->s.end);
|
||||
CHECK_NULL_RETURN_MEMERR(ds);
|
||||
@ -2138,6 +2157,18 @@ node_new(void)
|
||||
return node;
|
||||
}
|
||||
|
||||
extern Node*
|
||||
onig_node_copy(Node* from)
|
||||
{
|
||||
Node* copy;
|
||||
|
||||
copy = node_new();
|
||||
CHECK_NULL_RETURN(copy);
|
||||
xmemcpy(copy, from, sizeof(*copy));
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
initialize_cclass(CClassNode* cc)
|
||||
@ -2167,30 +2198,20 @@ node_new_ctype(int type, int not, OnigOptionType options)
|
||||
NODE_SET_TYPE(node, NODE_CTYPE);
|
||||
CTYPE_(node)->ctype = type;
|
||||
CTYPE_(node)->not = not;
|
||||
CTYPE_(node)->options = options;
|
||||
CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
|
||||
CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_anychar(void)
|
||||
node_new_anychar(OnigOptionType options)
|
||||
{
|
||||
Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_anychar_with_fixed_option(OnigOptionType option)
|
||||
{
|
||||
CtypeNode* ct;
|
||||
Node* node;
|
||||
|
||||
node = node_new_anychar();
|
||||
node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
ct = CTYPE_(node);
|
||||
ct->options = option;
|
||||
NODE_STATUS_ADD(node, FIXED_OPTION);
|
||||
if (OPTON_MULTILINE(options))
|
||||
NODE_STATUS_ADD(node, MULTILINE);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -2199,18 +2220,18 @@ node_new_no_newline(Node** node, ScanEnv* env)
|
||||
{
|
||||
Node* n;
|
||||
|
||||
n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
|
||||
n = node_new_anychar(ONIG_OPTION_NONE);
|
||||
CHECK_NULL_RETURN_MEMERR(n);
|
||||
*node = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
node_new_true_anychar(Node** node, ScanEnv* env)
|
||||
node_new_true_anychar(Node** node)
|
||||
{
|
||||
Node* n;
|
||||
|
||||
n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
|
||||
n = node_new_anychar(ONIG_OPTION_MULTILINE);
|
||||
CHECK_NULL_RETURN_MEMERR(n);
|
||||
*node = n;
|
||||
return 0;
|
||||
@ -2292,16 +2313,39 @@ make_alt(int n, Node* ns[])
|
||||
return make_list_or_alt(NODE_ALT, n, ns);
|
||||
}
|
||||
|
||||
extern Node*
|
||||
onig_node_new_anchor(int type, int ascii_mode)
|
||||
static Node*
|
||||
node_new_anchor(int type)
|
||||
{
|
||||
Node* node = node_new();
|
||||
Node* node;
|
||||
|
||||
node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
NODE_SET_TYPE(node, NODE_ANCHOR);
|
||||
ANCHOR_(node)->type = type;
|
||||
ANCHOR_(node)->char_len = -1;
|
||||
ANCHOR_(node)->char_len = INFINITE_LEN;
|
||||
ANCHOR_(node)->ascii_mode = 0;
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_anchor_with_options(int type, OnigOptionType options)
|
||||
{
|
||||
int ascii_mode;
|
||||
Node* node;
|
||||
|
||||
node = node_new_anchor(type);
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
|
||||
ANCHOR_(node)->ascii_mode = ascii_mode;
|
||||
|
||||
if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
|
||||
type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
|
||||
if (OPTON_TEXT_SEGMENT_WORD(options))
|
||||
NODE_STATUS_ADD(node, TEXT_SEGMENT_WORD);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -2313,8 +2357,9 @@ node_new_backref(int back_num, int* backrefs, int by_name,
|
||||
ScanEnv* env)
|
||||
{
|
||||
int i;
|
||||
Node* node = node_new();
|
||||
Node* node;
|
||||
|
||||
node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
NODE_SET_TYPE(node, NODE_BACKREF);
|
||||
@ -2323,6 +2368,9 @@ node_new_backref(int back_num, int* backrefs, int by_name,
|
||||
if (by_name != 0)
|
||||
NODE_STATUS_ADD(node, BY_NAME);
|
||||
|
||||
if (OPTON_IGNORECASE(env->options))
|
||||
NODE_STATUS_ADD(node, IGNORECASE);
|
||||
|
||||
#ifdef USE_BACKREF_WITH_LEVEL
|
||||
if (exist_level != 0) {
|
||||
NODE_STATUS_ADD(node, NEST_LEVEL);
|
||||
@ -2693,10 +2741,10 @@ make_text_segment(Node** node, ScanEnv* env)
|
||||
ns[1] = NULL_NODE;
|
||||
|
||||
r = ONIGERR_MEMORY;
|
||||
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);
|
||||
ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
|
||||
if (IS_NULL(ns[0])) goto err;
|
||||
|
||||
r = node_new_true_anychar(&ns[1], env);
|
||||
r = node_new_true_anychar(&ns[1]);
|
||||
if (r != 0) goto err1;
|
||||
|
||||
x = make_list(2, ns);
|
||||
@ -2711,7 +2759,7 @@ make_text_segment(Node** node, ScanEnv* env)
|
||||
ns[0] = NULL_NODE;
|
||||
ns[1] = x;
|
||||
|
||||
r = node_new_true_anychar(&ns[0], env);
|
||||
r = node_new_true_anychar(&ns[0]);
|
||||
if (r != 0) goto err1;
|
||||
|
||||
x = make_list(2, ns);
|
||||
@ -3060,7 +3108,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
|
||||
quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
|
||||
if (IS_NULL(quant)) goto err0;
|
||||
|
||||
r = node_new_true_anychar(&body, env);
|
||||
r = node_new_true_anychar(&body);
|
||||
if (r != 0) {
|
||||
onig_node_free(quant);
|
||||
goto err;
|
||||
@ -3095,7 +3143,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
|
||||
|
||||
id2 = GIMMICK_(ns[1])->id;
|
||||
|
||||
r = node_new_true_anychar(&ns[3], env);
|
||||
r = node_new_true_anychar(&ns[3]);
|
||||
if (r != 0) goto err;
|
||||
|
||||
possessive = 1;
|
||||
@ -3195,7 +3243,6 @@ onig_node_str_clear(Node* node)
|
||||
STR_(node)->s = STR_(node)->buf;
|
||||
STR_(node)->end = STR_(node)->buf;
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->case_min_len = 0;
|
||||
}
|
||||
|
||||
static Node*
|
||||
@ -3209,7 +3256,6 @@ node_new_str(const UChar* s, const UChar* end)
|
||||
STR_(node)->s = STR_(node)->buf;
|
||||
STR_(node)->end = STR_(node)->buf;
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->case_min_len = 0;
|
||||
|
||||
if (onig_node_str_cat(node, s, end)) {
|
||||
onig_node_free(node);
|
||||
@ -3225,9 +3271,22 @@ onig_node_new_str(const UChar* s, const UChar* end)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_crude(UChar* s, UChar* end)
|
||||
node_new_str_with_options(const UChar* s, const UChar* end,
|
||||
OnigOptionType options)
|
||||
{
|
||||
Node* node = node_new_str(s, end);
|
||||
Node* node;
|
||||
node = node_new_str(s, end);
|
||||
|
||||
if (OPTON_IGNORECASE(options))
|
||||
NODE_STATUS_ADD(node, IGNORECASE);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
|
||||
{
|
||||
Node* node = node_new_str_with_options(s, end, options);
|
||||
CHECK_NULL_RETURN(node);
|
||||
NODE_STRING_SET_CRUDE(node);
|
||||
return node;
|
||||
@ -3240,14 +3299,14 @@ node_new_empty(void)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_crude_char(UChar c)
|
||||
node_new_str_crude_char(UChar c, OnigOptionType options)
|
||||
{
|
||||
int i;
|
||||
UChar p[1];
|
||||
Node* node;
|
||||
|
||||
p[0] = c;
|
||||
node = node_new_str_crude(p, p + 1);
|
||||
node = node_new_str_crude(p, p + 1, options);
|
||||
|
||||
/* clear buf tail */
|
||||
for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
|
||||
@ -3270,12 +3329,13 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
|
||||
if (p && p > sn->s) { /* can be split. */
|
||||
rn = node_new_str(p, sn->end);
|
||||
CHECK_NULL_RETURN(rn);
|
||||
if (NODE_STRING_IS_CRUDE(node))
|
||||
NODE_STRING_SET_CRUDE(rn);
|
||||
|
||||
sn->end = (UChar* )p;
|
||||
STR_(rn)->flag = sn->flag;
|
||||
NODE_STATUS(rn) = NODE_STATUS(node);
|
||||
}
|
||||
}
|
||||
|
||||
return rn;
|
||||
}
|
||||
|
||||
@ -4001,10 +4061,10 @@ node_new_general_newline(Node** node, ScanEnv* env)
|
||||
|
||||
dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
|
||||
if (dlen < 0) return dlen;
|
||||
alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
|
||||
alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
|
||||
if (alen < 0) return alen;
|
||||
|
||||
crnl = node_new_str_crude(buf, buf + dlen + alen);
|
||||
crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
|
||||
CHECK_NULL_RETURN_MEMERR(crnl);
|
||||
|
||||
ncc = node_new_cclass();
|
||||
@ -4012,10 +4072,10 @@ node_new_general_newline(Node** node, ScanEnv* env)
|
||||
|
||||
cc = CCLASS_(ncc);
|
||||
if (dlen == 1) {
|
||||
bitset_set_range(cc->bs, 0x0a, 0x0d);
|
||||
bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
|
||||
}
|
||||
else {
|
||||
r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
|
||||
r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
|
||||
if (r != 0) {
|
||||
err1:
|
||||
onig_node_free(ncc);
|
||||
@ -5485,7 +5545,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (c == MC_ANYCHAR(syn))
|
||||
goto any_char;
|
||||
else if (c == MC_ANYTIME(syn))
|
||||
goto anytime;
|
||||
goto any_time;
|
||||
else if (c == MC_ZERO_OR_ONE_TIME(syn))
|
||||
goto zero_or_one_time;
|
||||
else if (c == MC_ONE_OR_MORE_TIME(syn))
|
||||
@ -5509,7 +5569,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
case '*':
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
|
||||
#ifdef USE_VARIABLE_META_CHARS
|
||||
anytime:
|
||||
any_time:
|
||||
#endif
|
||||
tok->type = TK_REPEAT;
|
||||
tok->u.repeat.lower = 0;
|
||||
@ -5665,14 +5725,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
case '^':
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
|
||||
tok->type = TK_ANCHOR;
|
||||
tok->u.subtype = (IS_SINGLELINE(env->options)
|
||||
tok->u.subtype = (OPTON_SINGLELINE(env->options)
|
||||
? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
|
||||
break;
|
||||
|
||||
case '$':
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
|
||||
tok->type = TK_ANCHOR;
|
||||
tok->u.subtype = (IS_SINGLELINE(env->options)
|
||||
tok->u.subtype = (OPTON_SINGLELINE(env->options)
|
||||
? ANCR_SEMI_END_BUF : ANCR_END_LINE);
|
||||
break;
|
||||
|
||||
@ -5687,7 +5747,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
break;
|
||||
|
||||
case '#':
|
||||
if (IS_EXTEND(env->options)) {
|
||||
if (OPTON_EXTEND(env->options)) {
|
||||
while (!PEND) {
|
||||
PFETCH(c);
|
||||
if (ONIGENC_IS_CODE_NEWLINE(enc, c))
|
||||
@ -5699,7 +5759,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
break;
|
||||
|
||||
case ' ': case '\t': case '\n': case '\r': case '\f':
|
||||
if (IS_EXTEND(env->options))
|
||||
if (OPTON_EXTEND(env->options))
|
||||
goto start;
|
||||
break;
|
||||
|
||||
@ -5885,8 +5945,6 @@ add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
|
||||
static int
|
||||
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
|
||||
{
|
||||
#define ASCII_LIMIT 127
|
||||
|
||||
int c, r;
|
||||
int ascii_mode;
|
||||
int is_single;
|
||||
@ -5895,7 +5953,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
|
||||
OnigCodePoint sb_out;
|
||||
OnigEncoding enc = env->enc;
|
||||
|
||||
ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
|
||||
ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
|
||||
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
|
||||
if (r == 0) {
|
||||
@ -6579,8 +6637,6 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
BITSET_IS_EMPTY(cc->bs, is_empty);
|
||||
|
||||
if (is_empty == 0) {
|
||||
#define NEWLINE_CODE 0x0a
|
||||
|
||||
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
|
||||
if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
|
||||
BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
|
||||
@ -7096,10 +7152,10 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
break;
|
||||
|
||||
case '=':
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);
|
||||
*np = node_new_anchor(ANCR_PREC_READ);
|
||||
break;
|
||||
case '!': /* preceding read */
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);
|
||||
*np = node_new_anchor(ANCR_PREC_READ_NOT);
|
||||
break;
|
||||
case '>': /* (?>...) stop backtrack */
|
||||
*np = node_new_bag(BAG_STOP_BACKTRACK);
|
||||
@ -7117,9 +7173,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
|
||||
PFETCH(c);
|
||||
if (c == '=')
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);
|
||||
*np = node_new_anchor(ANCR_LOOK_BEHIND);
|
||||
else if (c == '!')
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);
|
||||
*np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
|
||||
else {
|
||||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
|
||||
UChar *name;
|
||||
@ -7132,7 +7188,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
named_group1:
|
||||
list_capture = 0;
|
||||
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
named_group2:
|
||||
#endif
|
||||
name = p;
|
||||
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
|
||||
&num_type, FALSE);
|
||||
@ -7613,7 +7671,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
|
||||
if (OPTON_DONT_CAPTURE_GROUP(env->options))
|
||||
goto group;
|
||||
|
||||
*np = node_new_memory(0);
|
||||
@ -7884,7 +7942,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
|
||||
else {
|
||||
len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
|
||||
if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) {
|
||||
csnode = onig_node_new_str(buf, buf + len);
|
||||
csnode = node_new_str(buf, buf + len);
|
||||
if (IS_NULL(csnode)) goto err_free_ns;
|
||||
|
||||
NODE_STRING_SET_CASE_EXPANDED(csnode);
|
||||
@ -7923,6 +7981,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
Node** tp;
|
||||
unsigned int parse_depth;
|
||||
|
||||
retry:
|
||||
group = 0;
|
||||
*np = NULL;
|
||||
if (tok->type == (enum TokenSyms )term)
|
||||
@ -7956,19 +8015,28 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
}
|
||||
}
|
||||
else if (r == 2) { /* option only */
|
||||
Node* target;
|
||||
OnigOptionType prev = env->options;
|
||||
|
||||
env->options = BAG_(*np)->o.options;
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&target, tok, term, src, end, env, FALSE);
|
||||
env->options = prev;
|
||||
if (r < 0) {
|
||||
onig_node_free(target);
|
||||
return r;
|
||||
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
|
||||
env->options = BAG_(*np)->o.options;
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
onig_node_free(*np);
|
||||
goto retry;
|
||||
}
|
||||
else {
|
||||
Node* target;
|
||||
OnigOptionType prev = env->options;
|
||||
|
||||
env->options = BAG_(*np)->o.options;
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&target, tok, term, src, end, env, FALSE);
|
||||
env->options = prev;
|
||||
if (r < 0) {
|
||||
onig_node_free(target);
|
||||
return r;
|
||||
}
|
||||
NODE_BODY(*np) = target;
|
||||
}
|
||||
NODE_BODY(*np) = target;
|
||||
return tok->type;
|
||||
}
|
||||
break;
|
||||
@ -7984,7 +8052,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
case TK_STRING:
|
||||
tk_byte:
|
||||
{
|
||||
*np = node_new_str(tok->backp, *src);
|
||||
*np = node_new_str_with_options(tok->backp, *src, env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
|
||||
while (1) {
|
||||
@ -8005,7 +8073,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
case TK_CRUDE_BYTE:
|
||||
tk_crude_byte:
|
||||
{
|
||||
*np = node_new_str_crude_char(tok->u.byte);
|
||||
*np = node_new_str_crude_char(tok->u.byte, env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
len = 1;
|
||||
while (1) {
|
||||
@ -8042,9 +8110,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
|
||||
if (len < 0) return len;
|
||||
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
|
||||
*np = node_new_str_crude(buf, buf + len);
|
||||
*np = node_new_str_crude(buf, buf + len, env->options);
|
||||
#else
|
||||
*np = node_new_str(buf, buf + len);
|
||||
*np = node_new_str_with_options(buf, buf + len, env->options);
|
||||
#endif
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
}
|
||||
@ -8062,7 +8130,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (IS_NULL(qend)) {
|
||||
nextp = qend = end;
|
||||
}
|
||||
*np = node_new_str(qstart, qend);
|
||||
*np = node_new_str_with_options(qstart, qend, env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
*src = nextp;
|
||||
}
|
||||
@ -8110,7 +8178,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (r != 0) return r;
|
||||
|
||||
cc = CCLASS_(*np);
|
||||
if (IS_IGNORECASE(env->options)) {
|
||||
if (OPTON_IGNORECASE(env->options)) {
|
||||
IApplyCaseFoldArg iarg;
|
||||
|
||||
iarg.env = env;
|
||||
@ -8137,12 +8205,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
break;
|
||||
|
||||
case TK_ANYCHAR:
|
||||
*np = node_new_anychar();
|
||||
*np = node_new_anychar(env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
break;
|
||||
|
||||
case TK_ANYCHAR_ANYTIME:
|
||||
*np = node_new_anychar();
|
||||
*np = node_new_anychar(env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
|
||||
CHECK_NULL_RETURN_MEMERR(qn);
|
||||
@ -8180,12 +8248,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
#endif
|
||||
|
||||
case TK_ANCHOR:
|
||||
{
|
||||
int ascii_mode =
|
||||
IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
|
||||
*np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
}
|
||||
*np = node_new_anchor_with_options(tok->u.anchor, env->options);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
break;
|
||||
|
||||
case TK_REPEAT:
|
||||
@ -8219,7 +8283,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
break;
|
||||
|
||||
case TK_TRUE_ANYCHAR:
|
||||
r = node_new_true_anychar(np, env);
|
||||
r = node_new_true_anychar(np);
|
||||
if (r < 0) return r;
|
||||
break;
|
||||
|
||||
@ -8365,9 +8429,11 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
|
||||
{
|
||||
int r;
|
||||
Node *node, **headp;
|
||||
OnigOptionType save_options;
|
||||
|
||||
*top = NULL;
|
||||
INC_PARSE_DEPTH(env->parse_depth);
|
||||
save_options = env->options;
|
||||
|
||||
r = parse_branch(&node, tok, term, src, end, env, group_head);
|
||||
if (r < 0) {
|
||||
@ -8416,6 +8482,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
|
||||
return ONIGERR_PARSER_BUG;
|
||||
}
|
||||
|
||||
env->options = save_options;
|
||||
DEC_PARSE_DEPTH(env->parse_depth);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -33,7 +33,7 @@
|
||||
#include "regint.h"
|
||||
|
||||
#define NODE_STRING_MARGIN 16
|
||||
#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
/* node type */
|
||||
@ -68,10 +68,10 @@ enum GimmickType {
|
||||
};
|
||||
|
||||
enum BodyEmptyType {
|
||||
BODY_IS_NOT_EMPTY = 0,
|
||||
BODY_IS_EMPTY_POSSIBILITY = 1,
|
||||
BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
|
||||
BODY_IS_EMPTY_POSSIBILITY_REC = 3
|
||||
BODY_IS_NOT_EMPTY = 0,
|
||||
BODY_MAY_BE_EMPTY = 1,
|
||||
BODY_MAY_BE_EMPTY_MEM = 2,
|
||||
BODY_MAY_BE_EMPTY_REC = 3
|
||||
};
|
||||
|
||||
struct _Node;
|
||||
@ -86,7 +86,6 @@ typedef struct {
|
||||
unsigned int flag;
|
||||
UChar buf[NODE_STRING_BUF_SIZE];
|
||||
int capacity; /* (allocated size - 1) or 0: use buf[] */
|
||||
int case_min_len;
|
||||
} StrNode;
|
||||
|
||||
typedef struct {
|
||||
@ -140,7 +139,8 @@ typedef struct {
|
||||
/* for multiple call reference */
|
||||
OnigLen min_len; /* min length (byte) */
|
||||
OnigLen max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
OnigLen min_char_len;
|
||||
OnigLen max_char_len;
|
||||
int opt_count; /* referenced count in optimize_nodes() */
|
||||
} BagNode;
|
||||
|
||||
@ -190,7 +190,7 @@ typedef struct {
|
||||
struct _Node* body;
|
||||
|
||||
int type;
|
||||
int char_len;
|
||||
OnigLen char_len;
|
||||
int ascii_mode;
|
||||
} AnchorNode;
|
||||
|
||||
@ -210,7 +210,6 @@ typedef struct {
|
||||
|
||||
int ctype;
|
||||
int not;
|
||||
OnigOptionType options;
|
||||
int ascii_mode;
|
||||
} CtypeNode;
|
||||
|
||||
@ -288,42 +287,35 @@ typedef struct _Node {
|
||||
#define NODE_IS_ANYCHAR(node) \
|
||||
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
||||
|
||||
#define CTYPE_OPTION(node, reg) \
|
||||
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
|
||||
|
||||
|
||||
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
|
||||
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
|
||||
|
||||
#define NODE_STRING_CRUDE (1<<0)
|
||||
#define NODE_STRING_CASE_EXPANDED (1<<1)
|
||||
#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
|
||||
|
||||
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
|
||||
#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
|
||||
#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
|
||||
#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
|
||||
#define NODE_STRING_IS_CRUDE(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
|
||||
#define NODE_STRING_IS_CASE_EXPANDED(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
|
||||
#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
/* node status bits */
|
||||
#define NODE_ST_MIN_FIXED (1<<0)
|
||||
#define NODE_ST_MAX_FIXED (1<<1)
|
||||
#define NODE_ST_CLEN_FIXED (1<<2)
|
||||
#define NODE_ST_FIXED_MIN (1<<0)
|
||||
#define NODE_ST_FIXED_MAX (1<<1)
|
||||
#define NODE_ST_FIXED_CLEN (1<<2)
|
||||
#define NODE_ST_MARK1 (1<<3)
|
||||
#define NODE_ST_MARK2 (1<<4)
|
||||
#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
|
||||
#define NODE_ST_RECURSION (1<<6)
|
||||
#define NODE_ST_CALLED (1<<7)
|
||||
#define NODE_ST_ADDR_FIXED (1<<8)
|
||||
#define NODE_ST_FIXED_ADDR (1<<8)
|
||||
#define NODE_ST_NAMED_GROUP (1<<9)
|
||||
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
|
||||
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
|
||||
@ -333,10 +325,12 @@ typedef struct _Node {
|
||||
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
|
||||
#define NODE_ST_BACKREF (1<<16)
|
||||
#define NODE_ST_CHECKER (1<<17)
|
||||
#define NODE_ST_FIXED_OPTION (1<<18)
|
||||
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
|
||||
#define NODE_ST_SUPER (1<<20)
|
||||
#define NODE_ST_EMPTY_STATUS_CHECK (1<<21)
|
||||
#define NODE_ST_PROHIBIT_RECURSION (1<<18)
|
||||
#define NODE_ST_SUPER (1<<19)
|
||||
#define NODE_ST_EMPTY_STATUS_CHECK (1<<20)
|
||||
#define NODE_ST_IGNORECASE (1<<21)
|
||||
#define NODE_ST_MULTILINE (1<<22)
|
||||
#define NODE_ST_TEXT_SEGMENT_WORD (1<<23)
|
||||
|
||||
|
||||
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
|
||||
@ -350,17 +344,16 @@ typedef struct _Node {
|
||||
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
|
||||
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
|
||||
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
|
||||
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
|
||||
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
|
||||
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
|
||||
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
|
||||
#define NODE_IS_FIXED_ADDR(node) ((NODE_STATUS(node) & NODE_ST_FIXED_ADDR) != 0)
|
||||
#define NODE_IS_FIXED_CLEN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN) != 0)
|
||||
#define NODE_IS_FIXED_MIN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MIN) != 0)
|
||||
#define NODE_IS_FIXED_MAX(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MAX) != 0)
|
||||
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
|
||||
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
|
||||
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
|
||||
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
|
||||
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
|
||||
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
|
||||
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
|
||||
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
|
||||
#define NODE_IS_PROHIBIT_RECURSION(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
|
||||
@ -368,6 +361,9 @@ typedef struct _Node {
|
||||
((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
|
||||
#define NODE_IS_EMPTY_STATUS_CHECK(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0)
|
||||
#define NODE_IS_IGNORECASE(node) ((NODE_STATUS(node) & NODE_ST_IGNORECASE) != 0)
|
||||
#define NODE_IS_MULTILINE(node) ((NODE_STATUS(node) & NODE_ST_MULTILINE) != 0)
|
||||
#define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0)
|
||||
|
||||
#define NODE_PARENT(node) ((node)->u.base.parent)
|
||||
#define NODE_BODY(node) ((node)->u.base.body)
|
||||
@ -431,19 +427,19 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
int new_val;
|
||||
} GroupNumRemap;
|
||||
} GroupNumMap;
|
||||
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumMap* map));
|
||||
|
||||
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern int onig_reduce_nested_quantifier P_((Node* pnode));
|
||||
extern Node* onig_node_copy(Node* from);
|
||||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_bag P_((enum BagType type));
|
||||
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern Node* onig_node_new_alt P_((Node* left, Node* right));
|
||||
|
||||
@ -153,7 +153,8 @@ OnigSyntaxType OnigSyntaxJava = {
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
|
||||
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
|
||||
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
|
||||
, ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH |
|
||||
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
{
|
||||
@ -186,7 +187,7 @@ OnigSyntaxType OnigSyntaxPerl = {
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )
|
||||
, SYN_GNU_REGEX_BV
|
||||
, SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
{
|
||||
@ -224,7 +225,7 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
|
||||
ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
, ( SYN_GNU_REGEX_BV | ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH |
|
||||
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
|
||||
@ -6,12 +6,17 @@
|
||||
|
||||
#define ST_INCLUDED
|
||||
|
||||
#ifndef ONIGURUMA_SYS_UEFI
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
typedef ULONG_PTR st_data_t;
|
||||
#else
|
||||
typedef unsigned long st_data_t;
|
||||
#endif
|
||||
|
||||
#endif /* ONIGURUMA_SYS_UEFI */
|
||||
|
||||
#define ST_DATA_T_DEFINED
|
||||
|
||||
typedef struct st_table st_table;
|
||||
|
||||
@ -279,9 +279,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
int n, m, i, j, k, len;
|
||||
OnigCodePoint code, codes[3];
|
||||
const struct ByUnfoldKey* buk;
|
||||
int n, m, i, j, k, len, lens[3];
|
||||
int index;
|
||||
int fn, ncs[3];
|
||||
OnigCodePoint cs[3][4];
|
||||
OnigCodePoint code, codes[3], orig_codes[3];
|
||||
const struct ByUnfoldKey* buk1;
|
||||
|
||||
n = 0;
|
||||
|
||||
@ -317,38 +320,161 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
}
|
||||
#endif
|
||||
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0) {
|
||||
if (buk->fold_len == 1) {
|
||||
orig_codes[0] = code;
|
||||
lens[0] = len;
|
||||
p += len;
|
||||
|
||||
buk1 = onigenc_unicode_unfold_key(orig_codes[0]);
|
||||
if (buk1 != 0 && buk1->fold_len == 1) {
|
||||
codes[0] = *FOLDS1_FOLD(buk1->index);
|
||||
}
|
||||
else
|
||||
codes[0] = orig_codes[0];
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
||||
goto fold1;
|
||||
|
||||
if (p < end) {
|
||||
const struct ByUnfoldKey* buk;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
orig_codes[1] = code;
|
||||
len = enclen(enc, p);
|
||||
lens[1] = lens[0] + len;
|
||||
buk = onigenc_unicode_unfold_key(orig_codes[1]);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[1] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[1] = orig_codes[1];
|
||||
|
||||
p += len;
|
||||
if (p < end) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
orig_codes[2] = code;
|
||||
len = enclen(enc, p);
|
||||
lens[2] = lens[1] + len;
|
||||
buk = onigenc_unicode_unfold_key(orig_codes[2]);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[2] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[2] = orig_codes[2];
|
||||
|
||||
index = onigenc_unicode_fold3_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS3_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = lens[2];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
|
||||
for (fn = 0; fn < 3; fn++) {
|
||||
int sindex;
|
||||
cs[fn][0] = FOLDS3_FOLD(index)[fn];
|
||||
ncs[fn] = 1;
|
||||
sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (sindex >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(sindex);
|
||||
for (i = 0; i < m; i++) {
|
||||
cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
|
||||
}
|
||||
ncs[fn] += m;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
for (k = 0; k < ncs[2]; k++) {
|
||||
items[n].byte_len = lens[2];
|
||||
items[n].code_len = 3;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
items[n].code[2] = cs[2][k];
|
||||
if (items[n].code[0] == orig_codes[0] &&
|
||||
items[n].code[1] == orig_codes[1] &&
|
||||
items[n].code[2] == orig_codes[2])
|
||||
continue;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
index = onigenc_unicode_fold2_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS2_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = lens[1];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
|
||||
for (fn = 0; fn < 2; fn++) {
|
||||
int sindex;
|
||||
cs[fn][0] = FOLDS2_FOLD(index)[fn];
|
||||
ncs[fn] = 1;
|
||||
sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (sindex >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(sindex);
|
||||
for (i = 0; i < m; i++) {
|
||||
cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
|
||||
}
|
||||
ncs[fn] += m;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
items[n].byte_len = lens[1];
|
||||
items[n].code_len = 2;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
if (items[n].code[0] == orig_codes[0] &&
|
||||
items[n].code[1] == orig_codes[1])
|
||||
continue;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
fold1:
|
||||
if (buk1 != 0) {
|
||||
if (buk1->fold_len == 1) {
|
||||
int un;
|
||||
items[0].byte_len = len;
|
||||
items[0].byte_len = lens[0];
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = *FOLDS1_FOLD(buk->index);
|
||||
items[0].code[0] = *FOLDS1_FOLD(buk1->index);
|
||||
n++;
|
||||
|
||||
un = FOLDS1_UNFOLDS_NUM(buk->index);
|
||||
un = FOLDS1_UNFOLDS_NUM(buk1->index);
|
||||
for (i = 0; i < un; i++) {
|
||||
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
|
||||
if (unfold != code) {
|
||||
items[n].byte_len = len;
|
||||
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk1->index)[i];
|
||||
if (unfold != orig_codes[0]) {
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
code = items[0].code[0]; /* for multi-code to unfold search. */
|
||||
}
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
OnigCodePoint cs[3][4];
|
||||
int fn, ncs[3];
|
||||
|
||||
if (buk->fold_len == 2) {
|
||||
m = FOLDS2_UNFOLDS_NUM(buk->index);
|
||||
if (buk1->fold_len == 2) {
|
||||
m = FOLDS2_UNFOLDS_NUM(buk1->index);
|
||||
for (i = 0; i < m; i++) {
|
||||
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
|
||||
if (unfold == code) continue;
|
||||
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk1->index)[i];
|
||||
if (unfold == orig_codes[0]) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
@ -356,7 +482,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
|
||||
for (fn = 0; fn < 2; fn++) {
|
||||
int index;
|
||||
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
|
||||
cs[fn][0] = FOLDS2_FOLD(buk1->index)[fn];
|
||||
ncs[fn] = 1;
|
||||
index = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (index >= 0) {
|
||||
@ -370,7 +496,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 2;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
@ -379,12 +505,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
}
|
||||
}
|
||||
else { /* fold_len == 3 */
|
||||
m = FOLDS3_UNFOLDS_NUM(buk->index);
|
||||
m = FOLDS3_UNFOLDS_NUM(buk1->index);
|
||||
for (i = 0; i < m; i++) {
|
||||
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
|
||||
if (unfold == code) continue;
|
||||
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk1->index)[i];
|
||||
if (unfold == orig_codes[0]) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
@ -392,7 +518,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
|
||||
for (fn = 0; fn < 3; fn++) {
|
||||
int index;
|
||||
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
|
||||
cs[fn][0] = FOLDS3_FOLD(buk1->index)[fn];
|
||||
ncs[fn] = 1;
|
||||
index = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (index >= 0) {
|
||||
@ -407,7 +533,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
for (k = 0; k < ncs[2]; k++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 3;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
@ -417,17 +543,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* multi char folded code is not head of another folded multi char */
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int index = onigenc_unicode_fold1_key(&code);
|
||||
int index = onigenc_unicode_fold1_key(orig_codes);
|
||||
if (index >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].byte_len = lens[0];
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
|
||||
n++;
|
||||
@ -435,64 +558,6 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
}
|
||||
}
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
||||
return n;
|
||||
|
||||
p += len;
|
||||
if (p < end) {
|
||||
int clen;
|
||||
int index;
|
||||
|
||||
codes[0] = code;
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[1] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[1] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
|
||||
index = onigenc_unicode_fold2_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS2_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
p += clen;
|
||||
if (p < end) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[2] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[2] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
|
||||
index = onigenc_unicode_fold3_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS3_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
@ -931,7 +996,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
|
||||
return from != 0x000d || to != 0x000a;
|
||||
return from != 0x000d || to != NEWLINE_CODE;
|
||||
}
|
||||
|
||||
btype = unicode_egcb_is_break_2code(from, to);
|
||||
@ -974,7 +1039,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
return 1;
|
||||
|
||||
#else
|
||||
return from != 0x000d || to != 0x000a;
|
||||
return from != 0x000d || to != NEWLINE_CODE;
|
||||
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user