+ upd: update Oniguruma RegEx component

This commit is contained in:
Rainer Kottenhoff 2019-11-04 01:17:17 +01:00
parent 5f220dba56
commit f76a1b8dee
19 changed files with 1010 additions and 930 deletions

View File

@ -15,7 +15,8 @@ trim_trailing_whitespace = false
# Matches multiple files with brace expansion notation
[**.{h,c,cpp,hpp,cxx,rc}]
# Set default charset
charset = utf-8-bom
#charset = utf-8-bom
charset = utf-8
# 2 space indentation
indent_style = space
indent_size = 2

View File

@ -1,8 +1,22 @@
History
2019/MM/DD: Version 6.9.4
2019/10/31: Update Unicode Emoji version to 12.1 (No data changed)
2019/10/29: implement USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR configuration
2019/10/18: re-implement case fold conversion
2019/10/04: fix #156: Heap buffer overflow in match_at() with case-insensitive match
2019/09/30: NEW API: add onig_regset_replace()
2019/09/30: change Unicode VERSION value format
2019/09/20: NEW API: add regset functions
2019/09/20: add data ensure check before peek string value in OP_PUSH_IF_PEEK_NEXT
2019/09/20: fix loose code in encode-harness.c
2019/08/13: fix heap-buffer-overflow
2019/08/13: Add a macro to disable direct threading in the match engine (PR#149)
2019/08/06: Version 6.9.3 (security fix release)
2019/07/30: add ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE
2019/07/30: add ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC
2019/07/29: add STK_PREC_READ_START/END stack type
2019/07/29: Fix #147: Stack Exhaustion Problem caused by some parsing functions
2019/07/11: add a dictionary file for libfuzzer

View File

@ -1,4 +1,4 @@
Oniguruma Regular Expressions Version 6.9.2 2019/08/08
Oniguruma Regular Expressions Version 6.9.4 2019/10/31
syntax: ONIG_SYNTAX_ONIGURUMA (default)
@ -289,6 +289,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
In negative look-behind, capturing group isn't allowed,
but non-capturing group (?:) is allowed.
* In look-behind and negative look-behind, support for the
ignore-case option is limited. Only conversion between
single characters is supported. (Conversion of multiple
characters in Unicode is not supported.)
(?>subexp) atomic group
no backtracks in subexp.

View File

@ -1,4 +1,4 @@
Unicode Properties (from Unicode Version: 12.1.0)
Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1)
15: ASCII_Hex_Digit
16: Adlam

View File

@ -37,6 +37,7 @@
#endif
#define HAVE_DECL_SYS_NERR 1
#define STDC_HEADERS 1
#define HAVE_STDINT_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_LIMITS_H 1

View File

@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
REG_RETURN_TYPE = re.compile('^const\s+short\s+int\s*\*')
REG_FOLD_KEY = re.compile('unicode_fold(\d)_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
REG_ENTRY = re.compile('\{".*?",\s*(-?\d+)\s*\}')
REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
@ -34,7 +34,7 @@ def parse_line(s, key_len):
if r != s: return r
r = re.sub(REG_ENTRY, '\\1', s)
if r != s: return r
r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
r = re.sub(REG_IF_LEN, '', s)
if r != s: return r
r = re.sub(REG_GET_HASH, 'int key = hash(codes);', s)
if r != s: return r

View File

@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
REG_UNFOLD_KEY = re.compile('onigenc_unicode_unfold_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
REG_ENTRY = re.compile('\{".+?",\s*/\*(.+?)\*/\s*(-?\d+),\s*(\d)\}')
REG_EMPTY_ENTRY = re.compile('\{"",\s*(-?\d+),\s*(\d)\}')
REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
@ -32,7 +32,7 @@ def parse_line(s):
if r != s: return r
r = re.sub(REG_EMPTY_ENTRY, '{0xffffffff, \\1, \\2}', s)
if r != s: return r
r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
r = re.sub(REG_IF_LEN, '', s)
if r != s: return r
r = re.sub(REG_GET_HASH, 'int key = hash(&code);', s)
if r != s: return r

View File

@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_property_data.py
# Copyright (c) 2016-2018 K.Kosako
# Copyright (c) 2016-2019 K.Kosako
import sys
import re
@ -22,9 +22,12 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
UNICODE_VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
EMOJI_VERSION_REG = re.compile("(?i)#\s*Version:\s*(\d+)\.(\d+)")
VERSION_INFO = [-1, -1, -1]
EMOJI_VERSION_INFO = [-1, -1]
DIC = { }
KDIC = { }
PropIndex = { }
@ -40,14 +43,6 @@ def fix_block_name(name):
s = re.sub(r'[- ]+', '_', name)
return 'In_' + s
def check_version_info(s):
m = VERSION_REG.match(s)
if m is not None:
VERSION_INFO[0] = int(m.group(1))
VERSION_INFO[1] = int(m.group(2))
VERSION_INFO[2] = int(m.group(3))
def print_ranges(ranges):
for (start, end) in ranges:
print "0x%06x, 0x%06x" % (start, end)
@ -234,7 +229,8 @@ def parse_unicode_data_file(f):
normalize_ranges_in_dic(dic)
return dic, assigned
def parse_properties(path, klass, prop_prefix = None):
def parse_properties(path, klass, prop_prefix = None, version_reg = None):
version_match = None
with open(path, 'r') as f:
dic = { }
prop = None
@ -244,9 +240,10 @@ def parse_properties(path, klass, prop_prefix = None):
if len(s) == 0:
continue
if s[0] == '#':
if VERSION_INFO[0] < 0:
check_version_info(s)
if s[0] == '#' and version_reg is not None and version_match is None:
version_match = version_reg.match(s)
if version_match is not None:
continue
m = PR_LINE_REG.match(s)
if m:
@ -267,7 +264,7 @@ def parse_properties(path, klass, prop_prefix = None):
props.append(prop)
normalize_ranges_in_dic(dic)
return (dic, props)
return (dic, props, version_match)
def parse_property_aliases(path):
a = { }
@ -415,11 +412,11 @@ def entry_and_print_prop_and_index(name, index):
nname = normalize_prop_name(name)
print_prop_and_index(nname, index)
def parse_and_merge_properties(path, klass):
dic, props = parse_properties(path, klass)
def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None):
dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)
merge_dic(DIC, dic)
merge_props(PROPS, props)
return dic, props
return dic, props, ver_m
### main ###
argv = sys.argv
@ -448,11 +445,21 @@ with open('UnicodeData.txt', 'r') as f:
PROPS = DIC.keys()
PROPS = list_sub(PROPS, POSIX_LIST)
parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')
dic, props = parse_and_merge_properties('Scripts.txt', 'Script')
_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG)
if ver_m is not None:
VERSION_INFO[0] = int(ver_m.group(1))
VERSION_INFO[1] = int(ver_m.group(2))
VERSION_INFO[2] = int(ver_m.group(3))
dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')
DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic))
parse_and_merge_properties('PropList.txt', 'Binary Property')
parse_and_merge_properties('emoji-data.txt', 'Emoji Property')
_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG)
if ver_m is not None:
EMOJI_VERSION_INFO[0] = int(ver_m.group(1))
EMOJI_VERSION_INFO[1] = int(ver_m.group(2))
PROPS.append('Unknown')
KDIC['Unknown'] = 'Script'
@ -465,9 +472,9 @@ dic, BLOCKS = parse_blocks('Blocks.txt')
merge_dic(DIC, dic)
if INCLUDE_GRAPHEME_CLUSTER_DATA:
dic, props = parse_properties('GraphemeBreakProperty.txt',
'GraphemeBreak Property',
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
dic, props, _ = parse_properties('GraphemeBreakProperty.txt',
'GraphemeBreak Property',
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
merge_dic(DIC, dic)
merge_props(PROPS, props)
#prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other'
@ -535,9 +542,11 @@ sys.stdout.write(s)
if OUTPUT_LIST_MODE:
UPF = open("UNICODE_PROPERTIES", "w")
if VERSION_INFO[0] < 0:
raise RuntimeError("Version is not found")
raise RuntimeError("Unicode Version is not found")
if EMOJI_VERSION_INFO[0] < 0:
raise RuntimeError("Emoji Version is not found")
print >> UPF, "Unicode Properties (from Unicode Version: %d.%d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d, Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
print >> UPF, ''
index = -1
@ -573,9 +582,12 @@ print '%%'
print ''
if not(POSIX_ONLY):
if VERSION_INFO[0] < 0:
raise RuntimeError("Version is not found")
raise RuntimeError("Unicode Version is not found")
if EMOJI_VERSION_INFO[0] < 0:
raise RuntimeError("Emoji Version is not found")
print "#define UNICODE_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
print "#define UNICODE_EMOJI_VERSION %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
print ''
print "#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10)

File diff suppressed because it is too large Load Diff

View File

@ -219,9 +219,13 @@ static OpInfoType OpInfo[] = {
{ OP_MEM_START_PUSH, "mem-start-push" },
{ OP_MEM_START, "mem-start" },
{ OP_MEM_END_PUSH, "mem-end-push" },
#ifdef USE_CALL
{ OP_MEM_END_PUSH_REC, "mem-end-push-rec" },
#endif
{ OP_MEM_END, "mem-end" },
#ifdef USE_CALL
{ OP_MEM_END_REC, "mem-end-rec" },
#endif
{ OP_FAIL, "fail" },
{ OP_JUMP, "jump" },
{ OP_PUSH, "push" },
@ -235,12 +239,12 @@ static OpInfoType OpInfo[] = {
{ OP_REPEAT_NG, "repeat-ng" },
{ OP_REPEAT_INC, "repeat-inc" },
{ OP_REPEAT_INC_NG, "repeat-inc-ng" },
{ OP_REPEAT_INC_SG, "repeat-inc-sg" },
{ OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
{ OP_EMPTY_CHECK_START, "empty-check-start" },
{ OP_EMPTY_CHECK_END, "empty-check-end" },
{ OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
#ifdef USE_CALL
{ OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
#endif
{ OP_PREC_READ_START, "push-pos" },
{ OP_PREC_READ_END, "pop-pos" },
{ OP_PREC_READ_NOT_START, "prec-read-not-start" },
@ -250,10 +254,12 @@ static OpInfoType OpInfo[] = {
{ OP_LOOK_BEHIND, "look-behind" },
{ OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
{ OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
{ OP_CALL, "call" },
{ OP_RETURN, "return" },
{ OP_PUSH_SAVE_VAL, "push-save-val" },
{ OP_UPDATE_VAR, "update-var" },
#ifdef USE_CALL
{ OP_CALL, "call" },
{ OP_RETURN, "return" },
#endif
#ifdef USE_CALLOUT
{ OP_CALLOUT_CONTENTS, "callout-contents" },
{ OP_CALLOUT_NAME, "callout-name" },
@ -466,10 +472,13 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
mem = p->memory_start.num;
fprintf(f, ":%d", mem);
break;
case OP_MEM_END_PUSH:
case OP_MEM_END_PUSH_REC:
case OP_MEM_END:
case OP_MEM_END_PUSH:
#ifdef USE_CALL
case OP_MEM_END_REC:
case OP_MEM_END_PUSH_REC:
#endif
mem = p->memory_end.num;
fprintf(f, ":%d", mem);
break;
@ -513,8 +522,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG:
mem = p->repeat.id;
fprintf(f, ":%d", mem);
break;
@ -525,7 +532,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
case OP_EMPTY_CHECK_END:
case OP_EMPTY_CHECK_END_MEMST:
#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
#endif
mem = p->empty_check_end.mem;
fprintf(f, ":%d", mem);
break;
@ -548,10 +557,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
p_rel_addr(f, addr, p, start);
break;
#ifdef USE_CALL
case OP_CALL:
addr = p->call.addr;
fprintf(f, ":{/%d}", addr);
break;
#endif
case OP_PUSH_SAVE_VAL:
{
@ -621,7 +632,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_ATOMIC_START:
case OP_ATOMIC_END:
case OP_LOOK_BEHIND_NOT_END:
#ifdef USE_CALL
case OP_RETURN:
#endif
break;
default:
@ -957,7 +970,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
result = ONIGERR_INVALID_ARGUMENT;\
}\
best_len = result;\
goto finish;\
goto match_at_end;\
break;\
}\
} while(0)
@ -979,18 +992,26 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/* handled by normal-POP */
#define STK_MEM_START 0x0010
#define STK_MEM_END 0x8030
#define STK_REPEAT_INC 0x0050
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_REPEAT_INC (0x0040 | STK_MASK_POP_HANDLED)
#else
#define STK_REPEAT_INC 0x0040
#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT 0x0070
#endif
/* avoided by normal-POP */
#define STK_VOID 0x0000 /* for fill a blank */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STK_EMPTY_CHECK_START (0x3000 | STK_MASK_POP_HANDLED)
#else
#define STK_EMPTY_CHECK_START 0x3000
#endif
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8100
#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
#define STK_REPEAT 0x0300
/* #define STK_REPEAT 0x0300 */
#define STK_CALL_FRAME 0x0400
#define STK_RETURN 0x0500
#define STK_SAVE_VAL 0x0600
@ -1016,11 +1037,10 @@ typedef struct _StackType {
UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
Operation* pcode; /* byte code position (head of repeated target) */
} repeat;
struct {
StackIndex si; /* index of stack */
int count;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
StackIndex prev_index; /* index of stack */
#endif
} repeat_inc;
struct {
UChar *pstr; /* start/end position */
@ -1029,7 +1049,10 @@ typedef struct _StackType {
StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
UChar *pstr; /* start position */
UChar *pstr; /* start position */
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
StackIndex prev_index; /* index of stack */
#endif
} empty_check;
#ifdef USE_CALL
struct {
@ -1075,6 +1098,41 @@ struct OnigCalloutArgsStruct {
#endif
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define PTR_NUM_SIZE(reg) ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
repeat_stk = (StackIndex* )alloc_base;\
empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
#define LOAD_TO_REPEAT_STK_VAR(sid) repeat_stk[sid] = GET_STACK_INDEX(stk)
#define POP_REPEAT_INC else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}
#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) empty_check_stk[sid] = GET_STACK_INDEX(stk)
#define POP_EMPTY_CHECK_START else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}
#else
#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
mem_start_stk = (StackIndex* )alloc_base;\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
#define SAVE_REPEAT_STK_VAR(sid)
#define LOAD_TO_REPEAT_STK_VAR(sid)
#define POP_REPEAT_INC
#define SAVE_EMPTY_CHECK_STK_VAR(sid)
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
#define POP_EMPTY_CHECK_START
#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
@ -1086,7 +1144,7 @@ struct OnigCalloutArgsStruct {
(msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
@ -1097,7 +1155,7 @@ struct OnigCalloutArgsStruct {
(msa).match_stack_limit = (mpv)->match_stack_limit;\
(msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
(msa).mp = mpv;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
@ -1152,12 +1210,6 @@ struct OnigCalloutArgsStruct {
};\
} while(0)
#define UPDATE_FOR_STACK_REALLOC do{\
repeat_stk = (StackIndex* )alloc_base;\
mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
@ -1178,7 +1230,9 @@ onig_set_match_stack_limit_size(unsigned int size)
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
#define CHECK_RETRY_LIMIT_IN_MATCH do {\
if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
if (retry_in_match_counter++ > retry_limit_in_match) {\
MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
}\
} while (0)
#else
@ -1568,19 +1622,23 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
stk->zid = (sid);\
stk->u.repeat.pcode = (pat);\
stk->u.repeat.count = 0;\
stk->u.repeat.pcode = (pat);\
STACK_INC;\
} while(0)
#endif
#define STACK_PUSH_REPEAT_INC(sindex) do {\
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT_INC;\
stk->u.repeat_inc.si = (sindex);\
stk->zid = (sid);\
stk->u.repeat_inc.count = (ct);\
SAVE_REPEAT_STK_VAR(sid);\
LOAD_TO_REPEAT_STK_VAR(sid);\
STACK_INC;\
} while(0)
@ -1653,6 +1711,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk->type = STK_EMPTY_CHECK_START;\
stk->zid = (cnum);\
stk->u.empty_check.pstr = (s);\
SAVE_EMPTY_CHECK_STK_VAR(cnum);\
LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
STACK_INC;\
} while(0)
@ -1790,7 +1850,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
goto stack_error;\
MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
}
#else
#define STACK_BASE_CHECK(p, at)
@ -1841,13 +1901,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
else if (stk->type == STK_REPEAT_INC) {\
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
}\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
POP_REPEAT_INC \
POP_EMPTY_CHECK_START \
POP_CALLOUT_CASE\
}\
}\
@ -1866,13 +1925,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
else if (stk->type == STK_REPEAT_INC) {\
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
}\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
POP_REPEAT_INC \
POP_EMPTY_CHECK_START \
/* Don't call callout here because negation of total success by (?!..) (?<!..) */\
}\
}\
@ -1924,20 +1982,41 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
StackType* k = stk;\
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
if (k->type == STK_EMPTY_CHECK_START) {\
if (k->zid == (sid)) {\
(isnull) = (k->u.empty_check.pstr == (s));\
break;\
}\
if (k->zid == (sid)) break;\
}\
}\
} while(0)
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define GET_EMPTY_CHECK_START(sid, k) do {\
if (reg->num_call == 0) {\
k = STACK_AT(empty_check_stk[sid]);\
}\
else {\
EMPTY_CHECK_START_SEARCH(sid, k);\
}\
} while(0)
#else
#define GET_EMPTY_CHECK_START(sid, k) EMPTY_CHECK_START_SEARCH(sid, k)
#endif
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
StackType* k;\
GET_EMPTY_CHECK_START(sid, k);\
(isnull) = (k->u.empty_check.pstr == (s));\
} while(0)
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
(addr) = 0;\
@ -1951,39 +2030,30 @@ stack_double(int is_alloca, char** arg_alloc_base,
} while (0)
#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
if (k->type == STK_EMPTY_CHECK_START) {\
if (k->zid == (sid)) {\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
break;\
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
StackType* k;\
GET_EMPTY_CHECK_START(sid, k);\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
}\
else {\
UChar* endp;\
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START &&\
MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
}\
else {\
UChar* endp;\
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START &&\
MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
}\
else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
else if (endp != s) {\
(isnull) = -1; /* empty, but position changed */ \
}\
}\
k++;\
}\
break;\
else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
else if (endp != s) {\
(isnull) = -1; /* empty, but position changed */ \
}\
}\
k++;\
}\
}\
} while(0)
@ -2064,24 +2134,45 @@ stack_double(int is_alloca, char** arg_alloc_base,
} while(0)
#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#define STACK_GET_REPEAT(sid, k) do {\
int level = 0;\
k = stk;\
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
StackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
if (k->type == STK_REPEAT) {\
if (level == 0) {\
if (k->zid == (sid)) {\
break;\
}\
(k)--;\
STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
if ((k)->type == STK_REPEAT_INC) {\
if ((k)->zid == (sid)) {\
(c) = (k)->u.repeat_inc.count;\
break;\
}\
}\
else if ((k)->type == STK_RETURN) {\
int level = -1;\
while (1) {\
(k)--;\
if ((k)->type == STK_CALL_FRAME) {\
level++;\
if (level == 0) break;\
}\
else if ((k)->type == STK_RETURN) level--;\
}\
}\
else if (k->type == STK_CALL_FRAME) level--;\
else if (k->type == STK_RETURN) level++;\
}\
} while(0)
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
#define STACK_GET_REPEAT_COUNT(sid, c) do {\
if (reg->num_call == 0) {\
(c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
}\
else {\
STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
}\
} while(0)
#else
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
#endif
#define STACK_RETURN(addr) do {\
int level = 0;\
StackType* k = stk;\
@ -2483,6 +2574,8 @@ typedef struct {
#define MATCH_DEBUG_OUT(offset)
#endif
#define MATCH_AT_ERROR_RETURN(err_code) best_len = err_code; goto match_at_end
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
@ -2556,9 +2649,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_MEM_START,
&&L_MEM_START_PUSH,
&&L_MEM_END_PUSH,
#ifdef USE_CALL
&&L_MEM_END_PUSH_REC,
#endif
&&L_MEM_END,
#ifdef USE_CALL
&&L_MEM_END_REC,
#endif
&&L_FAIL,
&&L_JUMP,
&&L_PUSH,
@ -2572,12 +2669,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_REPEAT_NG,
&&L_REPEAT_INC,
&&L_REPEAT_INC_NG,
&&L_REPEAT_INC_SG,
&&L_REPEAT_INC_NG_SG,
&&L_EMPTY_CHECK_START,
&&L_EMPTY_CHECK_END,
&&L_EMPTY_CHECK_END_MEMST,
#ifdef USE_CALL
&&L_EMPTY_CHECK_END_MEMST_PUSH,
#endif
&&L_PREC_READ_START,
&&L_PREC_READ_END,
&&L_PREC_READ_NOT_START,
@ -2587,10 +2684,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_LOOK_BEHIND,
&&L_LOOK_BEHIND_NOT_START,
&&L_LOOK_BEHIND_NOT_END,
&&L_CALL,
&&L_RETURN,
&&L_PUSH_SAVE_VAL,
&&L_UPDATE_VAR,
#ifdef USE_CALL
&&L_CALL,
&&L_RETURN,
#endif
#ifdef USE_CALLOUT
&&L_CALLOUT_CONTENTS,
&&L_CALLOUT_NAME,
@ -2608,15 +2707,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
char *alloc_base;
StackType *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
StackIndex si;
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
UChar* keep;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
StackIndex *repeat_stk;
StackIndex *empty_check_stk;
#endif
#ifdef USE_RETRY_LIMIT_IN_MATCH
unsigned long retry_limit_in_match;
unsigned long retry_in_match_counter;
#endif
#ifdef USE_CALLOUT
int of;
#endif
@ -2745,10 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
stkp = stk_base;
r = make_capture_history_tree(region->history_root, &stkp,
stk, (UChar* )str, reg);
if (r < 0) {
best_len = r; /* error code */
goto finish;
}
if (r < 0) MATCH_AT_ERROR_RETURN(r);
}
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API_REGION_OPTION
@ -2773,7 +2871,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
/* default behavior: return first-matching result. */
goto finish;
goto match_at_end;
CASE_OP(EXACT1)
DATA_ENSURE(1);
@ -3293,7 +3391,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
default:
goto bytecode_error;
MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
break;
}
@ -3419,13 +3517,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CALL
CASE_OP(MEM_END_PUSH_REC)
mem = p->memory_end.num;
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
si = GET_STACK_INDEX(stkp);
STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = si;
INC_OP;
JUMP_OUT;
{
StackIndex si;
mem = p->memory_end.num;
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
si = GET_STACK_INDEX(stkp);
STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = si;
INC_OP;
JUMP_OUT;
}
CASE_OP(MEM_END_REC)
mem = p->memory_end.num;
@ -3655,12 +3757,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_PUSH:
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
case OP_REPEAT_INC_SG:
case OP_REPEAT_INC_NG_SG:
INC_OP;
break;
default:
goto unexpected_bytecode_error;
MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
break;
}
#else
@ -3776,10 +3876,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
STACK_ENSURE(1);
repeat_stk[mem] = GET_STACK_INDEX(stk);
STACK_PUSH_REPEAT(mem, p + 1);
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + addr, s, sprev);
}
@ -3790,10 +3887,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
STACK_ENSURE(1);
repeat_stk[mem] = GET_STACK_INDEX(stk);
STACK_PUSH_REPEAT(mem, p + 1);
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + 1, s, sprev);
p += addr;
@ -3804,64 +3898,42 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(REPEAT_INC)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
repeat_inc:
stkp->u.repeat.count++;
if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
STACK_GET_REPEAT_COUNT(mem, n);
n++;
if (n >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */
INC_OP;
}
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
STACK_PUSH_ALT(p, s, sprev);
p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
p = reg->repeat_range[mem].u.pcode;
}
else {
p = stkp->u.repeat.pcode;
p = reg->repeat_range[mem].u.pcode;
}
STACK_PUSH_REPEAT_INC(si);
STACK_PUSH_REPEAT_INC(mem, n);
CHECK_INTERRUPT_JUMP_OUT;
CASE_OP(REPEAT_INC_SG)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
CASE_OP(REPEAT_INC_NG)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
repeat_inc_ng:
stkp->u.repeat.count++;
if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
Operation* pcode = stkp->u.repeat.pcode;
STACK_PUSH_REPEAT_INC(si);
STACK_PUSH_ALT(pcode, s, sprev);
STACK_GET_REPEAT_COUNT(mem, n);
n++;
STACK_PUSH_REPEAT_INC(mem, n);
if (n == reg->repeat_range[mem].upper) {
INC_OP;
}
else {
if (n >= reg->repeat_range[mem].lower) {
STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
INC_OP;
}
else {
p = stkp->u.repeat.pcode;
STACK_PUSH_REPEAT_INC(si);
p = reg->repeat_range[mem].u.pcode;
}
}
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
STACK_PUSH_REPEAT_INC(si);
INC_OP;
}
CHECK_INTERRUPT_JUMP_OUT;
CASE_OP(REPEAT_INC_NG_SG)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
CASE_OP(PREC_READ_START)
STACK_PUSH_PREC_READ_START(s, sprev);
INC_OP;
@ -4040,7 +4112,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
call_result = ONIGERR_INVALID_ARGUMENT;
}
best_len = call_result;
goto finish;
goto match_at_end;
break;
}
}
@ -4066,7 +4138,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
CASE_OP(FINISH)
goto finish;
goto match_at_end;
#ifdef ONIG_DEBUG_STATISTICS
fail:
@ -4087,35 +4159,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP_OUT;
DEFAULT_OP
goto bytecode_error;
MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
} BYTECODE_INTERPRETER_END;
finish:
match_at_end:
STACK_SAVE;
return best_len;
#ifdef ONIG_DEBUG
stack_error:
STACK_SAVE;
return ONIGERR_STACK_BUG;
#endif
bytecode_error:
STACK_SAVE;
return ONIGERR_UNDEFINED_BYTECODE;
#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
unexpected_bytecode_error:
STACK_SAVE;
return ONIGERR_UNEXPECTED_BYTECODE;
#endif
#ifdef USE_RETRY_LIMIT_IN_MATCH
retry_limit_in_match_over:
STACK_SAVE;
return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
#endif
}
typedef struct {
@ -4789,60 +4839,6 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
return (UChar* )NULL;
}
static UChar*
sunday_quick_search_case_fold(regex_t* reg,
const UChar* target, const UChar* target_end,
const UChar* text, const UChar* text_end,
const UChar* text_range)
{
const UChar *s, *se, *end;
const UChar *tail;
int skip, tlen1;
int map_offset;
int case_fold_flag;
OnigEncoding enc;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
#endif
enc = reg->enc;
case_fold_flag = reg->case_fold_flag;
tail = target_end - 1;
tlen1 = (int )(tail - target);
end = text_range;
if (end + tlen1 > text_end)
end = text_end - tlen1;
map_offset = reg->map_offset;
s = text;
while (s < end) {
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
s, text_end))
return (UChar* )s;
se = s + tlen1;
if (se + map_offset >= text_end) break;
skip = reg->map[*(se + map_offset)];
#if 0
p = s;
do {
s += enclen(enc, s);
} while ((s - p) < skip && s < end);
#else
/* This is faster than prev code for long text. ex: /(?i)Twain/ */
s += skip;
if (s < end)
s = onigenc_get_right_adjust_char_head(enc, text, s);
#endif
}
return (UChar* )NULL;
}
static UChar*
map_search(OnigEncoding enc, UChar map[],
const UChar* text, const UChar* text_range)
@ -4956,11 +4952,6 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
reg->exact, reg->exact_end, p, end, range);
break;
case OPTIMIZE_STR_CASE_FOLD_FAST:
p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
range);
break;
case OPTIMIZE_STR_FAST:
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
@ -5081,7 +5072,6 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
break;
case OPTIMIZE_STR_CASE_FOLD:
case OPTIMIZE_STR_CASE_FOLD_FAST:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);

View File

@ -47,13 +47,6 @@
#endif
#endif
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(__mc68020__)
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
#ifndef ONIG_DISABLE_DIRECT_THREADING
#ifdef __GNUC__
#define USE_GOTO_LABELS_AS_VALUES
@ -84,6 +77,8 @@
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
#include "regenc.h"
@ -199,39 +194,6 @@ typedef unsigned int uintptr_t;
#define CHAR_MAP_SIZE 256
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
/*
 * PLATFORM_GET_INC(val, p, type): read one `type` value located at p into
 * val, then advance p by sizeof(type).
 *
 * Two implementations are selected by PLATFORM_UNALIGNED_WORD_ACCESS:
 *  - defined:     the value is loaded by dereferencing p cast to (type*).
 *  - not defined: the bytes are copied with xmemcpy(), so p does not have
 *                 to be suitably aligned for `type`.
 */
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
/* The helpers below are only defined when unaligned access is unavailable. */
/* WORD_ALIGNMENT_SIZE: alignment unit in bytes; taken from SIZEOF_SIZE_T
   when that is defined, otherwise from SIZEOF_LONG. */
/* sizeof(OnigCodePoint) */
#ifdef SIZEOF_SIZE_T
# define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T
#else
# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
#endif
/* GET_ALIGNMENT_PAD_SIZE(addr, pad_size): store in pad_size the number of
   bytes from addr up to the next multiple of WORD_ALIGNMENT_SIZE; the
   result is 0 when addr is already a multiple. */
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
/* ALIGNMENT_RIGHT(addr): round addr up, in place, to the next multiple of
   WORD_ALIGNMENT_SIZE (unchanged when it already is one). */
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
#ifdef USE_CALLOUT
@ -274,7 +236,6 @@ enum OptimizeType {
OPTIMIZE_STR, /* Slow Search */
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
OPTIMIZE_MAP /* char map */
};
@ -364,16 +325,12 @@ typedef unsigned int MemStatusType;
/* bitset */
#define BITS_PER_BYTE 8
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
#define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int Bits;
#else
typedef unsigned char Bits;
#endif
typedef Bits BitSet[BITSET_SIZE];
typedef Bits* BitSetRef;
typedef uint32_t Bits;
typedef Bits BitSet[BITSET_SIZE];
typedef Bits* BitSetRef;
#define SIZE_BITSET sizeof(BitSet)
@ -382,8 +339,8 @@ typedef Bits* BitSetRef;
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
} while (0)
#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
#define BS_BIT(pos) (1u << (pos % BITS_IN_ROOM))
#define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5]
#define BS_BIT(pos) (1u << ((unsigned int )(pos) & 0x1f))
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
@ -559,9 +516,13 @@ enum OpCode {
OP_MEM_START,
OP_MEM_START_PUSH, /* push back-tracker to stack */
OP_MEM_END_PUSH, /* push back-tracker to stack */
#ifdef USE_CALL
OP_MEM_END_PUSH_REC, /* push back-tracker to stack */
#endif
OP_MEM_END,
#ifdef USE_CALL
OP_MEM_END_REC, /* push marker to stack */
#endif
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
@ -575,12 +536,12 @@ enum OpCode {
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
OP_EMPTY_CHECK_START, /* null loop checker start */
OP_EMPTY_CHECK_END, /* null loop checker end */
OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
#ifdef USE_CALL
OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
#endif
OP_PREC_READ_START, /* (?=...) start */
OP_PREC_READ_END, /* (?=...) end */
OP_PREC_READ_NOT_START, /* (?!...) start */
@ -590,10 +551,12 @@ enum OpCode {
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
OP_CALL, /* \g<name> */
OP_RETURN,
OP_PUSH_SAVE_VAL,
OP_UPDATE_VAR,
#ifdef USE_CALL
OP_CALL, /* \g<name> */
OP_RETURN,
#endif
#ifdef USE_CALLOUT
OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */
OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */
@ -642,23 +605,8 @@ typedef int ModeType;
#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType)
#define SIZE_MODE sizeof(ModeType)
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
} while(0)
/* op-code + arg size */
@ -838,7 +786,7 @@ typedef struct {
} repeat; /* REPEAT, REPEAT_NG */
struct {
MemNumType id;
} repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */
} repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */
struct {
MemNumType mem;
} empty_check_start;
@ -889,6 +837,15 @@ typedef struct {
#endif
} RegexExt;
/*
 * Range record for one repeat; struct re_pattern_buffer refers to these
 * through its `repeat_range` pointer.
 * NOTE(review): lower/upper are presumably the {n,m} repeat bounds --
 * confirm against the compiler and matcher that fill and read them.
 */
typedef struct {
int lower; /* presumably the minimum repeat count -- TODO confirm */
int upper; /* presumably the maximum repeat count -- TODO confirm */
union {
Operation* pcode; /* address of repeated body */
int offset; /* shares storage with pcode; NOTE(review): looks like an
               index/offset form of the same location -- confirm */
} u;
} RepeatRange;
struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
Operation* ops;
@ -903,15 +860,15 @@ struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
int num_empty_check; /* OP_EMPTY_CHECK_START/END id counter */
int num_call; /* number of subexp call */
MemStatusType capture_history; /* (?@...) flag (1-31) */
MemStatusType push_mem_start; /* need backtrack flag */
MemStatusType push_mem_end; /* need backtrack flag */
MemStatusType empty_status_mem;
int stack_pop_level;
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
int repeat_range_alloc;
RepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;

View File

@ -2173,7 +2173,7 @@ node_new_ctype(int type, int not, OnigOptionType options)
static Node*
node_new_anychar(void)
{
Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);
return node;
}
@ -2691,7 +2691,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[1] = NULL_NODE;
r = ONIGERR_MEMORY;
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);
if (IS_NULL(ns[0])) goto err;
r = node_new_true_anychar(&ns[1], env);
@ -2702,7 +2702,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[0] = x;
ns[1] = NULL_NODE;
x = node_new_quantifier(0, INFINITE_REPEAT, 1);
x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
if (IS_NULL(x)) goto err;
NODE_BODY(x) = ns[0];
@ -2771,7 +2771,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
ns[0] = x;
x = node_new_quantifier(lower, upper, 0);
x = node_new_quantifier(lower, upper, FALSE);
if (IS_NULL(x)) goto err0;
NODE_BODY(x) = ns[0];
@ -2800,7 +2800,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
x = make_alt(2, ns);
if (IS_NULL(x)) goto err0;
if (is_range_cutter != 0)
if (is_range_cutter != FALSE)
NODE_STATUS_ADD(x, SUPER);
*node = x;
@ -2890,7 +2890,10 @@ make_range_clear(Node** node, ScanEnv* env)
ns[0] = NULL_NODE; ns[1] = x;
r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
#define ID_NOT_USED_DONT_CARE_ME 0
r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
ID_NOT_USED_DONT_CARE_ME, env);
if (r != 0) goto err;
x = make_alt(2, ns);
@ -3009,7 +3012,7 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
id1 = GIMMICK_(ns[0])->id;
r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
0, env);
FALSE, env);
if (r != 0) goto err;
ns[2] = ns[3] = NULL_NODE;
@ -3052,7 +3055,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (expr == NULL_NODE) {
/* default expr \O* */
quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
@ -3178,16 +3181,6 @@ node_str_cat_char(Node* node, UChar c)
return onig_node_str_cat(node, s, s + 1);
}
/*
 * Turn `node` into an empty string node in place.
 *
 * The node type is set to NODE_STRING, the string flag word is set to
 * `flag`, and the string is reset to zero length: both `s` and `end` point
 * at the embedded buffer, and capacity 0 marks that buffer as the one in
 * use.  Nothing is freed here.
 */
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
  NODE_SET_TYPE(node, NODE_STRING);

  /* empty string stored in the embedded buffer */
  STR_(node)->s = STR_(node)->end = STR_(node)->buf;
  STR_(node)->capacity = 0;

  STR_(node)->flag = flag;
}
extern void
onig_node_str_clear(Node* node)
{
@ -3196,10 +3189,11 @@ onig_node_str_clear(Node* node)
xfree(STR_(node)->s);
}
STR_(node)->capacity = 0;
STR_(node)->flag = 0;
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
STR_(node)->capacity = 0;
STR_(node)->case_min_len = 0;
}
static Node*
@ -3209,10 +3203,12 @@ node_new_str(const UChar* s, const UChar* end)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_STRING);
STR_(node)->capacity = 0;
STR_(node)->flag = 0;
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
STR_(node)->capacity = 0;
STR_(node)->case_min_len = 0;
if (onig_node_str_cat(node, s, end)) {
onig_node_free(node);
return NULL;
@ -3227,11 +3223,11 @@ onig_node_new_str(const UChar* s, const UChar* end)
}
static Node*
node_new_str_raw(UChar* s, UChar* end)
node_new_str_crude(UChar* s, UChar* end)
{
Node* node = node_new_str(s, end);
CHECK_NULL_RETURN(node);
NODE_STRING_SET_RAW(node);
NODE_STRING_SET_CRUDE(node);
return node;
}
@ -3242,14 +3238,14 @@ node_new_empty(void)
}
static Node*
node_new_str_raw_char(UChar c)
node_new_str_crude_char(UChar c)
{
int i;
UChar p[1];
Node* node;
p[0] = c;
node = node_new_str_raw(p, p + 1);
node = node_new_str_crude(p, p + 1);
/* clear buf tail */
for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
@ -3272,8 +3268,8 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
if (p && p > sn->s) { /* can be split. */
rn = node_new_str(p, sn->end);
CHECK_NULL_RETURN(rn);
if (NODE_STRING_IS_RAW(node))
NODE_STRING_SET_RAW(rn);
if (NODE_STRING_IS_CRUDE(node))
NODE_STRING_SET_CRUDE(rn);
sn->end = (UChar* )p;
}
@ -4004,7 +4000,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
if (alen < 0) return alen;
crnl = node_new_str_raw(buf, buf + dlen + alen);
crnl = node_new_str_crude(buf, buf + dlen + alen);
CHECK_NULL_RETURN_MEMERR(crnl);
ncc = node_new_cclass();
@ -4032,7 +4028,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
if (r != 0) goto err1;
}
x = node_new_bag_if_else(crnl, 0, ncc);
x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
if (IS_NULL(x)) goto err1;
*node = x;
@ -4041,7 +4037,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
enum TokenSyms {
TK_EOT = 0, /* end of token */
TK_RAW_BYTE = 1,
TK_CRUDE_BYTE = 1,
TK_CHAR,
TK_STRING,
TK_CODE_POINT,
@ -4454,7 +4450,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
UChar** rname_end, ScanEnv* env, int* rback_num,
enum REF_NUM* num_type, int ref)
enum REF_NUM* num_type, int is_ref)
{
int r, sign;
int digit_count;
@ -4484,7 +4480,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return ONIGERR_EMPTY_GROUP_NAME;
if (IS_CODE_DIGIT_ASCII(enc, c)) {
if (ref == 1)
if (is_ref == TRUE)
*num_type = IS_ABS_NUM;
else {
r = ONIGERR_INVALID_GROUP_NAME;
@ -4492,7 +4488,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
digit_count++;
}
else if (c == '-') {
if (ref == 1) {
if (is_ref == TRUE) {
*num_type = IS_REL_NUM;
sign = -1;
pnum_head = p;
@ -4502,7 +4498,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
}
else if (c == '+') {
if (ref == 1) {
if (is_ref == TRUE) {
*num_type = IS_REL_NUM;
sign = 1;
pnum_head = p;
@ -4843,7 +4839,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_RAW_BYTE;
tok->type = TK_CRUDE_BYTE;
tok->base = 16;
tok->u.byte = (UChar )code;
}
@ -4876,7 +4872,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_RAW_BYTE;
tok->type = TK_CRUDE_BYTE;
tok->base = 8;
tok->u.byte = (UChar )code;
}
@ -5246,7 +5242,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_RAW_BYTE;
tok->type = TK_CRUDE_BYTE;
tok->base = 16;
tok->u.byte = (UChar )code;
}
@ -5311,7 +5307,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_RAW_BYTE;
tok->type = TK_CRUDE_BYTE;
tok->base = 8;
tok->u.byte = (UChar )code;
}
@ -5338,7 +5334,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
#endif
if (r < 0) return r;
@ -5401,7 +5397,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
&gnum, &num_type, 1);
&gnum, &num_type, TRUE);
if (r < 0) return r;
if (num_type != IS_NOT_NUM) {
@ -5464,7 +5460,6 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
r = fetch_escaped_value(&p, end, env, &c2);
if (r < 0) return r;
/* set_raw: */
if (tok->u.code != c2) {
tok->type = TK_CODE_POINT;
tok->u.code = c2;
@ -5590,8 +5585,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
PINC;
name = p;
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
&num_type, 0);
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
&gnum, &num_type, FALSE);
if (r < 0) return r;
tok->type = TK_CALL;
@ -5623,7 +5618,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
name = p;
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
&gnum, &num_type, 1);
&gnum, &num_type, TRUE);
if (r < 0) return r;
if (num_type == IS_NOT_NUM) {
@ -6097,7 +6092,7 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
r = add_ctype_to_cc(cc, ctype, 0, env);
r = add_ctype_to_cc(cc, ctype, FALSE, env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
@ -6297,7 +6292,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto val_entry2;
break;
case TK_RAW_BYTE:
case TK_CRUDE_BYTE:
/* tok->base != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
int i, j;
@ -6310,7 +6305,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err;
if (r != TK_RAW_BYTE || tok->base != base) {
if (r != TK_CRUDE_BYTE || tok->base != base) {
fetched = 1;
break;
}
@ -6340,7 +6335,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (i == 1) {
in_code = (OnigCodePoint )buf[0];
goto raw_single;
goto crude_single;
}
else {
in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
@ -6349,7 +6344,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else {
in_code = (OnigCodePoint )tok->u.byte;
raw_single:
crude_single:
in_type = CV_SB;
}
in_raw = 1;
@ -6815,7 +6810,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
size_t clen;
add_char:
if (skip_mode == 0) {
if (skip_mode == FALSE) {
clen = p - e;
if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
@ -6832,7 +6827,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
if (max_arg_num >= 0 && n >= max_arg_num)
return ONIGERR_INVALID_CALLOUT_ARG;
if (skip_mode == 0) {
if (skip_mode == FALSE) {
if ((types[n] & ONIG_TYPE_LONG) != 0) {
int fixed = 0;
if (cn > 0) {
@ -6964,7 +6959,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
/* read for single check only */
save = p;
arg_num = parse_callout_args(1, '}', &p, end, -1, 0, 0, env);
arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
if (arg_num < 0) return arg_num;
is_not_single = PPEEK_IS(cterm) ? 0 : 1;
@ -6978,7 +6973,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
types[i] = get_callout_arg_type_by_name_id(name_id, i);
}
arg_num = parse_callout_args(0, '}', &p, end, max_arg_num, types, vals, env);
arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
if (arg_num < 0) return arg_num;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
@ -7078,17 +7073,17 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
group:
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_alts(np, tok, term, &p, end, env, 0);
r = parse_alts(np, tok, term, &p, end, env, FALSE);
if (r < 0) return r;
*src = p;
return 1; /* group */
break;
case '=':
*np = onig_node_new_anchor(ANCR_PREC_READ, 0);
*np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);
break;
case '!': /* preceding read */
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);
break;
case '>': /* (?>...) stop backtrack */
*np = node_new_bag(BAG_STOP_BACKTRACK);
@ -7106,9 +7101,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);
else if (c == '!')
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);
else {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
@ -7124,7 +7119,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
named_group2:
name = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
&num_type, 0);
&num_type, FALSE);
if (r < 0) return r;
num = scan_env_add_mem_entry(env);
@ -7173,7 +7168,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_alts(&absent, tok, term, &p, end, env, 1);
r = parse_alts(&absent, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(absent);
return r;
@ -7260,7 +7255,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r == 1) exist_level = 1;
#else
r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
&p, end, &name_end, env, &back_num, &num_type, 1);
&p, end, &name_end, env, &back_num, &num_type, TRUE);
#endif
if (r < 0) {
if (is_enclosed == 0) {
@ -7284,7 +7279,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_INVALID_BACKREF;
}
condition = node_new_backref_checker(1, &back_num, 0,
condition = node_new_backref_checker(1, &back_num, FALSE,
#ifdef USE_BACKREF_WITH_LEVEL
exist_level, level,
#endif
@ -7307,7 +7302,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
}
condition = node_new_backref_checker(num, backs, 1,
condition = node_new_backref_checker(num, backs, TRUE,
#ifdef USE_BACKREF_WITH_LEVEL
exist_level, level,
#endif
@ -7349,7 +7344,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
condition_is_checker = 0;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_alts(&condition, tok, term, &p, end, env, 0);
r = parse_alts(&condition, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(condition);
return r;
@ -7392,7 +7387,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
onig_node_free(condition);
return r;
}
r = parse_alts(&target, tok, term, &p, end, env, 1);
r = parse_alts(&target, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(condition);
onig_node_free(target);
@ -7493,7 +7488,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case 'm':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));
}
else if (IS_SYNTAX_OP2(env->syntax,
ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
@ -7529,16 +7524,16 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
return ONIGERR_UNDEFINED_GROUP_OPTION;
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
break;
#ifdef USE_UNICODE_WORD_BREAK
case 'w':
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
return ONIGERR_UNDEFINED_GROUP_OPTION;
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
break;
#endif
default:
@ -7568,7 +7563,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_alts(&target, tok, term, &p, end, env, 0);
r = parse_alts(&target, tok, term, &p, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@ -7615,7 +7610,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_alts(&target, tok, term, &p, end, env, 0);
r = parse_alts(&target, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(target);
return r;
@ -7768,6 +7763,29 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
}\
} while (0)
/*
 * Build a new character-class node containing the n code points listed in
 * codes[].
 *
 * *rnode is set to NULL_NODE up front, so it stays NULL_NODE if the class
 * node cannot be allocated (CHECK_NULL_RETURN_MEMERR returns early in that
 * case).  On success *rnode receives the new node and 0 is returned.
 * Each code point is added with ADD_CODE_INTO_CC using encoding `enc`.
 */
extern int
onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
                               int n, OnigCodePoint codes[])
{
  int idx;
  Node* cls_node;
  CClassNode* cc;

  *rnode = NULL_NODE;

  cls_node = node_new_cclass();
  CHECK_NULL_RETURN_MEMERR(cls_node);

  cc = CCLASS_(cls_node);
  idx = 0;
  while (idx < n) {
    ADD_CODE_INTO_CC(cc, codes[idx], enc);
    idx++;
  }

  *rnode = cls_node;
  return 0;
}
typedef struct {
ScanEnv* env;
CClassNode* cc;
@ -7927,7 +7945,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_alts(&target, tok, term, src, end, env, 0);
r = parse_alts(&target, tok, term, src, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@ -7942,7 +7960,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
if (tok->escaped) goto tk_raw_byte;
if (tok->escaped) goto tk_crude_byte;
else goto tk_byte;
break;
@ -7967,23 +7985,23 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
break;
case TK_RAW_BYTE:
tk_raw_byte:
case TK_CRUDE_BYTE:
tk_crude_byte:
{
*np = node_new_str_raw_char(tok->u.byte);
*np = node_new_str_crude_char(tok->u.byte);
CHECK_NULL_RETURN_MEMERR(*np);
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
if (len == enclen(env->enc, STR_(*np)->s)) {
r = fetch_token(tok, src, end, env);
goto tk_raw_byte_end;
goto tk_crude_byte_end;
}
}
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
if (r != TK_RAW_BYTE)
if (r != TK_CRUDE_BYTE)
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
r = node_str_cat_char(*np, tok->u.byte);
@ -7992,11 +8010,11 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len++;
}
tk_raw_byte_end:
tk_crude_byte_end:
if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
NODE_STRING_CLEAR_RAW(*np);
NODE_STRING_CLEAR_CRUDE(*np);
goto string_end;
}
break;
@ -8007,7 +8025,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
if (len < 0) return len;
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
*np = node_new_str_raw(buf, buf + len);
*np = node_new_str_crude(buf, buf + len);
#else
*np = node_new_str(buf, buf + len);
#endif
@ -8050,7 +8068,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
break;
@ -8109,7 +8127,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
qn = node_new_quantifier(0, INFINITE_REPEAT, 0);
qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
CHECK_NULL_RETURN_MEMERR(qn);
NODE_BODY(qn) = *np;
*np = qn;
@ -8300,7 +8318,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
headp = &(NODE_CDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env, 0);
r = parse_exp(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@ -8353,7 +8371,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_branch(&node, tok, term, src, end, env, 0);
r = parse_branch(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@ -8392,7 +8410,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
r = parse_alts(top, &tok, TK_EOT, src, end, env, 0);
r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);
if (r < 0) return r;
return 0;

View File

@ -32,7 +32,7 @@
#include "regint.h"
#define NODE_STRING_MARGIN 16
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* node type */
@ -83,8 +83,9 @@ typedef struct {
UChar* s;
UChar* end;
unsigned int flag;
int capacity; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STRING_BUF_SIZE];
int capacity; /* (allocated size - 1) or 0: use buf[] */
int case_min_len;
} StrNode;
typedef struct {
@ -293,30 +294,21 @@ typedef struct _Node {
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
#define NODE_STRING_CRUDE (1<<0)
#define NODE_STRING_CASE_EXPANDED (1<<1)
#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
#define NODE_STRING_GOOD_AMBIG (1<<3)
#define NODE_STRING_DONT_GET_OPT_INFO (1<<4)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
#define NODE_STRING_IS_RAW(node) \
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
#define NODE_STRING_IS_CRUDE(node) \
(((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
#define NODE_STRING_IS_CASE_EXPANDED(node) \
(((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
(((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
#define NODE_STRING_IS_GOOD_AMBIG(node) \
(((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
@ -446,7 +438,6 @@ extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
@ -460,6 +451,7 @@ extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
#ifdef USE_CALLOUT

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
/* Computed positions: -k'1-3' */
@ -2983,7 +2983,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])
4026
};
if (0 == 0)
{
int key = hash(codes);

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
/* Computed positions: -k'3,6' */
@ -211,7 +211,7 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
129
};
if (0 == 0)
{
int key = hash(codes);

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
/* Computed positions: -k'3,6,9' */
@ -121,7 +121,7 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
0
};
if (0 == 0)
{
int key = hash(codes);

View File

@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
/* Computed positions: -k'1-3,5-6,12,16,$' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
@ -29581,6 +29581,7 @@ unicode_lookup_property_name (register const char *str, register size_t len)
#define UNICODE_PROPERTY_VERSION 120100
#define UNICODE_EMOJI_VERSION 1201
#define PROPERTY_NAME_MAX_SIZE 59
#define CODE_RANGES_NUM 568

View File

@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
/* Computed positions: -k'1,3' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_unfold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
/* Computed positions: -k'1-3' */
@ -3288,7 +3288,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code)
{0x1e907, 4005, 1}
};
if (0 == 0)
{
int key = hash(&code);