mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
+ upd: Oniguruma update (utf-8 version)
This commit is contained in:
parent
c7f4d725e3
commit
bf5840cd87
@ -1,8 +1,22 @@
|
||||
History
|
||||
|
||||
2019/MM/DD: Version 6.9.4
|
||||
|
||||
2019/10/31: Update Unicode Emoji version to 12.1 (No data changed)
|
||||
2019/10/29: implement USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR configuration
|
||||
2019/10/18: re-implement case fold conversion
|
||||
2019/10/04: fix #156: Heap buffer overflow in match_at() with case-insensitive match
|
||||
2019/09/30: NEW API: add onig_regset_replace()
|
||||
2019/09/30: change Unicode VERSION value format
|
||||
2019/09/20: NEW API: add regset functions
|
||||
2019/09/20: add data ensure check before peek string value in OP_PUSH_IF_PEEK_NEXT
|
||||
2019/09/20: fix loose code in encode-harness.c
|
||||
2019/08/13: fix heap-buffer-overflow
|
||||
2019/08/13: Add a macro to disable direct threading in the match engine (PR#149)
|
||||
|
||||
2019/08/06: Version 6.9.3 (security fix release)
|
||||
|
||||
2019/07/30: add ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE
|
||||
2019/07/30: add ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC
|
||||
2019/07/29: add STK_PREC_READ_START/END stack type
|
||||
2019/07/29: Fix #147: Stack Exhaustion Problem caused by some parsing functions
|
||||
2019/07/11: add a dictionary file for libfuzzer
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Oniguruma Regular Expressions Version 6.9.2 2019/08/08
|
||||
Oniguruma Regular Expressions Version 6.9.4 2019/10/31
|
||||
|
||||
syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
|
||||
@ -289,6 +289,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
In negative look-behind, capturing group isn't allowed,
|
||||
but non-capturing group (?:) is allowed.
|
||||
|
||||
* In look-behind and negative look-behind, support for
|
||||
ignore-case option is limited. Only supports conversion
|
||||
between single characters. (Does not support conversion
|
||||
of multiple characters in Unicode)
|
||||
|
||||
(?>subexp) atomic group
|
||||
no backtracks in subexp.
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Unicode Properties (from Unicode Version: 12.1.0)
|
||||
Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1)
|
||||
|
||||
15: ASCII_Hex_Digit
|
||||
16: Adlam
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// encoding: UTF8
|
||||
/**********************************************************************
|
||||
ascii.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
#pragma once
|
||||
/* encoding: UTF8 */
|
||||
|
||||
#ifndef _ONIGURUMA_CONFIG_H_
|
||||
#define _ONIGURUMA_CONFIG_H_
|
||||
|
||||
@ -37,6 +39,7 @@
|
||||
#endif
|
||||
#define HAVE_DECL_SYS_NERR 1
|
||||
#define STDC_HEADERS 1
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
|
||||
@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
|
||||
REG_RETURN_TYPE = re.compile('^const\s+short\s+int\s*\*')
|
||||
REG_FOLD_KEY = re.compile('unicode_fold(\d)_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
|
||||
REG_ENTRY = re.compile('\{".*?",\s*(-?\d+)\s*\}')
|
||||
REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
|
||||
REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
|
||||
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
|
||||
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
|
||||
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
|
||||
@ -34,7 +34,7 @@ def parse_line(s, key_len):
|
||||
if r != s: return r
|
||||
r = re.sub(REG_ENTRY, '\\1', s)
|
||||
if r != s: return r
|
||||
r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
|
||||
r = re.sub(REG_IF_LEN, '', s)
|
||||
if r != s: return r
|
||||
r = re.sub(REG_GET_HASH, 'int key = hash(codes);', s)
|
||||
if r != s: return r
|
||||
|
||||
@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
|
||||
REG_UNFOLD_KEY = re.compile('onigenc_unicode_unfold_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
|
||||
REG_ENTRY = re.compile('\{".+?",\s*/\*(.+?)\*/\s*(-?\d+),\s*(\d)\}')
|
||||
REG_EMPTY_ENTRY = re.compile('\{"",\s*(-?\d+),\s*(\d)\}')
|
||||
REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
|
||||
REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
|
||||
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
|
||||
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
|
||||
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
|
||||
@ -32,7 +32,7 @@ def parse_line(s):
|
||||
if r != s: return r
|
||||
r = re.sub(REG_EMPTY_ENTRY, '{0xffffffff, \\1, \\2}', s)
|
||||
if r != s: return r
|
||||
r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
|
||||
r = re.sub(REG_IF_LEN, '', s)
|
||||
if r != s: return r
|
||||
r = re.sub(REG_GET_HASH, 'int key = hash(&code);', s)
|
||||
if r != s: return r
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# make_unicode_property_data.py
|
||||
# Copyright (c) 2016-2018 K.Kosako
|
||||
# Copyright (c) 2016-2019 K.Kosako
|
||||
|
||||
import sys
|
||||
import re
|
||||
@ -22,9 +22,12 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
|
||||
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
|
||||
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
|
||||
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
|
||||
VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
|
||||
UNICODE_VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
|
||||
EMOJI_VERSION_REG = re.compile("(?i)#\s*Version:\s*(\d+)\.(\d+)")
|
||||
|
||||
VERSION_INFO = [-1, -1, -1]
|
||||
EMOJI_VERSION_INFO = [-1, -1]
|
||||
|
||||
DIC = { }
|
||||
KDIC = { }
|
||||
PropIndex = { }
|
||||
@ -40,14 +43,6 @@ def fix_block_name(name):
|
||||
s = re.sub(r'[- ]+', '_', name)
|
||||
return 'In_' + s
|
||||
|
||||
def check_version_info(s):
|
||||
m = VERSION_REG.match(s)
|
||||
if m is not None:
|
||||
VERSION_INFO[0] = int(m.group(1))
|
||||
VERSION_INFO[1] = int(m.group(2))
|
||||
VERSION_INFO[2] = int(m.group(3))
|
||||
|
||||
|
||||
def print_ranges(ranges):
|
||||
for (start, end) in ranges:
|
||||
print "0x%06x, 0x%06x" % (start, end)
|
||||
@ -234,7 +229,8 @@ def parse_unicode_data_file(f):
|
||||
normalize_ranges_in_dic(dic)
|
||||
return dic, assigned
|
||||
|
||||
def parse_properties(path, klass, prop_prefix = None):
|
||||
def parse_properties(path, klass, prop_prefix = None, version_reg = None):
|
||||
version_match = None
|
||||
with open(path, 'r') as f:
|
||||
dic = { }
|
||||
prop = None
|
||||
@ -244,9 +240,10 @@ def parse_properties(path, klass, prop_prefix = None):
|
||||
if len(s) == 0:
|
||||
continue
|
||||
|
||||
if s[0] == '#':
|
||||
if VERSION_INFO[0] < 0:
|
||||
check_version_info(s)
|
||||
if s[0] == '#' and version_reg is not None and version_match is None:
|
||||
version_match = version_reg.match(s)
|
||||
if version_match is not None:
|
||||
continue
|
||||
|
||||
m = PR_LINE_REG.match(s)
|
||||
if m:
|
||||
@ -267,7 +264,7 @@ def parse_properties(path, klass, prop_prefix = None):
|
||||
props.append(prop)
|
||||
|
||||
normalize_ranges_in_dic(dic)
|
||||
return (dic, props)
|
||||
return (dic, props, version_match)
|
||||
|
||||
def parse_property_aliases(path):
|
||||
a = { }
|
||||
@ -415,11 +412,11 @@ def entry_and_print_prop_and_index(name, index):
|
||||
nname = normalize_prop_name(name)
|
||||
print_prop_and_index(nname, index)
|
||||
|
||||
def parse_and_merge_properties(path, klass):
|
||||
dic, props = parse_properties(path, klass)
|
||||
def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None):
|
||||
dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)
|
||||
merge_dic(DIC, dic)
|
||||
merge_props(PROPS, props)
|
||||
return dic, props
|
||||
return dic, props, ver_m
|
||||
|
||||
### main ###
|
||||
argv = sys.argv
|
||||
@ -448,11 +445,21 @@ with open('UnicodeData.txt', 'r') as f:
|
||||
PROPS = DIC.keys()
|
||||
PROPS = list_sub(PROPS, POSIX_LIST)
|
||||
|
||||
parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')
|
||||
dic, props = parse_and_merge_properties('Scripts.txt', 'Script')
|
||||
_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG)
|
||||
if ver_m is not None:
|
||||
VERSION_INFO[0] = int(ver_m.group(1))
|
||||
VERSION_INFO[1] = int(ver_m.group(2))
|
||||
VERSION_INFO[2] = int(ver_m.group(3))
|
||||
|
||||
dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')
|
||||
DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic))
|
||||
|
||||
parse_and_merge_properties('PropList.txt', 'Binary Property')
|
||||
parse_and_merge_properties('emoji-data.txt', 'Emoji Property')
|
||||
|
||||
_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG)
|
||||
if ver_m is not None:
|
||||
EMOJI_VERSION_INFO[0] = int(ver_m.group(1))
|
||||
EMOJI_VERSION_INFO[1] = int(ver_m.group(2))
|
||||
|
||||
PROPS.append('Unknown')
|
||||
KDIC['Unknown'] = 'Script'
|
||||
@ -465,9 +472,9 @@ dic, BLOCKS = parse_blocks('Blocks.txt')
|
||||
merge_dic(DIC, dic)
|
||||
|
||||
if INCLUDE_GRAPHEME_CLUSTER_DATA:
|
||||
dic, props = parse_properties('GraphemeBreakProperty.txt',
|
||||
'GraphemeBreak Property',
|
||||
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
|
||||
dic, props, _ = parse_properties('GraphemeBreakProperty.txt',
|
||||
'GraphemeBreak Property',
|
||||
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
|
||||
merge_dic(DIC, dic)
|
||||
merge_props(PROPS, props)
|
||||
#prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other'
|
||||
@ -535,9 +542,11 @@ sys.stdout.write(s)
|
||||
if OUTPUT_LIST_MODE:
|
||||
UPF = open("UNICODE_PROPERTIES", "w")
|
||||
if VERSION_INFO[0] < 0:
|
||||
raise RuntimeError("Version is not found")
|
||||
raise RuntimeError("Unicode Version is not found")
|
||||
if EMOJI_VERSION_INFO[0] < 0:
|
||||
raise RuntimeError("Emoji Version is not found")
|
||||
|
||||
print >> UPF, "Unicode Properties (from Unicode Version: %d.%d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
|
||||
print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d, Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
|
||||
print >> UPF, ''
|
||||
|
||||
index = -1
|
||||
@ -573,9 +582,12 @@ print '%%'
|
||||
print ''
|
||||
if not(POSIX_ONLY):
|
||||
if VERSION_INFO[0] < 0:
|
||||
raise RuntimeError("Version is not found")
|
||||
raise RuntimeError("Unicode Version is not found")
|
||||
if EMOJI_VERSION_INFO[0] < 0:
|
||||
raise RuntimeError("Emoji Version is not found")
|
||||
|
||||
print "#define UNICODE_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
|
||||
print "#define UNICODE_EMOJI_VERSION %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
|
||||
print ''
|
||||
|
||||
print "#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
mktable.c
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
onig_init.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2016-2019 K.Kosako
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#define ONIGGNU_H
|
||||
/**********************************************************************
|
||||
oniggnu.h - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#define ONIGURUMA_H
|
||||
/**********************************************************************
|
||||
oniguruma.h - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regenc.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#define REGENC_H
|
||||
/**********************************************************************
|
||||
regenc.h - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regerror.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regexec.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
@ -219,9 +220,13 @@ static OpInfoType OpInfo[] = {
|
||||
{ OP_MEM_START_PUSH, "mem-start-push" },
|
||||
{ OP_MEM_START, "mem-start" },
|
||||
{ OP_MEM_END_PUSH, "mem-end-push" },
|
||||
#ifdef USE_CALL
|
||||
{ OP_MEM_END_PUSH_REC, "mem-end-push-rec" },
|
||||
#endif
|
||||
{ OP_MEM_END, "mem-end" },
|
||||
#ifdef USE_CALL
|
||||
{ OP_MEM_END_REC, "mem-end-rec" },
|
||||
#endif
|
||||
{ OP_FAIL, "fail" },
|
||||
{ OP_JUMP, "jump" },
|
||||
{ OP_PUSH, "push" },
|
||||
@ -235,12 +240,12 @@ static OpInfoType OpInfo[] = {
|
||||
{ OP_REPEAT_NG, "repeat-ng" },
|
||||
{ OP_REPEAT_INC, "repeat-inc" },
|
||||
{ OP_REPEAT_INC_NG, "repeat-inc-ng" },
|
||||
{ OP_REPEAT_INC_SG, "repeat-inc-sg" },
|
||||
{ OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
|
||||
{ OP_EMPTY_CHECK_START, "empty-check-start" },
|
||||
{ OP_EMPTY_CHECK_END, "empty-check-end" },
|
||||
{ OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
|
||||
#ifdef USE_CALL
|
||||
{ OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
|
||||
#endif
|
||||
{ OP_PREC_READ_START, "push-pos" },
|
||||
{ OP_PREC_READ_END, "pop-pos" },
|
||||
{ OP_PREC_READ_NOT_START, "prec-read-not-start" },
|
||||
@ -250,10 +255,12 @@ static OpInfoType OpInfo[] = {
|
||||
{ OP_LOOK_BEHIND, "look-behind" },
|
||||
{ OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
|
||||
{ OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
|
||||
{ OP_CALL, "call" },
|
||||
{ OP_RETURN, "return" },
|
||||
{ OP_PUSH_SAVE_VAL, "push-save-val" },
|
||||
{ OP_UPDATE_VAR, "update-var" },
|
||||
#ifdef USE_CALL
|
||||
{ OP_CALL, "call" },
|
||||
{ OP_RETURN, "return" },
|
||||
#endif
|
||||
#ifdef USE_CALLOUT
|
||||
{ OP_CALLOUT_CONTENTS, "callout-contents" },
|
||||
{ OP_CALLOUT_NAME, "callout-name" },
|
||||
@ -466,10 +473,13 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
mem = p->memory_start.num;
|
||||
fprintf(f, ":%d", mem);
|
||||
break;
|
||||
case OP_MEM_END_PUSH:
|
||||
case OP_MEM_END_PUSH_REC:
|
||||
|
||||
case OP_MEM_END:
|
||||
case OP_MEM_END_PUSH:
|
||||
#ifdef USE_CALL
|
||||
case OP_MEM_END_REC:
|
||||
case OP_MEM_END_PUSH_REC:
|
||||
#endif
|
||||
mem = p->memory_end.num;
|
||||
fprintf(f, ":%d", mem);
|
||||
break;
|
||||
@ -513,8 +523,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
|
||||
case OP_REPEAT_INC:
|
||||
case OP_REPEAT_INC_NG:
|
||||
case OP_REPEAT_INC_SG:
|
||||
case OP_REPEAT_INC_NG_SG:
|
||||
mem = p->repeat.id;
|
||||
fprintf(f, ":%d", mem);
|
||||
break;
|
||||
@ -525,7 +533,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
break;
|
||||
case OP_EMPTY_CHECK_END:
|
||||
case OP_EMPTY_CHECK_END_MEMST:
|
||||
#ifdef USE_CALL
|
||||
case OP_EMPTY_CHECK_END_MEMST_PUSH:
|
||||
#endif
|
||||
mem = p->empty_check_end.mem;
|
||||
fprintf(f, ":%d", mem);
|
||||
break;
|
||||
@ -548,10 +558,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
p_rel_addr(f, addr, p, start);
|
||||
break;
|
||||
|
||||
#ifdef USE_CALL
|
||||
case OP_CALL:
|
||||
addr = p->call.addr;
|
||||
fprintf(f, ":{/%d}", addr);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_PUSH_SAVE_VAL:
|
||||
{
|
||||
@ -621,7 +633,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
|
||||
case OP_ATOMIC_START:
|
||||
case OP_ATOMIC_END:
|
||||
case OP_LOOK_BEHIND_NOT_END:
|
||||
#ifdef USE_CALL
|
||||
case OP_RETURN:
|
||||
#endif
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -957,7 +971,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
|
||||
result = ONIGERR_INVALID_ARGUMENT;\
|
||||
}\
|
||||
best_len = result;\
|
||||
goto finish;\
|
||||
goto match_at_end;\
|
||||
break;\
|
||||
}\
|
||||
} while(0)
|
||||
@ -979,18 +993,26 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
|
||||
/* handled by normal-POP */
|
||||
#define STK_MEM_START 0x0010
|
||||
#define STK_MEM_END 0x8030
|
||||
#define STK_REPEAT_INC 0x0050
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
#define STK_REPEAT_INC (0x0040 | STK_MASK_POP_HANDLED)
|
||||
#else
|
||||
#define STK_REPEAT_INC 0x0040
|
||||
#endif
|
||||
#ifdef USE_CALLOUT
|
||||
#define STK_CALLOUT 0x0070
|
||||
#endif
|
||||
|
||||
/* avoided by normal-POP */
|
||||
#define STK_VOID 0x0000 /* for fill a blank */
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
#define STK_EMPTY_CHECK_START (0x3000 | STK_MASK_POP_HANDLED)
|
||||
#else
|
||||
#define STK_EMPTY_CHECK_START 0x3000
|
||||
#endif
|
||||
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
|
||||
#define STK_MEM_END_MARK 0x8100
|
||||
#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
|
||||
#define STK_REPEAT 0x0300
|
||||
/* #define STK_REPEAT 0x0300 */
|
||||
#define STK_CALL_FRAME 0x0400
|
||||
#define STK_RETURN 0x0500
|
||||
#define STK_SAVE_VAL 0x0600
|
||||
@ -1016,11 +1038,10 @@ typedef struct _StackType {
|
||||
UChar* pstr_prev; /* previous char position of pstr */
|
||||
} state;
|
||||
struct {
|
||||
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
|
||||
Operation* pcode; /* byte code position (head of repeated target) */
|
||||
} repeat;
|
||||
struct {
|
||||
StackIndex si; /* index of stack */
|
||||
int count;
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
StackIndex prev_index; /* index of stack */
|
||||
#endif
|
||||
} repeat_inc;
|
||||
struct {
|
||||
UChar *pstr; /* start/end position */
|
||||
@ -1029,7 +1050,10 @@ typedef struct _StackType {
|
||||
StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
|
||||
} mem;
|
||||
struct {
|
||||
UChar *pstr; /* start position */
|
||||
UChar *pstr; /* start position */
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
StackIndex prev_index; /* index of stack */
|
||||
#endif
|
||||
} empty_check;
|
||||
#ifdef USE_CALL
|
||||
struct {
|
||||
@ -1075,6 +1099,41 @@ struct OnigCalloutArgsStruct {
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
|
||||
#define PTR_NUM_SIZE(reg) ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
|
||||
#define UPDATE_FOR_STACK_REALLOC do{\
|
||||
repeat_stk = (StackIndex* )alloc_base;\
|
||||
empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
|
||||
mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
|
||||
mem_end_stk = mem_start_stk + num_mem + 1;\
|
||||
} while(0)
|
||||
|
||||
#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
|
||||
#define LOAD_TO_REPEAT_STK_VAR(sid) repeat_stk[sid] = GET_STACK_INDEX(stk)
|
||||
#define POP_REPEAT_INC else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}
|
||||
|
||||
#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
|
||||
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) empty_check_stk[sid] = GET_STACK_INDEX(stk)
|
||||
#define POP_EMPTY_CHECK_START else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}
|
||||
|
||||
#else
|
||||
|
||||
#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
|
||||
#define UPDATE_FOR_STACK_REALLOC do{\
|
||||
mem_start_stk = (StackIndex* )alloc_base;\
|
||||
mem_end_stk = mem_start_stk + num_mem + 1;\
|
||||
} while(0)
|
||||
|
||||
#define SAVE_REPEAT_STK_VAR(sid)
|
||||
#define LOAD_TO_REPEAT_STK_VAR(sid)
|
||||
#define POP_REPEAT_INC
|
||||
|
||||
#define SAVE_EMPTY_CHECK_STK_VAR(sid)
|
||||
#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
|
||||
#define POP_EMPTY_CHECK_START
|
||||
|
||||
#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
|
||||
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
|
||||
@ -1086,7 +1145,7 @@ struct OnigCalloutArgsStruct {
|
||||
(msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
|
||||
(msa).mp = mpv;\
|
||||
(msa).best_len = ONIG_MISMATCH;\
|
||||
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
|
||||
(msa).ptr_num = PTR_NUM_SIZE(reg);\
|
||||
} while(0)
|
||||
#else
|
||||
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
|
||||
@ -1097,7 +1156,7 @@ struct OnigCalloutArgsStruct {
|
||||
(msa).match_stack_limit = (mpv)->match_stack_limit;\
|
||||
(msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
|
||||
(msa).mp = mpv;\
|
||||
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
|
||||
(msa).ptr_num = PTR_NUM_SIZE(reg);\
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
@ -1152,12 +1211,6 @@ struct OnigCalloutArgsStruct {
|
||||
};\
|
||||
} while(0)
|
||||
|
||||
#define UPDATE_FOR_STACK_REALLOC do{\
|
||||
repeat_stk = (StackIndex* )alloc_base;\
|
||||
mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
|
||||
mem_end_stk = mem_start_stk + num_mem + 1;\
|
||||
} while(0)
|
||||
|
||||
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
|
||||
|
||||
extern unsigned int
|
||||
@ -1178,7 +1231,9 @@ onig_set_match_stack_limit_size(unsigned int size)
|
||||
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
|
||||
|
||||
#define CHECK_RETRY_LIMIT_IN_MATCH do {\
|
||||
if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
|
||||
if (retry_in_match_counter++ > retry_limit_in_match) {\
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
@ -1568,19 +1623,23 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
|
||||
STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
|
||||
|
||||
#if 0
|
||||
#define STACK_PUSH_REPEAT(sid, pat) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = STK_REPEAT;\
|
||||
stk->zid = (sid);\
|
||||
stk->u.repeat.pcode = (pat);\
|
||||
stk->u.repeat.count = 0;\
|
||||
stk->u.repeat.pcode = (pat);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
#define STACK_PUSH_REPEAT_INC(sindex) do {\
|
||||
#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = STK_REPEAT_INC;\
|
||||
stk->u.repeat_inc.si = (sindex);\
|
||||
stk->zid = (sid);\
|
||||
stk->u.repeat_inc.count = (ct);\
|
||||
SAVE_REPEAT_STK_VAR(sid);\
|
||||
LOAD_TO_REPEAT_STK_VAR(sid);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
@ -1653,6 +1712,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
stk->type = STK_EMPTY_CHECK_START;\
|
||||
stk->zid = (cnum);\
|
||||
stk->u.empty_check.pstr = (s);\
|
||||
SAVE_EMPTY_CHECK_STK_VAR(cnum);\
|
||||
LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
@ -1790,7 +1851,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
#define STACK_BASE_CHECK(p, at) \
|
||||
if ((p) < stk_base) {\
|
||||
fprintf(stderr, "at %s\n", at);\
|
||||
goto stack_error;\
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
|
||||
}
|
||||
#else
|
||||
#define STACK_BASE_CHECK(p, at)
|
||||
@ -1841,13 +1902,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
|
||||
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
|
||||
}\
|
||||
else if (stk->type == STK_REPEAT_INC) {\
|
||||
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
|
||||
}\
|
||||
else if (stk->type == STK_MEM_END) {\
|
||||
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
|
||||
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
|
||||
}\
|
||||
POP_REPEAT_INC \
|
||||
POP_EMPTY_CHECK_START \
|
||||
POP_CALLOUT_CASE\
|
||||
}\
|
||||
}\
|
||||
@ -1866,13 +1926,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
|
||||
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
|
||||
}\
|
||||
else if (stk->type == STK_REPEAT_INC) {\
|
||||
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
|
||||
}\
|
||||
else if (stk->type == STK_MEM_END) {\
|
||||
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
|
||||
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
|
||||
}\
|
||||
POP_REPEAT_INC \
|
||||
POP_EMPTY_CHECK_START \
|
||||
/* Don't call callout here because negation of total success by (?!..) (?<!..) */\
|
||||
}\
|
||||
}\
|
||||
@ -1924,20 +1983,41 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
|
||||
StackType* k = stk;\
|
||||
|
||||
#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
|
||||
k = stk;\
|
||||
while (1) {\
|
||||
k--;\
|
||||
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
|
||||
STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
|
||||
if (k->type == STK_EMPTY_CHECK_START) {\
|
||||
if (k->zid == (sid)) {\
|
||||
(isnull) = (k->u.empty_check.pstr == (s));\
|
||||
break;\
|
||||
}\
|
||||
if (k->zid == (sid)) break;\
|
||||
}\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
|
||||
#define GET_EMPTY_CHECK_START(sid, k) do {\
|
||||
if (reg->num_call == 0) {\
|
||||
k = STACK_AT(empty_check_stk[sid]);\
|
||||
}\
|
||||
else {\
|
||||
EMPTY_CHECK_START_SEARCH(sid, k);\
|
||||
}\
|
||||
} while(0)
|
||||
#else
|
||||
|
||||
#define GET_EMPTY_CHECK_START(sid, k) EMPTY_CHECK_START_SEARCH(sid, k)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
|
||||
StackType* k;\
|
||||
GET_EMPTY_CHECK_START(sid, k);\
|
||||
(isnull) = (k->u.empty_check.pstr == (s));\
|
||||
} while(0)
|
||||
|
||||
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
|
||||
if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
|
||||
(addr) = 0;\
|
||||
@ -1951,39 +2031,30 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
} while (0)
|
||||
|
||||
#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
|
||||
#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
|
||||
StackType* k = stk;\
|
||||
while (1) {\
|
||||
k--;\
|
||||
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
|
||||
if (k->type == STK_EMPTY_CHECK_START) {\
|
||||
if (k->zid == (sid)) {\
|
||||
if (k->u.empty_check.pstr != (s)) {\
|
||||
(isnull) = 0;\
|
||||
break;\
|
||||
#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
|
||||
StackType* k;\
|
||||
GET_EMPTY_CHECK_START(sid, k);\
|
||||
if (k->u.empty_check.pstr != (s)) {\
|
||||
(isnull) = 0;\
|
||||
}\
|
||||
else {\
|
||||
UChar* endp;\
|
||||
(isnull) = 1;\
|
||||
while (k < stk) {\
|
||||
if (k->type == STK_MEM_START &&\
|
||||
MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
|
||||
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
|
||||
if (endp == 0) {\
|
||||
(isnull) = 0; break;\
|
||||
}\
|
||||
else {\
|
||||
UChar* endp;\
|
||||
(isnull) = 1;\
|
||||
while (k < stk) {\
|
||||
if (k->type == STK_MEM_START &&\
|
||||
MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
|
||||
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
|
||||
if (endp == 0) {\
|
||||
(isnull) = 0; break;\
|
||||
}\
|
||||
else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
|
||||
(isnull) = 0; break;\
|
||||
}\
|
||||
else if (endp != s) {\
|
||||
(isnull) = -1; /* empty, but position changed */ \
|
||||
}\
|
||||
}\
|
||||
k++;\
|
||||
}\
|
||||
break;\
|
||||
else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
|
||||
(isnull) = 0; break;\
|
||||
}\
|
||||
else if (endp != s) {\
|
||||
(isnull) = -1; /* empty, but position changed */ \
|
||||
}\
|
||||
}\
|
||||
k++;\
|
||||
}\
|
||||
}\
|
||||
} while(0)
|
||||
@ -2064,24 +2135,45 @@ stack_double(int is_alloca, char** arg_alloc_base,
|
||||
} while(0)
|
||||
#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
|
||||
|
||||
#define STACK_GET_REPEAT(sid, k) do {\
|
||||
int level = 0;\
|
||||
k = stk;\
|
||||
#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
|
||||
StackType* k = stk;\
|
||||
while (1) {\
|
||||
k--;\
|
||||
STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
|
||||
if (k->type == STK_REPEAT) {\
|
||||
if (level == 0) {\
|
||||
if (k->zid == (sid)) {\
|
||||
break;\
|
||||
}\
|
||||
(k)--;\
|
||||
STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
|
||||
if ((k)->type == STK_REPEAT_INC) {\
|
||||
if ((k)->zid == (sid)) {\
|
||||
(c) = (k)->u.repeat_inc.count;\
|
||||
break;\
|
||||
}\
|
||||
}\
|
||||
else if ((k)->type == STK_RETURN) {\
|
||||
int level = -1;\
|
||||
while (1) {\
|
||||
(k)--;\
|
||||
if ((k)->type == STK_CALL_FRAME) {\
|
||||
level++;\
|
||||
if (level == 0) break;\
|
||||
}\
|
||||
else if ((k)->type == STK_RETURN) level--;\
|
||||
}\
|
||||
}\
|
||||
else if (k->type == STK_CALL_FRAME) level--;\
|
||||
else if (k->type == STK_RETURN) level++;\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
|
||||
#define STACK_GET_REPEAT_COUNT(sid, c) do {\
|
||||
if (reg->num_call == 0) {\
|
||||
(c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
|
||||
}\
|
||||
else {\
|
||||
STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
|
||||
}\
|
||||
} while(0)
|
||||
#else
|
||||
#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
|
||||
#endif
|
||||
|
||||
#define STACK_RETURN(addr) do {\
|
||||
int level = 0;\
|
||||
StackType* k = stk;\
|
||||
@ -2483,6 +2575,8 @@ typedef struct {
|
||||
#define MATCH_DEBUG_OUT(offset)
|
||||
#endif
|
||||
|
||||
/*
 * Abort the current match with an error: store err_code in best_len and
 * jump to the match_at_end label of the enclosing function.
 *
 * The body is wrapped in do { ... } while(0) so the macro expands to a
 * single statement.  Without the wrapper, a call such as
 *     if (r < 0) MATCH_AT_ERROR_RETURN(r);
 * would guard only the assignment, and the goto would run unconditionally.
 */
#define MATCH_AT_ERROR_RETURN(err_code) do {\
  best_len = (err_code); goto match_at_end;\
} while(0)
|
||||
|
||||
|
||||
/* match data(str - end) from position (sstart). */
|
||||
/* if sstart == str then set sprev to NULL. */
|
||||
@ -2556,9 +2650,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
&&L_MEM_START,
|
||||
&&L_MEM_START_PUSH,
|
||||
&&L_MEM_END_PUSH,
|
||||
#ifdef USE_CALL
|
||||
&&L_MEM_END_PUSH_REC,
|
||||
#endif
|
||||
&&L_MEM_END,
|
||||
#ifdef USE_CALL
|
||||
&&L_MEM_END_REC,
|
||||
#endif
|
||||
&&L_FAIL,
|
||||
&&L_JUMP,
|
||||
&&L_PUSH,
|
||||
@ -2572,12 +2670,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
&&L_REPEAT_NG,
|
||||
&&L_REPEAT_INC,
|
||||
&&L_REPEAT_INC_NG,
|
||||
&&L_REPEAT_INC_SG,
|
||||
&&L_REPEAT_INC_NG_SG,
|
||||
&&L_EMPTY_CHECK_START,
|
||||
&&L_EMPTY_CHECK_END,
|
||||
&&L_EMPTY_CHECK_END_MEMST,
|
||||
#ifdef USE_CALL
|
||||
&&L_EMPTY_CHECK_END_MEMST_PUSH,
|
||||
#endif
|
||||
&&L_PREC_READ_START,
|
||||
&&L_PREC_READ_END,
|
||||
&&L_PREC_READ_NOT_START,
|
||||
@ -2587,10 +2685,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
&&L_LOOK_BEHIND,
|
||||
&&L_LOOK_BEHIND_NOT_START,
|
||||
&&L_LOOK_BEHIND_NOT_END,
|
||||
&&L_CALL,
|
||||
&&L_RETURN,
|
||||
&&L_PUSH_SAVE_VAL,
|
||||
&&L_UPDATE_VAR,
|
||||
#ifdef USE_CALL
|
||||
&&L_CALL,
|
||||
&&L_RETURN,
|
||||
#endif
|
||||
#ifdef USE_CALLOUT
|
||||
&&L_CALLOUT_CONTENTS,
|
||||
&&L_CALLOUT_NAME,
|
||||
@ -2608,15 +2708,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
char *alloc_base;
|
||||
StackType *stk_base, *stk, *stk_end;
|
||||
StackType *stkp; /* used as any purpose. */
|
||||
StackIndex si;
|
||||
StackIndex *repeat_stk;
|
||||
StackIndex *mem_start_stk, *mem_end_stk;
|
||||
UChar* keep;
|
||||
|
||||
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
|
||||
StackIndex *repeat_stk;
|
||||
StackIndex *empty_check_stk;
|
||||
#endif
|
||||
#ifdef USE_RETRY_LIMIT_IN_MATCH
|
||||
unsigned long retry_limit_in_match;
|
||||
unsigned long retry_in_match_counter;
|
||||
#endif
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
int of;
|
||||
#endif
|
||||
@ -2745,10 +2847,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
stkp = stk_base;
|
||||
r = make_capture_history_tree(region->history_root, &stkp,
|
||||
stk, (UChar* )str, reg);
|
||||
if (r < 0) {
|
||||
best_len = r; /* error code */
|
||||
goto finish;
|
||||
}
|
||||
if (r < 0) MATCH_AT_ERROR_RETURN(r);
|
||||
}
|
||||
#endif /* USE_CAPTURE_HISTORY */
|
||||
#ifdef USE_POSIX_API_REGION_OPTION
|
||||
@ -2773,7 +2872,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
|
||||
/* default behavior: return first-matching result. */
|
||||
goto finish;
|
||||
goto match_at_end;
|
||||
|
||||
CASE_OP(EXACT1)
|
||||
DATA_ENSURE(1);
|
||||
@ -3293,7 +3392,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
goto bytecode_error;
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -3419,13 +3518,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
#ifdef USE_CALL
|
||||
CASE_OP(MEM_END_PUSH_REC)
|
||||
mem = p->memory_end.num;
|
||||
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
|
||||
si = GET_STACK_INDEX(stkp);
|
||||
STACK_PUSH_MEM_END(mem, s);
|
||||
mem_start_stk[mem] = si;
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
{
|
||||
StackIndex si;
|
||||
|
||||
mem = p->memory_end.num;
|
||||
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
|
||||
si = GET_STACK_INDEX(stkp);
|
||||
STACK_PUSH_MEM_END(mem, s);
|
||||
mem_start_stk[mem] = si;
|
||||
INC_OP;
|
||||
JUMP_OUT;
|
||||
}
|
||||
|
||||
CASE_OP(MEM_END_REC)
|
||||
mem = p->memory_end.num;
|
||||
@ -3655,12 +3758,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
case OP_PUSH:
|
||||
case OP_REPEAT_INC:
|
||||
case OP_REPEAT_INC_NG:
|
||||
case OP_REPEAT_INC_SG:
|
||||
case OP_REPEAT_INC_NG_SG:
|
||||
INC_OP;
|
||||
break;
|
||||
default:
|
||||
goto unexpected_bytecode_error;
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
@ -3776,10 +3877,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
mem = p->repeat.id; /* mem: OP_REPEAT ID */
|
||||
addr = p->repeat.addr;
|
||||
|
||||
STACK_ENSURE(1);
|
||||
repeat_stk[mem] = GET_STACK_INDEX(stk);
|
||||
STACK_PUSH_REPEAT(mem, p + 1);
|
||||
|
||||
STACK_PUSH_REPEAT_INC(mem, 0);
|
||||
if (reg->repeat_range[mem].lower == 0) {
|
||||
STACK_PUSH_ALT(p + addr, s, sprev);
|
||||
}
|
||||
@ -3790,10 +3888,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
mem = p->repeat.id; /* mem: OP_REPEAT ID */
|
||||
addr = p->repeat.addr;
|
||||
|
||||
STACK_ENSURE(1);
|
||||
repeat_stk[mem] = GET_STACK_INDEX(stk);
|
||||
STACK_PUSH_REPEAT(mem, p + 1);
|
||||
|
||||
STACK_PUSH_REPEAT_INC(mem, 0);
|
||||
if (reg->repeat_range[mem].lower == 0) {
|
||||
STACK_PUSH_ALT(p + 1, s, sprev);
|
||||
p += addr;
|
||||
@ -3804,64 +3899,42 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
CASE_OP(REPEAT_INC)
|
||||
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
|
||||
si = repeat_stk[mem];
|
||||
stkp = STACK_AT(si);
|
||||
|
||||
repeat_inc:
|
||||
stkp->u.repeat.count++;
|
||||
if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
|
||||
STACK_GET_REPEAT_COUNT(mem, n);
|
||||
n++;
|
||||
if (n >= reg->repeat_range[mem].upper) {
|
||||
/* end of repeat. Nothing to do. */
|
||||
INC_OP;
|
||||
}
|
||||
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
|
||||
else if (n >= reg->repeat_range[mem].lower) {
|
||||
INC_OP;
|
||||
STACK_PUSH_ALT(p, s, sprev);
|
||||
p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
|
||||
p = reg->repeat_range[mem].u.pcode;
|
||||
}
|
||||
else {
|
||||
p = stkp->u.repeat.pcode;
|
||||
p = reg->repeat_range[mem].u.pcode;
|
||||
}
|
||||
STACK_PUSH_REPEAT_INC(si);
|
||||
STACK_PUSH_REPEAT_INC(mem, n);
|
||||
CHECK_INTERRUPT_JUMP_OUT;
|
||||
|
||||
CASE_OP(REPEAT_INC_SG)
|
||||
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
|
||||
STACK_GET_REPEAT(mem, stkp);
|
||||
si = GET_STACK_INDEX(stkp);
|
||||
goto repeat_inc;
|
||||
|
||||
CASE_OP(REPEAT_INC_NG)
|
||||
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
|
||||
si = repeat_stk[mem];
|
||||
stkp = STACK_AT(si);
|
||||
|
||||
repeat_inc_ng:
|
||||
stkp->u.repeat.count++;
|
||||
if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
|
||||
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
|
||||
Operation* pcode = stkp->u.repeat.pcode;
|
||||
|
||||
STACK_PUSH_REPEAT_INC(si);
|
||||
STACK_PUSH_ALT(pcode, s, sprev);
|
||||
STACK_GET_REPEAT_COUNT(mem, n);
|
||||
n++;
|
||||
STACK_PUSH_REPEAT_INC(mem, n);
|
||||
if (n == reg->repeat_range[mem].upper) {
|
||||
INC_OP;
|
||||
}
|
||||
else {
|
||||
if (n >= reg->repeat_range[mem].lower) {
|
||||
STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
|
||||
INC_OP;
|
||||
}
|
||||
else {
|
||||
p = stkp->u.repeat.pcode;
|
||||
STACK_PUSH_REPEAT_INC(si);
|
||||
p = reg->repeat_range[mem].u.pcode;
|
||||
}
|
||||
}
|
||||
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
|
||||
STACK_PUSH_REPEAT_INC(si);
|
||||
INC_OP;
|
||||
}
|
||||
CHECK_INTERRUPT_JUMP_OUT;
|
||||
|
||||
CASE_OP(REPEAT_INC_NG_SG)
|
||||
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
|
||||
STACK_GET_REPEAT(mem, stkp);
|
||||
si = GET_STACK_INDEX(stkp);
|
||||
goto repeat_inc_ng;
|
||||
|
||||
CASE_OP(PREC_READ_START)
|
||||
STACK_PUSH_PREC_READ_START(s, sprev);
|
||||
INC_OP;
|
||||
@ -4040,7 +4113,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
call_result = ONIGERR_INVALID_ARGUMENT;
|
||||
}
|
||||
best_len = call_result;
|
||||
goto finish;
|
||||
goto match_at_end;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -4066,7 +4139,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
#endif
|
||||
|
||||
CASE_OP(FINISH)
|
||||
goto finish;
|
||||
goto match_at_end;
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
fail:
|
||||
@ -4087,35 +4160,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
JUMP_OUT;
|
||||
|
||||
DEFAULT_OP
|
||||
goto bytecode_error;
|
||||
MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
|
||||
|
||||
} BYTECODE_INTERPRETER_END;
|
||||
|
||||
finish:
|
||||
match_at_end:
|
||||
STACK_SAVE;
|
||||
return best_len;
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
stack_error:
|
||||
STACK_SAVE;
|
||||
return ONIGERR_STACK_BUG;
|
||||
#endif
|
||||
|
||||
bytecode_error:
|
||||
STACK_SAVE;
|
||||
return ONIGERR_UNDEFINED_BYTECODE;
|
||||
|
||||
#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
|
||||
unexpected_bytecode_error:
|
||||
STACK_SAVE;
|
||||
return ONIGERR_UNEXPECTED_BYTECODE;
|
||||
#endif
|
||||
|
||||
#ifdef USE_RETRY_LIMIT_IN_MATCH
|
||||
retry_limit_in_match_over:
|
||||
STACK_SAVE;
|
||||
return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
@ -4789,60 +4840,6 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
sunday_quick_search_case_fold(regex_t* reg,
|
||||
const UChar* target, const UChar* target_end,
|
||||
const UChar* text, const UChar* text_end,
|
||||
const UChar* text_range)
|
||||
{
|
||||
const UChar *s, *se, *end;
|
||||
const UChar *tail;
|
||||
int skip, tlen1;
|
||||
int map_offset;
|
||||
int case_fold_flag;
|
||||
OnigEncoding enc;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr,
|
||||
"sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
|
||||
#endif
|
||||
|
||||
enc = reg->enc;
|
||||
case_fold_flag = reg->case_fold_flag;
|
||||
|
||||
tail = target_end - 1;
|
||||
tlen1 = (int )(tail - target);
|
||||
end = text_range;
|
||||
if (end + tlen1 > text_end)
|
||||
end = text_end - tlen1;
|
||||
|
||||
map_offset = reg->map_offset;
|
||||
s = text;
|
||||
|
||||
while (s < end) {
|
||||
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
|
||||
s, text_end))
|
||||
return (UChar* )s;
|
||||
|
||||
se = s + tlen1;
|
||||
if (se + map_offset >= text_end) break;
|
||||
skip = reg->map[*(se + map_offset)];
|
||||
#if 0
|
||||
p = s;
|
||||
do {
|
||||
s += enclen(enc, s);
|
||||
} while ((s - p) < skip && s < end);
|
||||
#else
|
||||
/* This is faster than prev code for long text. ex: /(?i)Twain/ */
|
||||
s += skip;
|
||||
if (s < end)
|
||||
s = onigenc_get_right_adjust_char_head(enc, text, s);
|
||||
#endif
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
map_search(OnigEncoding enc, UChar map[],
|
||||
const UChar* text, const UChar* text_range)
|
||||
@ -4956,11 +4953,6 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
|
||||
reg->exact, reg->exact_end, p, end, range);
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_CASE_FOLD_FAST:
|
||||
p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
|
||||
range);
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_FAST:
|
||||
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
|
||||
break;
|
||||
@ -5081,7 +5073,6 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
|
||||
break;
|
||||
|
||||
case OPTIMIZE_STR_CASE_FOLD:
|
||||
case OPTIMIZE_STR_CASE_FOLD_FAST:
|
||||
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
|
||||
reg->exact, reg->exact_end,
|
||||
range, adjrange, end, p);
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regext.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
reggnu.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#define REGINT_H
|
||||
/**********************************************************************
|
||||
regint.h - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
@ -47,13 +48,6 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
(defined(__ppc__) && defined(__APPLE__)) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(__mc68020__)
|
||||
#define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_DISABLE_DIRECT_THREADING
|
||||
#ifdef __GNUC__
|
||||
#define USE_GOTO_LABELS_AS_VALUES
|
||||
@ -84,6 +78,8 @@
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
/* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
|
||||
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
@ -199,39 +195,6 @@ typedef unsigned int uintptr_t;
|
||||
#define CHAR_MAP_SIZE 256
|
||||
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
|
||||
#define PLATFORM_GET_INC(val,p,type) do{\
|
||||
val = *(type* )p;\
|
||||
(p) += sizeof(type);\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
|
||||
#define PLATFORM_GET_INC(val,p,type) do{\
|
||||
xmemcpy(&val, (p), sizeof(type));\
|
||||
(p) += sizeof(type);\
|
||||
} while(0)
|
||||
|
||||
/* sizeof(OnigCodePoint) */
|
||||
#ifdef SIZEOF_SIZE_T
|
||||
# define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T
|
||||
#else
|
||||
# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
|
||||
#endif
|
||||
|
||||
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
|
||||
(pad_size) = WORD_ALIGNMENT_SIZE - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
|
||||
} while (0)
|
||||
|
||||
#define ALIGNMENT_RIGHT(addr) do {\
|
||||
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
|
||||
(addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
} while (0)
|
||||
|
||||
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
|
||||
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
|
||||
@ -274,7 +237,6 @@ enum OptimizeType {
|
||||
OPTIMIZE_STR, /* Slow Search */
|
||||
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
|
||||
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
|
||||
OPTIMIZE_MAP /* char map */
|
||||
};
|
||||
@ -364,16 +326,12 @@ typedef unsigned int MemStatusType;
|
||||
/* bitset */
|
||||
#define BITS_PER_BYTE 8
|
||||
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
|
||||
#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
|
||||
#define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */
|
||||
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
typedef unsigned int Bits;
|
||||
#else
|
||||
typedef unsigned char Bits;
|
||||
#endif
|
||||
typedef Bits BitSet[BITSET_SIZE];
|
||||
typedef Bits* BitSetRef;
|
||||
typedef uint32_t Bits;
|
||||
typedef Bits BitSet[BITSET_SIZE];
|
||||
typedef Bits* BitSetRef;
|
||||
|
||||
#define SIZE_BITSET sizeof(BitSet)
|
||||
|
||||
@ -382,8 +340,8 @@ typedef Bits* BitSetRef;
|
||||
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
|
||||
} while (0)
|
||||
|
||||
#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
|
||||
#define BS_BIT(pos) (1u << (pos % BITS_IN_ROOM))
|
||||
#define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5]
|
||||
#define BS_BIT(pos) (1u << ((unsigned int )(pos) & 0x1f))
|
||||
|
||||
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
|
||||
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
|
||||
@ -559,9 +517,13 @@ enum OpCode {
|
||||
OP_MEM_START,
|
||||
OP_MEM_START_PUSH, /* push back-tracker to stack */
|
||||
OP_MEM_END_PUSH, /* push back-tracker to stack */
|
||||
#ifdef USE_CALL
|
||||
OP_MEM_END_PUSH_REC, /* push back-tracker to stack */
|
||||
#endif
|
||||
OP_MEM_END,
|
||||
#ifdef USE_CALL
|
||||
OP_MEM_END_REC, /* push marker to stack */
|
||||
#endif
|
||||
OP_FAIL, /* pop stack and move */
|
||||
OP_JUMP,
|
||||
OP_PUSH,
|
||||
@ -575,12 +537,12 @@ enum OpCode {
|
||||
OP_REPEAT_NG, /* {n,m}? (non greedy) */
|
||||
OP_REPEAT_INC,
|
||||
OP_REPEAT_INC_NG, /* non greedy */
|
||||
OP_REPEAT_INC_SG, /* search and get in stack */
|
||||
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
|
||||
OP_EMPTY_CHECK_START, /* null loop checker start */
|
||||
OP_EMPTY_CHECK_END, /* null loop checker end */
|
||||
OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
|
||||
#ifdef USE_CALL
|
||||
OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
|
||||
#endif
|
||||
OP_PREC_READ_START, /* (?=...) start */
|
||||
OP_PREC_READ_END, /* (?=...) end */
|
||||
OP_PREC_READ_NOT_START, /* (?!...) start */
|
||||
@ -590,10 +552,12 @@ enum OpCode {
|
||||
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
|
||||
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
|
||||
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
|
||||
OP_CALL, /* \g<name> */
|
||||
OP_RETURN,
|
||||
OP_PUSH_SAVE_VAL,
|
||||
OP_UPDATE_VAR,
|
||||
#ifdef USE_CALL
|
||||
OP_CALL, /* \g<name> */
|
||||
OP_RETURN,
|
||||
#endif
|
||||
#ifdef USE_CALLOUT
|
||||
OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */
|
||||
OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */
|
||||
@ -642,23 +606,8 @@ typedef int ModeType;
|
||||
#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType)
|
||||
#define SIZE_MODE sizeof(ModeType)
|
||||
|
||||
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
|
||||
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
|
||||
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
|
||||
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
|
||||
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
|
||||
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
|
||||
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
|
||||
#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
|
||||
#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
|
||||
#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType)
|
||||
|
||||
/* code point's address must be aligned address. */
|
||||
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
|
||||
#define GET_BYTE_INC(byte,p) do{\
|
||||
byte = *(p);\
|
||||
(p)++;\
|
||||
} while(0)
|
||||
|
||||
|
||||
/* op-code + arg size */
|
||||
@ -838,7 +787,7 @@ typedef struct {
|
||||
} repeat; /* REPEAT, REPEAT_NG */
|
||||
struct {
|
||||
MemNumType id;
|
||||
} repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */
|
||||
} repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */
|
||||
struct {
|
||||
MemNumType mem;
|
||||
} empty_check_start;
|
||||
@ -889,6 +838,15 @@ typedef struct {
|
||||
#endif
|
||||
} RegexExt;
|
||||
|
||||
typedef struct {
|
||||
int lower;
|
||||
int upper;
|
||||
union {
|
||||
Operation* pcode; /* address of repeated body */
|
||||
int offset;
|
||||
} u;
|
||||
} RepeatRange;
|
||||
|
||||
struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
Operation* ops;
|
||||
@ -903,15 +861,15 @@ struct re_pattern_buffer {
|
||||
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
|
||||
int num_empty_check; /* OP_EMPTY_CHECK_START/END id counter */
|
||||
int num_call; /* number of subexp call */
|
||||
MemStatusType capture_history; /* (?@...) flag (1-31) */
|
||||
MemStatusType push_mem_start; /* need backtrack flag */
|
||||
MemStatusType push_mem_end; /* need backtrack flag */
|
||||
MemStatusType empty_status_mem;
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
int repeat_range_alloc;
|
||||
RepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regparse.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
@ -2173,7 +2174,7 @@ node_new_ctype(int type, int not, OnigOptionType options)
|
||||
static Node*
|
||||
node_new_anychar(void)
|
||||
{
|
||||
Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
|
||||
Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -2691,7 +2692,7 @@ make_text_segment(Node** node, ScanEnv* env)
|
||||
ns[1] = NULL_NODE;
|
||||
|
||||
r = ONIGERR_MEMORY;
|
||||
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
|
||||
ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);
|
||||
if (IS_NULL(ns[0])) goto err;
|
||||
|
||||
r = node_new_true_anychar(&ns[1], env);
|
||||
@ -2702,7 +2703,7 @@ make_text_segment(Node** node, ScanEnv* env)
|
||||
ns[0] = x;
|
||||
ns[1] = NULL_NODE;
|
||||
|
||||
x = node_new_quantifier(0, INFINITE_REPEAT, 1);
|
||||
x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
|
||||
if (IS_NULL(x)) goto err;
|
||||
|
||||
NODE_BODY(x) = ns[0];
|
||||
@ -2771,7 +2772,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
|
||||
|
||||
ns[0] = x;
|
||||
|
||||
x = node_new_quantifier(lower, upper, 0);
|
||||
x = node_new_quantifier(lower, upper, FALSE);
|
||||
if (IS_NULL(x)) goto err0;
|
||||
|
||||
NODE_BODY(x) = ns[0];
|
||||
@ -2800,7 +2801,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
|
||||
x = make_alt(2, ns);
|
||||
if (IS_NULL(x)) goto err0;
|
||||
|
||||
if (is_range_cutter != 0)
|
||||
if (is_range_cutter != FALSE)
|
||||
NODE_STATUS_ADD(x, SUPER);
|
||||
|
||||
*node = x;
|
||||
@ -2890,7 +2891,10 @@ make_range_clear(Node** node, ScanEnv* env)
|
||||
|
||||
ns[0] = NULL_NODE; ns[1] = x;
|
||||
|
||||
r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
|
||||
#define ID_NOT_USED_DONT_CARE_ME 0
|
||||
|
||||
r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
|
||||
ID_NOT_USED_DONT_CARE_ME, env);
|
||||
if (r != 0) goto err;
|
||||
|
||||
x = make_alt(2, ns);
|
||||
@ -3009,7 +3013,7 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
|
||||
id1 = GIMMICK_(ns[0])->id;
|
||||
|
||||
r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
|
||||
0, env);
|
||||
FALSE, env);
|
||||
if (r != 0) goto err;
|
||||
|
||||
ns[2] = ns[3] = NULL_NODE;
|
||||
@ -3052,7 +3056,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
|
||||
|
||||
if (expr == NULL_NODE) {
|
||||
/* default expr \O* */
|
||||
quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
|
||||
quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
|
||||
if (IS_NULL(quant)) goto err0;
|
||||
|
||||
r = node_new_true_anychar(&body, env);
|
||||
@ -3178,16 +3182,6 @@ node_str_cat_char(Node* node, UChar c)
|
||||
return onig_node_str_cat(node, s, s + 1);
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_node_conv_to_str_node(Node* node, int flag)
|
||||
{
|
||||
NODE_SET_TYPE(node, NODE_STRING);
|
||||
STR_(node)->flag = flag;
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->s = STR_(node)->buf;
|
||||
STR_(node)->end = STR_(node)->buf;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_node_str_clear(Node* node)
|
||||
{
|
||||
@ -3196,10 +3190,11 @@ onig_node_str_clear(Node* node)
|
||||
xfree(STR_(node)->s);
|
||||
}
|
||||
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->flag = 0;
|
||||
STR_(node)->s = STR_(node)->buf;
|
||||
STR_(node)->end = STR_(node)->buf;
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->case_min_len = 0;
|
||||
}
|
||||
|
||||
static Node*
|
||||
@ -3209,10 +3204,12 @@ node_new_str(const UChar* s, const UChar* end)
|
||||
CHECK_NULL_RETURN(node);
|
||||
|
||||
NODE_SET_TYPE(node, NODE_STRING);
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->flag = 0;
|
||||
STR_(node)->s = STR_(node)->buf;
|
||||
STR_(node)->end = STR_(node)->buf;
|
||||
STR_(node)->capacity = 0;
|
||||
STR_(node)->case_min_len = 0;
|
||||
|
||||
if (onig_node_str_cat(node, s, end)) {
|
||||
onig_node_free(node);
|
||||
return NULL;
|
||||
@ -3227,11 +3224,11 @@ onig_node_new_str(const UChar* s, const UChar* end)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_raw(UChar* s, UChar* end)
|
||||
node_new_str_crude(UChar* s, UChar* end)
|
||||
{
|
||||
Node* node = node_new_str(s, end);
|
||||
CHECK_NULL_RETURN(node);
|
||||
NODE_STRING_SET_RAW(node);
|
||||
NODE_STRING_SET_CRUDE(node);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -3242,14 +3239,14 @@ node_new_empty(void)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_raw_char(UChar c)
|
||||
node_new_str_crude_char(UChar c)
|
||||
{
|
||||
int i;
|
||||
UChar p[1];
|
||||
Node* node;
|
||||
|
||||
p[0] = c;
|
||||
node = node_new_str_raw(p, p + 1);
|
||||
node = node_new_str_crude(p, p + 1);
|
||||
|
||||
/* clear buf tail */
|
||||
for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
|
||||
@ -3272,8 +3269,8 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
|
||||
if (p && p > sn->s) { /* can be split. */
|
||||
rn = node_new_str(p, sn->end);
|
||||
CHECK_NULL_RETURN(rn);
|
||||
if (NODE_STRING_IS_RAW(node))
|
||||
NODE_STRING_SET_RAW(rn);
|
||||
if (NODE_STRING_IS_CRUDE(node))
|
||||
NODE_STRING_SET_CRUDE(rn);
|
||||
|
||||
sn->end = (UChar* )p;
|
||||
}
|
||||
@ -4004,7 +4001,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
|
||||
alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
|
||||
if (alen < 0) return alen;
|
||||
|
||||
crnl = node_new_str_raw(buf, buf + dlen + alen);
|
||||
crnl = node_new_str_crude(buf, buf + dlen + alen);
|
||||
CHECK_NULL_RETURN_MEMERR(crnl);
|
||||
|
||||
ncc = node_new_cclass();
|
||||
@ -4032,7 +4029,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
|
||||
if (r != 0) goto err1;
|
||||
}
|
||||
|
||||
x = node_new_bag_if_else(crnl, 0, ncc);
|
||||
x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
|
||||
if (IS_NULL(x)) goto err1;
|
||||
|
||||
*node = x;
|
||||
@ -4041,7 +4038,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
|
||||
|
||||
enum TokenSyms {
|
||||
TK_EOT = 0, /* end of token */
|
||||
TK_RAW_BYTE = 1,
|
||||
TK_CRUDE_BYTE = 1,
|
||||
TK_CHAR,
|
||||
TK_STRING,
|
||||
TK_CODE_POINT,
|
||||
@ -4454,7 +4451,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
|
||||
static int
|
||||
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
||||
UChar** rname_end, ScanEnv* env, int* rback_num,
|
||||
enum REF_NUM* num_type, int ref)
|
||||
enum REF_NUM* num_type, int is_ref)
|
||||
{
|
||||
int r, sign;
|
||||
int digit_count;
|
||||
@ -4484,7 +4481,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
||||
return ONIGERR_EMPTY_GROUP_NAME;
|
||||
|
||||
if (IS_CODE_DIGIT_ASCII(enc, c)) {
|
||||
if (ref == 1)
|
||||
if (is_ref == TRUE)
|
||||
*num_type = IS_ABS_NUM;
|
||||
else {
|
||||
r = ONIGERR_INVALID_GROUP_NAME;
|
||||
@ -4492,7 +4489,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
||||
digit_count++;
|
||||
}
|
||||
else if (c == '-') {
|
||||
if (ref == 1) {
|
||||
if (is_ref == TRUE) {
|
||||
*num_type = IS_REL_NUM;
|
||||
sign = -1;
|
||||
pnum_head = p;
|
||||
@ -4502,7 +4499,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
|
||||
}
|
||||
}
|
||||
else if (c == '+') {
|
||||
if (ref == 1) {
|
||||
if (is_ref == TRUE) {
|
||||
*num_type = IS_REL_NUM;
|
||||
sign = 1;
|
||||
pnum_head = p;
|
||||
@ -4843,7 +4840,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_RAW_BYTE;
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
@ -4876,7 +4873,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_RAW_BYTE;
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
@ -5246,7 +5243,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_RAW_BYTE;
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
@ -5311,7 +5308,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_RAW_BYTE;
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
@ -5338,7 +5335,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (r == 1) tok->u.backref.exist_level = 1;
|
||||
else tok->u.backref.exist_level = 0;
|
||||
#else
|
||||
r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
|
||||
r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
|
||||
#endif
|
||||
if (r < 0) return r;
|
||||
|
||||
@ -5401,7 +5398,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
prev = p;
|
||||
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
|
||||
&gnum, &num_type, 1);
|
||||
&gnum, &num_type, TRUE);
|
||||
if (r < 0) return r;
|
||||
|
||||
if (num_type != IS_NOT_NUM) {
|
||||
@ -5464,7 +5461,6 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
PUNFETCH;
|
||||
r = fetch_escaped_value(&p, end, env, &c2);
|
||||
if (r < 0) return r;
|
||||
/* set_raw: */
|
||||
if (tok->u.code != c2) {
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = c2;
|
||||
@ -5590,8 +5586,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
{
|
||||
PINC;
|
||||
name = p;
|
||||
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
|
||||
&num_type, 0);
|
||||
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
|
||||
&gnum, &num_type, FALSE);
|
||||
if (r < 0) return r;
|
||||
|
||||
tok->type = TK_CALL;
|
||||
@ -5623,7 +5619,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
{
|
||||
name = p;
|
||||
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
|
||||
&gnum, &num_type, 1);
|
||||
&gnum, &num_type, TRUE);
|
||||
if (r < 0) return r;
|
||||
|
||||
if (num_type == IS_NOT_NUM) {
|
||||
@ -6097,7 +6093,7 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en
|
||||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
cc = CCLASS_(*np);
|
||||
r = add_ctype_to_cc(cc, ctype, 0, env);
|
||||
r = add_ctype_to_cc(cc, ctype, FALSE, env);
|
||||
if (r != 0) return r;
|
||||
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
|
||||
|
||||
@ -6297,7 +6293,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
goto val_entry2;
|
||||
break;
|
||||
|
||||
case TK_RAW_BYTE:
|
||||
case TK_CRUDE_BYTE:
|
||||
/* tok->base != 0 : octal or hexadec. */
|
||||
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
|
||||
int i, j;
|
||||
@ -6310,7 +6306,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
if (r != TK_RAW_BYTE || tok->base != base) {
|
||||
if (r != TK_CRUDE_BYTE || tok->base != base) {
|
||||
fetched = 1;
|
||||
break;
|
||||
}
|
||||
@ -6340,7 +6336,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
if (i == 1) {
|
||||
in_code = (OnigCodePoint )buf[0];
|
||||
goto raw_single;
|
||||
goto crude_single;
|
||||
}
|
||||
else {
|
||||
in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
|
||||
@ -6349,7 +6345,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
}
|
||||
else {
|
||||
in_code = (OnigCodePoint )tok->u.byte;
|
||||
raw_single:
|
||||
crude_single:
|
||||
in_type = CV_SB;
|
||||
}
|
||||
in_raw = 1;
|
||||
@ -6815,7 +6811,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
|
||||
size_t clen;
|
||||
|
||||
add_char:
|
||||
if (skip_mode == 0) {
|
||||
if (skip_mode == FALSE) {
|
||||
clen = p - e;
|
||||
if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
|
||||
return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
|
||||
@ -6832,7 +6828,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
|
||||
if (max_arg_num >= 0 && n >= max_arg_num)
|
||||
return ONIGERR_INVALID_CALLOUT_ARG;
|
||||
|
||||
if (skip_mode == 0) {
|
||||
if (skip_mode == FALSE) {
|
||||
if ((types[n] & ONIG_TYPE_LONG) != 0) {
|
||||
int fixed = 0;
|
||||
if (cn > 0) {
|
||||
@ -6964,7 +6960,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
|
||||
|
||||
/* read for single check only */
|
||||
save = p;
|
||||
arg_num = parse_callout_args(1, '}', &p, end, -1, 0, 0, env);
|
||||
arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
|
||||
if (arg_num < 0) return arg_num;
|
||||
|
||||
is_not_single = PPEEK_IS(cterm) ? 0 : 1;
|
||||
@ -6978,7 +6974,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
|
||||
types[i] = get_callout_arg_type_by_name_id(name_id, i);
|
||||
}
|
||||
|
||||
arg_num = parse_callout_args(0, '}', &p, end, max_arg_num, types, vals, env);
|
||||
arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
|
||||
if (arg_num < 0) return arg_num;
|
||||
|
||||
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
|
||||
@ -7078,17 +7074,17 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
group:
|
||||
r = fetch_token(tok, &p, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(np, tok, term, &p, end, env, 0);
|
||||
r = parse_alts(np, tok, term, &p, end, env, FALSE);
|
||||
if (r < 0) return r;
|
||||
*src = p;
|
||||
return 1; /* group */
|
||||
break;
|
||||
|
||||
case '=':
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ, 0);
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);
|
||||
break;
|
||||
case '!': /* preceding read */
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);
|
||||
*np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);
|
||||
break;
|
||||
case '>': /* (?>...) stop backtrack */
|
||||
*np = node_new_bag(BAG_STOP_BACKTRACK);
|
||||
@ -7106,9 +7102,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
|
||||
PFETCH(c);
|
||||
if (c == '=')
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);
|
||||
else if (c == '!')
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);
|
||||
*np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);
|
||||
else {
|
||||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
|
||||
UChar *name;
|
||||
@ -7124,7 +7120,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
named_group2:
|
||||
name = p;
|
||||
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
|
||||
&num_type, 0);
|
||||
&num_type, FALSE);
|
||||
if (r < 0) return r;
|
||||
|
||||
num = scan_env_add_mem_entry(env);
|
||||
@ -7173,7 +7169,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
|
||||
r = fetch_token(tok, &p, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&absent, tok, term, &p, end, env, 1);
|
||||
r = parse_alts(&absent, tok, term, &p, end, env, TRUE);
|
||||
if (r < 0) {
|
||||
onig_node_free(absent);
|
||||
return r;
|
||||
@ -7260,7 +7256,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (r == 1) exist_level = 1;
|
||||
#else
|
||||
r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
|
||||
&p, end, &name_end, env, &back_num, &num_type, 1);
|
||||
&p, end, &name_end, env, &back_num, &num_type, TRUE);
|
||||
#endif
|
||||
if (r < 0) {
|
||||
if (is_enclosed == 0) {
|
||||
@ -7284,7 +7280,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
return ONIGERR_INVALID_BACKREF;
|
||||
}
|
||||
|
||||
condition = node_new_backref_checker(1, &back_num, 0,
|
||||
condition = node_new_backref_checker(1, &back_num, FALSE,
|
||||
#ifdef USE_BACKREF_WITH_LEVEL
|
||||
exist_level, level,
|
||||
#endif
|
||||
@ -7307,7 +7303,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
}
|
||||
}
|
||||
|
||||
condition = node_new_backref_checker(num, backs, 1,
|
||||
condition = node_new_backref_checker(num, backs, TRUE,
|
||||
#ifdef USE_BACKREF_WITH_LEVEL
|
||||
exist_level, level,
|
||||
#endif
|
||||
@ -7349,7 +7345,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
condition_is_checker = 0;
|
||||
r = fetch_token(tok, &p, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&condition, tok, term, &p, end, env, 0);
|
||||
r = parse_alts(&condition, tok, term, &p, end, env, FALSE);
|
||||
if (r < 0) {
|
||||
onig_node_free(condition);
|
||||
return r;
|
||||
@ -7392,7 +7388,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
onig_node_free(condition);
|
||||
return r;
|
||||
}
|
||||
r = parse_alts(&target, tok, term, &p, end, env, 1);
|
||||
r = parse_alts(&target, tok, term, &p, end, env, TRUE);
|
||||
if (r < 0) {
|
||||
onig_node_free(condition);
|
||||
onig_node_free(target);
|
||||
@ -7493,7 +7489,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
|
||||
case 'm':
|
||||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
|
||||
OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
|
||||
OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));
|
||||
}
|
||||
else if (IS_SYNTAX_OP2(env->syntax,
|
||||
ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
|
||||
@ -7529,16 +7525,16 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
|
||||
return ONIGERR_UNDEFINED_GROUP_OPTION;
|
||||
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
|
||||
break;
|
||||
#ifdef USE_UNICODE_WORD_BREAK
|
||||
case 'w':
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
|
||||
return ONIGERR_UNDEFINED_GROUP_OPTION;
|
||||
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
|
||||
OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@ -7568,7 +7564,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
env->options = option;
|
||||
r = fetch_token(tok, &p, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&target, tok, term, &p, end, env, 0);
|
||||
r = parse_alts(&target, tok, term, &p, end, env, FALSE);
|
||||
env->options = prev;
|
||||
if (r < 0) {
|
||||
onig_node_free(target);
|
||||
@ -7615,7 +7611,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
r = fetch_token(tok, &p, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&target, tok, term, &p, end, env, 0);
|
||||
r = parse_alts(&target, tok, term, &p, end, env, FALSE);
|
||||
if (r < 0) {
|
||||
onig_node_free(target);
|
||||
return r;
|
||||
@ -7768,6 +7764,29 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
extern int
|
||||
onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
|
||||
int n, OnigCodePoint codes[])
|
||||
{
|
||||
int i;
|
||||
Node* node;
|
||||
CClassNode* cc;
|
||||
|
||||
*rnode = NULL_NODE;
|
||||
|
||||
node = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(node);
|
||||
|
||||
cc = CCLASS_(node);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
ADD_CODE_INTO_CC(cc, codes[i], enc);
|
||||
}
|
||||
|
||||
*rnode = node;
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
ScanEnv* env;
|
||||
CClassNode* cc;
|
||||
@ -7927,7 +7946,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
env->options = BAG_(*np)->o.options;
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(&target, tok, term, src, end, env, 0);
|
||||
r = parse_alts(&target, tok, term, src, end, env, FALSE);
|
||||
env->options = prev;
|
||||
if (r < 0) {
|
||||
onig_node_free(target);
|
||||
@ -7942,7 +7961,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
|
||||
return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
|
||||
|
||||
if (tok->escaped) goto tk_raw_byte;
|
||||
if (tok->escaped) goto tk_crude_byte;
|
||||
else goto tk_byte;
|
||||
break;
|
||||
|
||||
@ -7967,23 +7986,23 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
}
|
||||
break;
|
||||
|
||||
case TK_RAW_BYTE:
|
||||
tk_raw_byte:
|
||||
case TK_CRUDE_BYTE:
|
||||
tk_crude_byte:
|
||||
{
|
||||
*np = node_new_str_raw_char(tok->u.byte);
|
||||
*np = node_new_str_crude_char(tok->u.byte);
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
len = 1;
|
||||
while (1) {
|
||||
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
|
||||
if (len == enclen(env->enc, STR_(*np)->s)) {
|
||||
r = fetch_token(tok, src, end, env);
|
||||
goto tk_raw_byte_end;
|
||||
goto tk_crude_byte_end;
|
||||
}
|
||||
}
|
||||
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
if (r != TK_RAW_BYTE)
|
||||
if (r != TK_CRUDE_BYTE)
|
||||
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
|
||||
|
||||
r = node_str_cat_char(*np, tok->u.byte);
|
||||
@ -7992,11 +8011,11 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
len++;
|
||||
}
|
||||
|
||||
tk_raw_byte_end:
|
||||
tk_crude_byte_end:
|
||||
if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
|
||||
NODE_STRING_CLEAR_RAW(*np);
|
||||
NODE_STRING_CLEAR_CRUDE(*np);
|
||||
goto string_end;
|
||||
}
|
||||
break;
|
||||
@ -8007,7 +8026,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
|
||||
if (len < 0) return len;
|
||||
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
|
||||
*np = node_new_str_raw(buf, buf + len);
|
||||
*np = node_new_str_crude(buf, buf + len);
|
||||
#else
|
||||
*np = node_new_str(buf, buf + len);
|
||||
#endif
|
||||
@ -8050,7 +8069,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
cc = CCLASS_(*np);
|
||||
add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
|
||||
add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
|
||||
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
|
||||
}
|
||||
break;
|
||||
@ -8109,7 +8128,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
|
||||
case TK_ANYCHAR_ANYTIME:
|
||||
*np = node_new_anychar();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
qn = node_new_quantifier(0, INFINITE_REPEAT, 0);
|
||||
qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
|
||||
CHECK_NULL_RETURN_MEMERR(qn);
|
||||
NODE_BODY(qn) = *np;
|
||||
*np = qn;
|
||||
@ -8300,7 +8319,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
|
||||
|
||||
headp = &(NODE_CDR(*top));
|
||||
while (r != TK_EOT && r != term && r != TK_ALT) {
|
||||
r = parse_exp(&node, tok, term, src, end, env, 0);
|
||||
r = parse_exp(&node, tok, term, src, end, env, FALSE);
|
||||
if (r < 0) {
|
||||
onig_node_free(node);
|
||||
return r;
|
||||
@ -8353,7 +8372,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
|
||||
while (r == TK_ALT) {
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_branch(&node, tok, term, src, end, env, 0);
|
||||
r = parse_branch(&node, tok, term, src, end, env, FALSE);
|
||||
if (r < 0) {
|
||||
onig_node_free(node);
|
||||
return r;
|
||||
@ -8392,7 +8411,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
r = fetch_token(&tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(top, &tok, TK_EOT, src, end, env, 0);
|
||||
r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);
|
||||
if (r < 0) return r;
|
||||
|
||||
return 0;
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#define REGPARSE_H
|
||||
/**********************************************************************
|
||||
regparse.h - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
@ -32,7 +33,7 @@
|
||||
#include "regint.h"
|
||||
|
||||
#define NODE_STRING_MARGIN 16
|
||||
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
/* node type */
|
||||
@ -83,8 +84,9 @@ typedef struct {
|
||||
UChar* s;
|
||||
UChar* end;
|
||||
unsigned int flag;
|
||||
int capacity; /* (allocated size - 1) or 0: use buf[] */
|
||||
UChar buf[NODE_STRING_BUF_SIZE];
|
||||
int capacity; /* (allocated size - 1) or 0: use buf[] */
|
||||
int case_min_len;
|
||||
} StrNode;
|
||||
|
||||
typedef struct {
|
||||
@ -293,30 +295,21 @@ typedef struct _Node {
|
||||
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
|
||||
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
|
||||
|
||||
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
|
||||
#define NODE_STRING_CRUDE (1<<0)
|
||||
#define NODE_STRING_CASE_EXPANDED (1<<1)
|
||||
#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
|
||||
#define NODE_STRING_GOOD_AMBIG (1<<3)
|
||||
#define NODE_STRING_DONT_GET_OPT_INFO (1<<4)
|
||||
|
||||
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
|
||||
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
|
||||
#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
|
||||
#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
|
||||
#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
|
||||
#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
|
||||
#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
|
||||
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
|
||||
#define NODE_STRING_IS_RAW(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
|
||||
#define NODE_STRING_IS_CRUDE(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
|
||||
#define NODE_STRING_IS_CASE_EXPANDED(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
|
||||
#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
|
||||
#define NODE_STRING_IS_GOOD_AMBIG(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
|
||||
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
@ -446,7 +439,6 @@ extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
||||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
@ -460,6 +452,7 @@ extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_free_shared_cclass_table P_((void));
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);
|
||||
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regsyntax.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regtrav.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
regversion.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
|
||||
/* encoding: UTF8 */
|
||||
|
||||
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
|
||||
|
||||
/* encoding: UTF8 */
|
||||
/* @(#) st.h 5.1 89/12/14 */
|
||||
|
||||
#ifndef ST_INCLUDED
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
/**********************************************************************
|
||||
unicode.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
|
||||
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
|
||||
/* Computed positions: -k'1-3' */
|
||||
|
||||
|
||||
@ -2983,7 +2983,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])
|
||||
4026
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
|
||||
{
|
||||
int key = hash(codes);
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
|
||||
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
|
||||
/* Computed positions: -k'3,6' */
|
||||
|
||||
|
||||
@ -211,7 +211,7 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
|
||||
129
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
|
||||
{
|
||||
int key = hash(codes);
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
|
||||
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
|
||||
/* Computed positions: -k'3,6,9' */
|
||||
|
||||
|
||||
@ -121,7 +121,7 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
|
||||
0
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
|
||||
{
|
||||
int key = hash(codes);
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
|
||||
/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
|
||||
/* Computed positions: -k'1-3,5-6,12,16,$' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
@ -29581,6 +29581,7 @@ unicode_lookup_property_name (register const char *str, register size_t len)
|
||||
|
||||
|
||||
#define UNICODE_PROPERTY_VERSION 120100
|
||||
#define UNICODE_EMOJI_VERSION 1201
|
||||
|
||||
#define PROPERTY_NAME_MAX_SIZE 59
|
||||
#define CODE_RANGES_NUM 568
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
|
||||
/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
|
||||
/* Computed positions: -k'1,3' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/* This file was converted by gperf_unfold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
|
||||
/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
|
||||
/* Computed positions: -k'1-3' */
|
||||
|
||||
|
||||
@ -3288,7 +3288,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code)
|
||||
{0x1e907, 4005, 1}
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
|
||||
{
|
||||
int key = hash(&code);
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// encoding: UTF8
|
||||
/**********************************************************************
|
||||
utf8.c - Oniguruma (regular expression library)
|
||||
encoding: UTF8
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2019 K.Kosako
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// encoding: UTF8
|
||||
/*
|
||||
* This program was generated by testconv.rb.
|
||||
* encoding: UTF8
|
||||
*/
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
|
||||
Loading…
Reference in New Issue
Block a user