diff --git a/oniguruma/HISTORY b/oniguruma/HISTORY index f4d4f6744..1e5989281 100644 --- a/oniguruma/HISTORY +++ b/oniguruma/HISTORY @@ -1,9 +1,26 @@ History +2020/04/03: Release Candidate 1 for Version 6.9.5 + +2020/03/30: remove src/*.py and src/*.sh from distribution files +2020/03/27: NEW: Code point sequence notation \x{HHHH ...}, \o{OOOO ...} +2020/03/24: NEW API: maximum nesting level of subexp call +2020/03/22: #165: change enable-posix-api default from YES to NO +2020/03/15: update Unicode version to 13.0.0 +2020/03/10: add test_back.c +2020/03/08: tune output of debug in print_optimize_info() +2020/03/02: fix #186: Allow regset search to succeed at end of string +2020/02/13: NEW API: retry-limit-in-search functions +2020/01/20: add ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND flag +2019/12/27: add USE_REGSET switch +2019/12/20: remove OPTIMIZE_STR_CASE_FOLD +2019/12/13: add test/test_syntax.c +2019/12/13: add ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH flag + + 2019/11/29: Version 6.9.4 2019/11/22: Release Candidate 3 for Version 6.9.4 - 2019/11/20: fix a problem found by libFuzzer test 2019/11/14: Release Candidate 2 for Version 6.9.4 2019/11/12: fix integer overflow by nested quantifier diff --git a/oniguruma/README.md b/oniguruma/README.md index 278859169..1881ec1b5 100644 --- a/oniguruma/README.md +++ b/oniguruma/README.md @@ -30,9 +30,11 @@ Supported character encodings: Master branch ------------- +* POSIX API disabled by default for Unix (* Enabled by: configure --enable-posix-api=yes) * Update Unicode version 13.0.0 +* NEW: Code point sequence notation \x{HHHH HHHH ...}, \o{OOOO OOOO ...} * NEW API: retry limit in search functions -* Limit on maximum nesting level of subexp call (16) +* NEW API: maximum nesting level of subexp call * Fixed behavior of isolated options in Perl and Java syntaxes. 
/...(?i).../ @@ -225,7 +227,16 @@ Install (I have checked by Visual Studio Community 2015) +Alternatively, you can build and install oniguruma using [vcpkg](https://github.com/microsoft/vcpkg/) dependency manager: + 1. git clone https://github.com/Microsoft/vcpkg.git + 2. cd vcpkg + 3. ./bootstrap-vcpkg.bat + 4. ./vcpkg integrate install + 5. ./vcpkg install oniguruma + +The oniguruma port in vcpkg is kept up to date by Microsoft team members and community contributors. +If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. Regular Expressions ------------------- diff --git a/oniguruma/doc/API b/oniguruma/doc/API index 5795e1327..bb7b010db 100644 --- a/oniguruma/doc/API +++ b/oniguruma/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.5 2020/02/19 +Oniguruma API Version 6.9.5 2020/03/25 #include @@ -611,8 +611,8 @@ Oniguruma API Version 6.9.5 2020/02/19 # int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) Iterate function call for all names. @@ -908,6 +908,21 @@ Oniguruma API Version 6.9.5 2020/02/19 normal return: ONIG_NORMAL +# int onig_get_subexp_call_max_nest_level(void) + + Return the limit of subexp call nest level. + (default: 20) + + normal return: current limit value + + +# int onig_set_subexp_call_max_nest_level(int max_level) + + Set the limit of subexp call nest level. + + normal return: ONIG_NORMAL + + # OnigCalloutFunc onig_get_progress_callout(void) Get a function for callouts of contents in progress. 
diff --git a/oniguruma/doc/RE b/oniguruma/doc/RE index 4561698a7..e6afbe563 100644 --- a/oniguruma/doc/RE +++ b/oniguruma/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.9.5 2020/01/28 +Oniguruma Regular Expressions Version 6.9.5 2020/03/27 syntax: ONIG_SYNTAX_ONIGURUMA (default) @@ -34,6 +34,15 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) (* \b as backspace is effective in character class only) +2.1 Code point sequences + + Hexadecimal code point (1-8 digits) + \x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH} + + Octal code point (1-11 digits) + \o{17777777777 17777777777 ... 17777777777} + + 3. Character types . any character (except newline) diff --git a/oniguruma/src/make_unicode_egcb_data.py b/oniguruma/src/make_unicode_egcb_data.py index 9c71796fd..e2982df43 100644 --- a/oniguruma/src/make_unicode_egcb_data.py +++ b/oniguruma/src/make_unicode_egcb_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # make_unicode_egcb_data.py -# Copyright (c) 2017-2019 K.Kosako +# Copyright (c) 2017-2020 K.Kosako import sys import re @@ -195,7 +195,7 @@ PROPS = sorted(PROPS) print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */' COPYRIGHT = ''' /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/oniguruma/src/make_unicode_property_data.py b/oniguruma/src/make_unicode_property_data.py index 9776628f5..285c462d3 100644 --- a/oniguruma/src/make_unicode_property_data.py +++ b/oniguruma/src/make_unicode_property_data.py @@ -418,10 +418,39 @@ def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = No merge_props(PROPS, props) return dic, props, ver_m + ### main ### argv = sys.argv argc = len(argv) +COPYRIGHT = ''' +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +'''.strip() + POSIX_ONLY = False INCLUDE_GRAPHEME_CLUSTER_DATA = False @@ -485,10 +514,14 @@ if INCLUDE_GRAPHEME_CLUSTER_DATA: add_posix_props(DIC) PROPS = sorted(PROPS) + s = '''%{ /* Generated by make_unicode_property_data.py. 
*/ ''' print s +print COPYRIGHT +print '' + for prop in POSIX_LIST: print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop) diff --git a/oniguruma/src/oniguruma.h b/oniguruma/src/oniguruma.h index aea8866bb..cc7562713 100644 --- a/oniguruma/src/oniguruma.h +++ b/oniguruma/src/oniguruma.h @@ -935,6 +935,10 @@ int onig_set_capture_num_limit P_((int num)); ONIG_EXTERN int onig_set_parse_depth_limit P_((unsigned int depth)); ONIG_EXTERN +int onig_get_subexp_call_max_nest_level P_((void)); +ONIG_EXTERN +int onig_set_subexp_call_max_nest_level P_((int level)); +ONIG_EXTERN int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges)); ONIG_EXTERN int onig_end P_((void)); diff --git a/oniguruma/src/regexec.c b/oniguruma/src/regexec.c index e4e48119f..d1fbacf55 100644 --- a/oniguruma/src/regexec.c +++ b/oniguruma/src/regexec.c @@ -1221,7 +1221,7 @@ struct OnigCalloutArgsStruct { #define RETRY_IN_MATCH_ARG_INIT(msa,mpv) #endif -#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL) +#if defined(USE_CALL) #define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;} #else #define POP_CALL @@ -2541,6 +2541,7 @@ backref_check_at_nested_level(regex_t* reg, } #endif /* USE_BACKREF_WITH_LEVEL */ +static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL; #ifdef ONIG_DEBUG_STATISTICS @@ -2867,7 +2868,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; -#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL) +#ifdef USE_CALL unsigned long subexp_call_nest_counter = 0; #endif @@ -3189,7 +3190,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(CCLASS) DATA_ENSURE(1); if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail; - s++; + s += enclen(encode, s); INC_OP; JUMP_OUT_WITH_SPREV_SET; @@ -4046,11 +4047,9 @@ match_at(regex_t* reg, const 
UChar* str, const UChar* end, #ifdef USE_CALL CASE_OP(CALL) -#ifdef SUBEXP_CALL_MAX_NEST_LEVEL - if (subexp_call_nest_counter == SUBEXP_CALL_MAX_NEST_LEVEL) + if (subexp_call_nest_counter == SubexpCallMaxNestLevel) goto fail; subexp_call_nest_counter++; -#endif addr = p->call.addr; INC_OP; STACK_PUSH_CALL_FRAME(p); p = reg->ops + addr; @@ -4060,9 +4059,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(RETURN) STACK_RETURN(p); STACK_PUSH_RETURN; -#ifdef SUBEXP_CALL_MAX_NEST_LEVEL subexp_call_nest_counter--; -#endif JUMP_OUT; #endif @@ -5687,6 +5684,19 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, return n; } +extern int +onig_get_subexp_call_max_nest_level(void) +{ + return SubexpCallMaxNestLevel; +} + +extern int +onig_set_subexp_call_max_nest_level(int level) +{ + SubexpCallMaxNestLevel = level; + return 0; +} + extern OnigEncoding onig_get_encoding(regex_t* reg) { diff --git a/oniguruma/src/regint.h b/oniguruma/src/regint.h index 8d8f89b86..ab053dd42 100644 --- a/oniguruma/src/regint.h +++ b/oniguruma/src/regint.h @@ -88,7 +88,7 @@ #define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 #define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */ #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#define SUBEXP_CALL_MAX_NEST_LEVEL 16 +#define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20 #include "regenc.h" diff --git a/oniguruma/src/regparse.c b/oniguruma/src/regparse.c index 0b45ca912..8d58e7059 100644 --- a/oniguruma/src/regparse.c +++ b/oniguruma/src/regparse.c @@ -3544,6 +3544,129 @@ scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen, return ONIG_NORMAL; } +static int +scan_number_of_base(UChar** src, UChar* end, int minlen, + OnigEncoding enc, OnigCodePoint* rcode, int base) +{ + int r; + + if (base == 16) + r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode); + else if (base == 8) + r = scan_octal_number(src, end, minlen, 11, enc, rcode); + else + r = ONIGERR_INVALID_CODE_POINT_VALUE; + + return r; +} + +#define 
IS_CODE_POINT_DIVIDE(c) ((c) == ' ' || (c) == '\n') + +enum CPS_STATE { + CPS_EMPTY = 0, + CPS_START = 1, + CPS_RANGE = 2 +}; + +static int +check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc, + int in_cc) +{ + int r; + int n; + int end_digit; + int state; + OnigCodePoint code; + OnigCodePoint c; + PFETCH_READY; + + end_digit = FALSE; + state = CPS_START; + n = 0; + while (! PEND) { + start: + PFETCH(c); + if (c == '}') { + end_char: + if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE; + return n; + } + + if (IS_CODE_POINT_DIVIDE(c)) { + while (! PEND) { + PFETCH(c); + if (! IS_CODE_POINT_DIVIDE(c)) break; + } + if (IS_CODE_POINT_DIVIDE(c)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + else if (c == '-' && in_cc == TRUE) { + range: + if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + end_digit = FALSE; + state = CPS_RANGE; + goto start; + } + else if (end_digit == TRUE) { + if (base == 16) { + if (IS_CODE_XDIGIT_ASCII(enc, c)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + else if (base == 8) { + if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; + } + + if (c == '}') goto end_char; + if (c == '-' && in_cc == TRUE) goto range; + + PUNFETCH; + r = scan_number_of_base(&p, end, 1, enc, &code, base); + if (r != 0) return r; + n++; + end_digit = TRUE; + state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int +get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode) +{ + int r; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + while (! PEND) { + PFETCH(c); + if (! 
IS_CODE_POINT_DIVIDE(c)) break; + } + if (IS_CODE_POINT_DIVIDE(c)) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (c == '}') { + *src = p; + return 1; /* end of sequence */ + } + else if (c == '-' && in_cc == TRUE) { + *src = p; + return 2; /* range */ + } + + PUNFETCH; + r = scan_number_of_base(&p, end, 1, enc, rcode, base); + if (r != 0) return r; + + *src = p; + return ONIG_NORMAL; +} + #define BB_WRITE_CODE_POINT(bbuf,pos,code) \ BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) @@ -4229,8 +4352,9 @@ enum TokenSyms { typedef struct { enum TokenSyms type; + int code_point_continue; int escaped; - int base; /* is number: 8, 16 (used in [....]) */ + int base_num; /* is number: 8, 16 (used in [....]) */ UChar* backp; union { UChar* s; @@ -4267,6 +4391,11 @@ typedef struct { } u; } PToken; +static void +ptoken_init(PToken* tok) +{ + tok->code_point_continue = 0; +} static int fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env) @@ -4836,7 +4965,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, } static int -fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) +fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r; OnigCodePoint code; @@ -4847,6 +4976,24 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* p = *src; PFETCH_READY; + if (tok->code_point_continue != 0) { + r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code); + if (r == 1) { + tok->code_point_continue = 0; + } + else if (r == 2) { + tok->type = TK_CC_RANGE; + goto end; + } + else if (r == 0) { + tok->type = TK_CODE_POINT; + tok->u.code = code; + goto end; + } + else + return r; /* error */ + } + if (PEND) { tok->type = TK_EOT; return tok->type; @@ -4854,9 +5001,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) PFETCH(c); tok->type = TK_CHAR; - tok->base = 0; - tok->u.code = c; - tok->escaped = 0; + tok->base_num = 0; + tok->u.code = c; + tok->escaped = 0; if 
(c == ']') { tok->type = TK_CC_CLOSE; @@ -4953,16 +5100,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 8; - tok->u.code = code; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + tok->base_num = 8; + goto brace_code_point_entry; } break; @@ -4980,10 +5119,20 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; + tok->base_num = 16; + brace_code_point_entry: + if ((p > prev + enclen(enc, prev))) { + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PPEEK_IS('}')) { + PINC; + } + else { + r = check_code_point_sequence(p, end, tok->base_num, enc, TRUE); + if (r < 0) return r; + if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE; + tok->code_point_continue = TRUE; + } tok->type = TK_CODE_POINT; - tok->base = 16; tok->u.code = code; } else { @@ -4998,8 +5147,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 16; - tok->u.byte = (UChar )code; + tok->base_num = 16; + tok->u.byte = (UChar )code; } break; @@ -5013,9 +5162,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. 
*/ code = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = code; + tok->type = TK_CODE_POINT; + tok->base_num = 16; + tok->u.code = code; } break; @@ -5031,8 +5180,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 8; - tok->u.byte = (UChar )code; + tok->base_num = 8; + tok->u.byte = (UChar )code; } break; @@ -5096,15 +5245,29 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* p = *src; PFETCH_READY; + if (tok->code_point_continue != 0) { + r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code); + if (r == 1) { + tok->code_point_continue = 0; + } + else if (r == 0) { + tok->type = TK_CODE_POINT; + tok->u.code = code; + goto out; + } + else + return r; /* error */ + } + start: if (PEND) { tok->type = TK_EOT; return tok->type; } - tok->type = TK_STRING; - tok->base = 0; - tok->backp = p; + tok->type = TK_STRING; + tok->base_num = 0; + tok->backp = p; PFETCH(c); if (IS_MC_ESC_CODE(c, syn)) { @@ -5359,15 +5522,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = code; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } + tok->base_num = 8; + goto brace_code_point_entry; } break; @@ -5384,8 +5540,19 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; + tok->base_num = 16; + brace_code_point_entry: + if ((p > prev + enclen(enc, prev))) { + if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE; + if (PPEEK_IS('}')) { + PINC; + } + else { + r = check_code_point_sequence(p, end, tok->base_num, enc, FALSE); + if (r < 0) return r; + if (r == 0) return 
ONIGERR_INVALID_CODE_POINT_VALUE; + tok->code_point_continue = TRUE; + } tok->type = TK_CODE_POINT; tok->u.code = code; } @@ -5401,8 +5568,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 16; - tok->u.byte = (UChar )code; + tok->base_num = 16; + tok->u.byte = (UChar )code; } break; @@ -5416,9 +5583,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (p == prev) { /* can't read nothing. */ code = 0; /* but, it's not error */ } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = code; + tok->type = TK_CODE_POINT; + tok->base_num = 16; + tok->u.code = code; } break; @@ -5466,8 +5633,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) code = 0; /* but, it's not error */ } tok->type = TK_CRUDE_BYTE; - tok->base = 8; - tok->u.byte = (UChar )code; + tok->base_num = 8; + tok->u.byte = (UChar )code; } else if (c != '0') { PINC; @@ -5863,9 +6030,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) } } -#ifdef USE_VARIABLE_META_CHARS out: -#endif *src = p; return tok->type; } @@ -6416,10 +6581,10 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) INC_PARSE_DEPTH(env->parse_depth); prev_cc = (CClassNode* )NULL; - r = fetch_token_in_cc(tok, src, end, env); + r = fetch_token_cc(tok, src, end, env); if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) { neg = 1; - r = fetch_token_in_cc(tok, src, end, env); + r = fetch_token_cc(tok, src, end, env); } else { neg = 0; @@ -6461,19 +6626,19 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case TK_CRUDE_BYTE: - /* tok->base != 0 : octal or hexadec. */ - if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + /* tok->base_num != 0 : octal or hexadec. */ + if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) { int i, j; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; UChar* psave = p; - int base = tok->base; + int base_num = tok->base_num; buf[0] = tok->u.byte; for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); if (r < 0) goto err; - if (r != TK_CRUDE_BYTE || tok->base != base) { + if (r != TK_CRUDE_BYTE || tok->base_num != base_num) { fetched = 1; break; } @@ -6496,7 +6661,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) else if (i > len) { /* fetch back */ p = psave; for (i = 1; i < len; i++) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); } fetched = 0; } @@ -6577,7 +6742,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) case TK_CC_RANGE: if (state == CS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); if (r < 0) goto err; fetched = 1; @@ -6604,7 +6769,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) in_code = tok->u.code; in_raw = 0; - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); if (r < 0) goto err; fetched = 1; @@ -6619,7 +6784,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) goto any_char_in; /* [!--] is allowed */ } else { /* CS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); if (r < 0) goto err; fetched = 1; @@ -6702,7 +6867,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (fetched) r = tok->type; else { - r = fetch_token_in_cc(tok, &p, end, env); + r = fetch_token_cc(tok, &p, end, env); if (r < 0) goto err; } } @@ -8587,6 +8752,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) int r; PToken tok; + ptoken_init(&tok); r = fetch_token(&tok, src, end, env); if 
(r < 0) return r; r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE); diff --git a/oniguruma/src/unicode_egcb_data.c b/oniguruma/src/unicode_egcb_data.c index ffe1e2f69..0935bb45f 100644 --- a/oniguruma/src/unicode_egcb_data.c +++ b/oniguruma/src/unicode_egcb_data.c @@ -1,6 +1,6 @@ /* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */ /*- - * Copyright (c) 2017-2019 K.Kosako + * Copyright (c) 2017-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/oniguruma/src/unicode_property_data.c b/oniguruma/src/unicode_property_data.c index 37df9c325..3b645c64c 100644 --- a/oniguruma/src/unicode_property_data.c +++ b/oniguruma/src/unicode_property_data.c @@ -33,6 +33,32 @@ /* Generated by make_unicode_property_data.py. */ +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */ static const OnigCodePoint diff --git a/oniguruma/src/unicode_property_data_posix.c b/oniguruma/src/unicode_property_data_posix.c index 5a139503c..b050ff2f4 100644 --- a/oniguruma/src/unicode_property_data_posix.c +++ b/oniguruma/src/unicode_property_data_posix.c @@ -33,6 +33,32 @@ /* Generated by make_unicode_property_data.py. */ +/*- + * Copyright (c) 2016-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + /* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */ static const OnigCodePoint diff --git a/oniguruma/src/unicode_wb_data.c b/oniguruma/src/unicode_wb_data.c index 341e77883..ce082a217 100644 --- a/oniguruma/src/unicode_wb_data.c +++ b/oniguruma/src/unicode_wb_data.c @@ -1,6 +1,6 @@ /* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */ /*- - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/VersionEx.h b/src/VersionEx.h index e22fb7ece..2a2e662f1 100644 --- a/src/VersionEx.h +++ b/src/VersionEx.h @@ -15,5 +15,5 @@ #define UCHARDET_VER 2018.09.27 #define TINYEXPR_VER 2018.05.11 #define UTHASH_VER 2.1.0 -#define VERSION_PATCH BETA -#define VERSION_COMMIT_ID dkt1-amr +#define VERSION_PATCH RC3 +#define VERSION_COMMIT_ID t7820-rk