Merge branch 'master' into mui

This commit is contained in:
Pairi Daiza 2020-03-31 15:48:06 +02:00 committed by GitHub
commit 9f63fe5f88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 398 additions and 81 deletions

View File

@ -1,9 +1,26 @@
History
2020/04/03: Release Candidate 1 for Version 6.9.5
2020/03/30: remove src/*.py and src/*.sh from distribution files
2020/03/27: NEW: Code point sequence notation \x{HHHH ...}, \o{OOOO ...}
2020/03/24: NEW API: maximum nesting level of subexp call
2020/03/22: #165: change enable-posix-api default from YES to NO
2020/03/15: update Unicode version to 13.0.0
2020/03/10: add test_back.c
2020/03/08: tune output of debug in print_optimize_info()
2020/03/02: fix #186: Allow regset search to succeed at end of string
2020/02/13: NEW API: retry-limit-in-search functions
2020/01/20: add ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND flag
2019/12/27: add USE_REGSET switch
2019/12/20: remove OPTIMIZE_STR_CASE_FOLD
2019/12/13: add test/test_syntax.c
2019/12/13: add ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH flag
2019/11/29: Version 6.9.4
2019/11/22: Release Candidate 3 for Version 6.9.4
2019/11/20: fix a problem found by libFuzzer test
2019/11/14: Release Candidate 2 for Version 6.9.4
2019/11/12: fix integer overflow by nested quantifier

View File

@ -30,9 +30,11 @@ Supported character encodings:
Master branch
-------------
* POSIX API disabled by default for Unix (* Enabled by: configure --enable-posix-api=yes)
* Update Unicode version 13.0.0
* NEW: Code point sequence notation \x{HHHH HHHH ...}, \o{OOOO OOOO ...}
* NEW API: retry limit in search functions
* Limit on maximum nesting level of subexp call (16)
* NEW API: maximum nesting level of subexp call
* Fixed behavior of isolated options in Perl and Java syntaxes. /...(?i).../
@ -225,7 +227,16 @@ Install
(I have checked by Visual Studio Community 2015)
Alternatively, you can build and install oniguruma using [vcpkg](https://github.com/microsoft/vcpkg/) dependency manager:
1. git clone https://github.com/Microsoft/vcpkg.git
2. cd vcpkg
3. ./bootstrap-vcpkg.bat
4. ./vcpkg integrate install
5. ./vcpkg install oniguruma
The oniguruma port in vcpkg is kept up to date by Microsoft team members and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
Regular Expressions
-------------------

View File

@ -1,4 +1,4 @@
Oniguruma API Version 6.9.5 2020/02/19
Oniguruma API Version 6.9.5 2020/03/25
#include <oniguruma.h>
@ -611,8 +611,8 @@ Oniguruma API Version 6.9.5 2020/02/19
# int onig_foreach_name(regex_t* reg,
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
void* arg)
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
void* arg)
Iterate function call for all names.
@ -908,6 +908,21 @@ Oniguruma API Version 6.9.5 2020/02/19
normal return: ONIG_NORMAL
# int onig_get_subexp_call_max_nest_level(void)
Return the limit of subexp call nest level.
(default: 20)
normal return: current limit value
# int onig_set_subexp_call_max_nest_level(int max_level)
Set the limit of subexp call nest level.
normal return: ONIG_NORMAL
# OnigCalloutFunc onig_get_progress_callout(void)
Get a function for callouts of contents in progress.

View File

@ -1,4 +1,4 @@
Oniguruma Regular Expressions Version 6.9.5 2020/01/28
Oniguruma Regular Expressions Version 6.9.5 2020/03/27
syntax: ONIG_SYNTAX_ONIGURUMA (default)
@ -34,6 +34,15 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
(* \b as backspace is effective in character class only)
2.1 Code point sequences
Hexadecimal code point (1-8 digits)
\x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH}
Octal code point (1-11 digits)
\o{17777777777 17777777777 ... 17777777777}
3. Character types
. any character (except newline)

View File

@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_egcb_data.py
# Copyright (c) 2017-2019 K.Kosako
# Copyright (c) 2017-2020 K.Kosako
import sys
import re
@ -195,7 +195,7 @@ PROPS = sorted(PROPS)
print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */'
COPYRIGHT = '''
/*-
* Copyright (c) 2017-2019 K.Kosako
* Copyright (c) 2017-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -418,10 +418,39 @@ def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = No
merge_props(PROPS, props)
return dic, props, ver_m
### main ###
argv = sys.argv
argc = len(argv)
COPYRIGHT = '''
/*-
* Copyright (c) 2016-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
'''.strip()
POSIX_ONLY = False
INCLUDE_GRAPHEME_CLUSTER_DATA = False
@ -485,10 +514,14 @@ if INCLUDE_GRAPHEME_CLUSTER_DATA:
add_posix_props(DIC)
PROPS = sorted(PROPS)
s = '''%{
/* Generated by make_unicode_property_data.py. */
'''
print s
print COPYRIGHT
print ''
for prop in POSIX_LIST:
print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop)

View File

@ -935,6 +935,10 @@ int onig_set_capture_num_limit P_((int num));
ONIG_EXTERN
int onig_set_parse_depth_limit P_((unsigned int depth));
ONIG_EXTERN
int onig_get_subexp_call_max_nest_level P_((void));
ONIG_EXTERN
int onig_set_subexp_call_max_nest_level P_((int level));
ONIG_EXTERN
int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges));
ONIG_EXTERN
int onig_end P_((void));

View File

@ -1221,7 +1221,7 @@ struct OnigCalloutArgsStruct {
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
#endif
#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
#if defined(USE_CALL)
#define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
#define POP_CALL
@ -2541,6 +2541,7 @@ backref_check_at_nested_level(regex_t* reg,
}
#endif /* USE_BACKREF_WITH_LEVEL */
static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL;
#ifdef ONIG_DEBUG_STATISTICS
@ -2867,7 +2868,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigEncoding encode = reg->enc;
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
#ifdef USE_CALL
unsigned long subexp_call_nest_counter = 0;
#endif
@ -3189,7 +3190,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(CCLASS)
DATA_ENSURE(1);
if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
s++;
s += enclen(encode, s);
INC_OP;
JUMP_OUT_WITH_SPREV_SET;
@ -4046,11 +4047,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CALL
CASE_OP(CALL)
#ifdef SUBEXP_CALL_MAX_NEST_LEVEL
if (subexp_call_nest_counter == SUBEXP_CALL_MAX_NEST_LEVEL)
if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
goto fail;
subexp_call_nest_counter++;
#endif
addr = p->call.addr;
INC_OP; STACK_PUSH_CALL_FRAME(p);
p = reg->ops + addr;
@ -4060,9 +4059,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(RETURN)
STACK_RETURN(p);
STACK_PUSH_RETURN;
#ifdef SUBEXP_CALL_MAX_NEST_LEVEL
subexp_call_nest_counter--;
#endif
JUMP_OUT;
#endif
@ -5687,6 +5684,19 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
return n;
}
/*
 * Return the current limit on the nesting level of subexp calls.
 *
 * The value is the file-scope SubexpCallMaxNestLevel, which is initialized
 * to DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL and is compared against the
 * per-match call nest counter in the CALL opcode of match_at().
 */
extern int
onig_get_subexp_call_max_nest_level(void)
{
return SubexpCallMaxNestLevel;
}
/*
 * Set the limit on the nesting level of subexp calls.
 *
 * level: new limit; stored as-is in the file-scope SubexpCallMaxNestLevel
 *        (no range validation is performed here).
 *
 * Always returns 0.
 *
 * NOTE(review): match_at() compares an unsigned long counter against this
 * int with ==, so a negative level would presumably never be reached and
 * the limit would effectively be disabled -- confirm whether that is
 * intended before relying on it.
 */
extern int
onig_set_subexp_call_max_nest_level(int level)
{
SubexpCallMaxNestLevel = level;
return 0;
}
extern OnigEncoding
onig_get_encoding(regex_t* reg)
{

View File

@ -88,7 +88,7 @@
#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
#define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
#define SUBEXP_CALL_MAX_NEST_LEVEL 16
#define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20
#include "regenc.h"

View File

@ -3544,6 +3544,129 @@ scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
return ONIG_NORMAL;
}
/*
 * Scan one number written in the given base, starting at *src.
 *
 * Dispatches to the base-specific scanner:
 *   base 16 -> scan_hexadecimal_number() with a maximum of 8 digits
 *   base  8 -> scan_octal_number() with a maximum of 11 digits
 * Any other base yields ONIGERR_INVALID_CODE_POINT_VALUE.
 *
 * src, end, minlen, enc and rcode are handed to the selected scanner
 * unchanged; its return value is returned as-is.
 */
static int
scan_number_of_base(UChar** src, UChar* end, int minlen,
                    OnigEncoding enc, OnigCodePoint* rcode, int base)
{
  switch (base) {
  case 16:
    return scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);

  case 8:
    return scan_octal_number(src, end, minlen, 11, enc, rcode);

  default:
    /* only octal and hexadecimal notations are supported */
    return ONIGERR_INVALID_CODE_POINT_VALUE;
  }
}
#define IS_CODE_POINT_DIVIDE(c) ((c) == ' ' || (c) == '\n')
/*
 * Range-tracking state used by check_code_point_sequence() while it
 * validates a code point sequence.
 */
enum CPS_STATE {
/* a range end value was just scanned; another '-' is not allowed here */
CPS_EMPTY = 0,
/* initial state, or a plain value was just scanned; a '-' may follow */
CPS_START = 1,
/* a '-' was just read; a value must follow before the closing '}' */
CPS_RANGE = 2
};
/*
 * Validate the remainder of a code point sequence up to the closing '}'
 * without consuming input (p is a by-value copy, so the caller's position
 * is not advanced).
 *
 * p, end : text to examine
 * base   : 16 or 8; numbers are scanned with scan_number_of_base()
 * enc    : encoding used for fetching characters and classifying digits
 * in_cc  : when TRUE, '-' between values is accepted as a range operator
 *
 * Returns the number of code points scanned by this function when '}' is
 * reached (may be 0; the visible callers treat 0 as invalid), or an error
 * code: ONIGERR_INVALID_CODE_POINT_VALUE for a malformed or unterminated
 * sequence, ONIGERR_TOO_LONG_WIDE_CHAR_VALUE when a number is directly
 * followed by a further digit of the same base, or whatever error
 * scan_number_of_base() reports. The visible callers treat negative
 * results as errors.
 */
static int
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc,
int in_cc)
{
int r;
int n;
int end_digit;
int state;
OnigCodePoint code;
OnigCodePoint c;
PFETCH_READY;
/* end_digit: TRUE when the previous item read was a number */
end_digit = FALSE;
state = CPS_START;
n = 0;
while (! PEND) {
start:
PFETCH(c);
if (c == '}') {
end_char:
/* a dangling '-' (range without an end value) is invalid */
if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
return n;
}
if (IS_CODE_POINT_DIVIDE(c)) {
/* skip the whole run of separators (space / newline) */
while (! PEND) {
PFETCH(c);
if (! IS_CODE_POINT_DIVIDE(c)) break;
}
/* input ended while still inside separators: no closing '}' */
if (IS_CODE_POINT_DIVIDE(c))
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
else if (c == '-' && in_cc == TRUE) {
range:
/* '-' is only valid directly after a plain value (or at the start) */
if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
end_digit = FALSE;
state = CPS_RANGE;
goto start;
}
else if (end_digit == TRUE) {
/*
 * A number was just scanned and is followed by something that is
 * neither '}', a separator nor a range '-'. If it is another digit
 * of the same base, the number had more digits than the scanner
 * accepts; otherwise it is simply an invalid character.
 */
if (base == 16) {
if (IS_CODE_XDIGIT_ASCII(enc, c))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
else if (base == 8) {
if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
/* after skipping separators, c may itself be '}' or a range '-' */
if (c == '}') goto end_char;
if (c == '-' && in_cc == TRUE) goto range;
/* c starts a number: push it back and scan the number from there */
PUNFETCH;
r = scan_number_of_base(&p, end, 1, enc, &code, base);
if (r != 0) return r;
n++;
end_digit = TRUE;
/* a value that completes a range cannot start another range */
state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
}
/* ran out of input before the closing '}' */
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
/*
 * Fetch the next element of a code point sequence.
 *
 * Leading separators (space / newline) are skipped, then exactly one of
 * the following is consumed:
 *   '}'                     -> returns 1 (end of sequence)
 *   '-' (only when in_cc)   -> returns 2 (range operator)
 *   a number in the base    -> stored in *rcode, returns ONIG_NORMAL
 *
 * src   : in/out read position; updated only on the three successful
 *         returns above, left untouched on error
 * end   : end of the pattern text
 * base  : 16 or 8, passed to scan_number_of_base()
 * enc   : encoding used for fetching characters
 * in_cc : TRUE when called for a character class, which enables '-'
 * rcode : receives the scanned code point
 *
 * Returns ONIGERR_INVALID_CODE_POINT_VALUE when the input ends before any
 * element is found, or the error reported by scan_number_of_base().
 */
static int
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
{
  int r;
  OnigCodePoint c;
  UChar* p = *src;
  PFETCH_READY;

  /*
   * Nothing left to read: bail out before touching c. Without this guard
   * the skip loop would not run at all, c would be tested uninitialized
   * and PUNFETCH would restore a previous position that was never set.
   */
  if (PEND)
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  /* skip separators; stops on the first non-separator or at end of input */
  do {
    PFETCH(c);
  } while (IS_CODE_POINT_DIVIDE(c) && ! PEND);

  /* input ended while still inside separators: no closing '}' */
  if (IS_CODE_POINT_DIVIDE(c))
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  if (c == '}') {
    *src = p;
    return 1; /* end of sequence */
  }
  else if (c == '-' && in_cc == TRUE) {
    *src = p;
    return 2; /* range */
  }

  /* c starts a number: push it back and scan the number from there */
  PUNFETCH;
  r = scan_number_of_base(&p, end, 1, enc, rcode, base);
  if (r != 0) return r;

  *src = p;
  return ONIG_NORMAL;
}
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
@ -4229,8 +4352,9 @@ enum TokenSyms {
typedef struct {
enum TokenSyms type;
int code_point_continue;
int escaped;
int base; /* is number: 8, 16 (used in [....]) */
int base_num; /* is number: 8, 16 (used in [....]) */
UChar* backp;
union {
UChar* s;
@ -4267,6 +4391,11 @@ typedef struct {
} u;
} PToken;
/*
 * Initialize a PToken before its first use.
 *
 * Only code_point_continue is reset; fetch_token() and fetch_token_cc()
 * test this field on entry to decide whether they must resume reading a
 * code point sequence, so it has to start out as 0.
 */
static void
ptoken_init(PToken* tok)
{
tok->code_point_continue = 0;
}
static int
fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
@ -4836,7 +4965,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
}
static int
fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r;
OnigCodePoint code;
@ -4847,6 +4976,24 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
UChar* p = *src;
PFETCH_READY;
if (tok->code_point_continue != 0) {
r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
if (r == 1) {
tok->code_point_continue = 0;
}
else if (r == 2) {
tok->type = TK_CC_RANGE;
goto end;
}
else if (r == 0) {
tok->type = TK_CODE_POINT;
tok->u.code = code;
goto end;
}
else
return r; /* error */
}
if (PEND) {
tok->type = TK_EOT;
return tok->type;
@ -4854,9 +5001,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH(c);
tok->type = TK_CHAR;
tok->base = 0;
tok->u.code = c;
tok->escaped = 0;
tok->base_num = 0;
tok->u.code = c;
tok->escaped = 0;
if (c == ']') {
tok->type = TK_CC_CLOSE;
@ -4953,16 +5100,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
PINC;
tok->type = TK_CODE_POINT;
tok->base = 8;
tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
p = prev;
}
tok->base_num = 8;
goto brace_code_point_entry;
}
break;
@ -4980,10 +5119,20 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
PINC;
tok->base_num = 16;
brace_code_point_entry:
if ((p > prev + enclen(enc, prev))) {
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
if (PPEEK_IS('}')) {
PINC;
}
else {
r = check_code_point_sequence(p, end, tok->base_num, enc, TRUE);
if (r < 0) return r;
if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
tok->code_point_continue = TRUE;
}
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = code;
}
else {
@ -4998,8 +5147,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
code = 0; /* but, it's not error */
}
tok->type = TK_CRUDE_BYTE;
tok->base = 16;
tok->u.byte = (UChar )code;
tok->base_num = 16;
tok->u.byte = (UChar )code;
}
break;
@ -5013,9 +5162,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = code;
tok->type = TK_CODE_POINT;
tok->base_num = 16;
tok->u.code = code;
}
break;
@ -5031,8 +5180,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
code = 0; /* but, it's not error */
}
tok->type = TK_CRUDE_BYTE;
tok->base = 8;
tok->u.byte = (UChar )code;
tok->base_num = 8;
tok->u.byte = (UChar )code;
}
break;
@ -5096,15 +5245,29 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
UChar* p = *src;
PFETCH_READY;
if (tok->code_point_continue != 0) {
r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
if (r == 1) {
tok->code_point_continue = 0;
}
else if (r == 0) {
tok->type = TK_CODE_POINT;
tok->u.code = code;
goto out;
}
else
return r; /* error */
}
start:
if (PEND) {
tok->type = TK_EOT;
return tok->type;
}
tok->type = TK_STRING;
tok->base = 0;
tok->backp = p;
tok->type = TK_STRING;
tok->base_num = 0;
tok->backp = p;
PFETCH(c);
if (IS_MC_ESC_CODE(c, syn)) {
@ -5359,15 +5522,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->type = TK_CODE_POINT;
tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
p = prev;
}
tok->base_num = 8;
goto brace_code_point_entry;
}
break;
@ -5384,8 +5540,19 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->base_num = 16;
brace_code_point_entry:
if ((p > prev + enclen(enc, prev))) {
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
if (PPEEK_IS('}')) {
PINC;
}
else {
r = check_code_point_sequence(p, end, tok->base_num, enc, FALSE);
if (r < 0) return r;
if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
tok->code_point_continue = TRUE;
}
tok->type = TK_CODE_POINT;
tok->u.code = code;
}
@ -5401,8 +5568,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
code = 0; /* but, it's not error */
}
tok->type = TK_CRUDE_BYTE;
tok->base = 16;
tok->u.byte = (UChar )code;
tok->base_num = 16;
tok->u.byte = (UChar )code;
}
break;
@ -5416,9 +5583,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
code = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
tok->u.code = code;
tok->type = TK_CODE_POINT;
tok->base_num = 16;
tok->u.code = code;
}
break;
@ -5466,8 +5633,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
code = 0; /* but, it's not error */
}
tok->type = TK_CRUDE_BYTE;
tok->base = 8;
tok->u.byte = (UChar )code;
tok->base_num = 8;
tok->u.byte = (UChar )code;
}
else if (c != '0') {
PINC;
@ -5863,9 +6030,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
#ifdef USE_VARIABLE_META_CHARS
out:
#endif
*src = p;
return tok->type;
}
@ -6416,10 +6581,10 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
INC_PARSE_DEPTH(env->parse_depth);
prev_cc = (CClassNode* )NULL;
r = fetch_token_in_cc(tok, src, end, env);
r = fetch_token_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
neg = 1;
r = fetch_token_in_cc(tok, src, end, env);
r = fetch_token_cc(tok, src, end, env);
}
else {
neg = 0;
@ -6461,19 +6626,19 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case TK_CRUDE_BYTE:
/* tok->base != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
/* tok->base_num != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
int i, j;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
UChar* psave = p;
int base = tok->base;
int base_num = tok->base_num;
buf[0] = tok->u.byte;
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
if (r < 0) goto err;
if (r != TK_CRUDE_BYTE || tok->base != base) {
if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
fetched = 1;
break;
}
@ -6496,7 +6661,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (i > len) { /* fetch back */
p = psave;
for (i = 1; i < len; i++) {
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
}
fetched = 0;
}
@ -6577,7 +6742,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case TK_CC_RANGE:
if (state == CS_VALUE) {
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
if (r < 0) goto err;
fetched = 1;
@ -6604,7 +6769,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
in_code = tok->u.code;
in_raw = 0;
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
if (r < 0) goto err;
fetched = 1;
@ -6619,7 +6784,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto any_char_in; /* [!--] is allowed */
}
else { /* CS_COMPLETE */
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
if (r < 0) goto err;
fetched = 1;
@ -6702,7 +6867,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (fetched)
r = tok->type;
else {
r = fetch_token_in_cc(tok, &p, end, env);
r = fetch_token_cc(tok, &p, end, env);
if (r < 0) goto err;
}
}
@ -8587,6 +8752,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
int r;
PToken tok;
ptoken_init(&tok);
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);

View File

@ -1,6 +1,6 @@
/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */
/*-
* Copyright (c) 2017-2019 K.Kosako
* Copyright (c) 2017-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -33,6 +33,32 @@
/* Generated by make_unicode_property_data.py. */
/*-
* Copyright (c) 2016-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */
static const OnigCodePoint

View File

@ -33,6 +33,32 @@
/* Generated by make_unicode_property_data.py. */
/*-
* Copyright (c) 2016-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */
static const OnigCodePoint

View File

@ -1,6 +1,6 @@
/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */
/*-
* Copyright (c) 2019 K.Kosako
* Copyright (c) 2019-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -15,5 +15,5 @@
#define UCHARDET_VER 2018.09.27
#define TINYEXPR_VER 2018.05.11
#define UTHASH_VER 2.1.0
#define VERSION_PATCH BETA
#define VERSION_COMMIT_ID dkt1-amr
#define VERSION_PATCH RC3
#define VERSION_COMMIT_ID t7820-rk