mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
Merge pull request #2182 from RaiKoHoff/RC2_DEV
Update Oniguruma RegEx RC1 for v6.9.5
This commit is contained in:
commit
e77adc6625
@ -1 +1 @@
|
||||
330
|
||||
331
|
||||
|
||||
@ -1,9 +1,26 @@
|
||||
History
|
||||
|
||||
2020/04/03: Release Candidate 1 for Version 6.9.5
|
||||
|
||||
2020/03/30: remove src/*.py and src/*.sh from distribution files
|
||||
2020/03/27: NEW: Code point sequence notation \x{HHHH ...}, \o{OOOO ...}
|
||||
2020/03/24: NEW API: maximum nesting level of subexp call
|
||||
2020/03/22: #165: change enable-posix-api default from YES to NO
|
||||
2020/03/15: update Unicode version to 13.0.0
|
||||
2020/03/10: add test_back.c
|
||||
2020/03/08: tune output of debug in print_optimize_info()
|
||||
2020/03/02: fix #186: Allow regset search to succeed at end of string
|
||||
2020/02/13: NEW API: retry-limit-in-search functions
|
||||
2020/01/20: add ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND flag
|
||||
2019/12/27: add USE_REGSET switch
|
||||
2019/12/20: remove OPTIMIZE_STR_CASE_FOLD
|
||||
2019/12/13: add test/test_syntax.c
|
||||
2019/12/13: add ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH flag
|
||||
|
||||
|
||||
2019/11/29: Version 6.9.4
|
||||
|
||||
2019/11/22: Release Candidate 3 for Version 6.9.4
|
||||
|
||||
2019/11/20: fix a problem found by libFuzzer test
|
||||
2019/11/14: Release Candidate 2 for Version 6.9.4
|
||||
2019/11/12: fix integer overflow by nested quantifier
|
||||
|
||||
@ -30,9 +30,11 @@ Supported character encodings:
|
||||
Master branch
|
||||
-------------
|
||||
|
||||
* POSIX API disabled by default for Unix (* Enabled by: configure --enable-posix-api=yes)
|
||||
* Update Unicode version 13.0.0
|
||||
* NEW: Code point sequence notation \x{HHHH HHHH ...}, \o{OOOO OOOO ...}
|
||||
* NEW API: retry limit in search functions
|
||||
* Limit on maximum nesting level of subexp call (16)
|
||||
* NEW API: maximum nesting level of subexp call
|
||||
* Fixed behavior of isolated options in Perl and Java syntaxes. /...(?i).../
|
||||
|
||||
|
||||
@ -225,7 +227,16 @@ Install
|
||||
|
||||
(I have checked by Visual Studio Community 2015)
|
||||
|
||||
Alternatively, you can build and install oniguruma using [vcpkg](https://github.com/microsoft/vcpkg/) dependency manager:
|
||||
|
||||
1. git clone https://github.com/Microsoft/vcpkg.git
|
||||
2. cd vcpkg
|
||||
3. ./bootstrap-vcpkg.bat
|
||||
4. ./vcpkg integrate install
|
||||
5. ./vcpkg install oniguruma
|
||||
|
||||
The oniguruma port in vcpkg is kept up to date by Microsoft team members and community contributors.
|
||||
If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
|
||||
|
||||
Regular Expressions
|
||||
-------------------
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Oniguruma API Version 6.9.5 2020/02/19
|
||||
Oniguruma API Version 6.9.5 2020/03/25
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
@ -611,8 +611,8 @@ Oniguruma API Version 6.9.5 2020/02/19
|
||||
|
||||
|
||||
# int onig_foreach_name(regex_t* reg,
|
||||
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
|
||||
void* arg)
|
||||
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
|
||||
void* arg)
|
||||
|
||||
Iterate function call for all names.
|
||||
|
||||
@ -908,6 +908,21 @@ Oniguruma API Version 6.9.5 2020/02/19
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
|
||||
# int onig_get_subexp_call_max_nest_level(void)
|
||||
|
||||
Return the limit of subexp call nest level.
|
||||
(default: 20)
|
||||
|
||||
normal return: current limit value
|
||||
|
||||
|
||||
# int onig_set_subexp_call_max_nest_level(int max_level)
|
||||
|
||||
Set the limit of subexp call nest level.
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
|
||||
# OnigCalloutFunc onig_get_progress_callout(void)
|
||||
|
||||
Get a function for callouts of contents in progress.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Oniguruma Regular Expressions Version 6.9.5 2020/01/28
|
||||
Oniguruma Regular Expressions Version 6.9.5 2020/03/27
|
||||
|
||||
syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
|
||||
@ -34,6 +34,15 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
(* \b as backspace is effective in character class only)
|
||||
|
||||
|
||||
2.1 Code point sequences
|
||||
|
||||
Hexadecimal code point (1-8 digits)
|
||||
\x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH}
|
||||
|
||||
Octal code point (1-11 digits)
|
||||
\o{17777777777 17777777777 ... 17777777777}
|
||||
|
||||
|
||||
3. Character types
|
||||
|
||||
. any character (except newline)
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# make_unicode_egcb_data.py
|
||||
# Copyright (c) 2017-2019 K.Kosako
|
||||
# Copyright (c) 2017-2020 K.Kosako
|
||||
|
||||
import sys
|
||||
import re
|
||||
@ -195,7 +195,7 @@ PROPS = sorted(PROPS)
|
||||
print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */'
|
||||
COPYRIGHT = '''
|
||||
/*-
|
||||
* Copyright (c) 2017-2019 K.Kosako
|
||||
* Copyright (c) 2017-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@ -418,10 +418,39 @@ def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = No
|
||||
merge_props(PROPS, props)
|
||||
return dic, props, ver_m
|
||||
|
||||
|
||||
### main ###
|
||||
argv = sys.argv
|
||||
argc = len(argv)
|
||||
|
||||
COPYRIGHT = '''
|
||||
/*-
|
||||
* Copyright (c) 2016-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
'''.strip()
|
||||
|
||||
POSIX_ONLY = False
|
||||
INCLUDE_GRAPHEME_CLUSTER_DATA = False
|
||||
|
||||
@ -485,10 +514,14 @@ if INCLUDE_GRAPHEME_CLUSTER_DATA:
|
||||
add_posix_props(DIC)
|
||||
PROPS = sorted(PROPS)
|
||||
|
||||
|
||||
s = '''%{
|
||||
/* Generated by make_unicode_property_data.py. */
|
||||
'''
|
||||
print s
|
||||
print COPYRIGHT
|
||||
print ''
|
||||
|
||||
for prop in POSIX_LIST:
|
||||
print_property(prop, DIC[prop], "POSIX [[:%s:]]" % prop)
|
||||
|
||||
|
||||
@ -935,6 +935,10 @@ int onig_set_capture_num_limit P_((int num));
|
||||
ONIG_EXTERN
|
||||
int onig_set_parse_depth_limit P_((unsigned int depth));
|
||||
ONIG_EXTERN
|
||||
int onig_get_subexp_call_max_nest_level P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_subexp_call_max_nest_level P_((int level));
|
||||
ONIG_EXTERN
|
||||
int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges));
|
||||
ONIG_EXTERN
|
||||
int onig_end P_((void));
|
||||
|
||||
@ -1221,7 +1221,7 @@ struct OnigCalloutArgsStruct {
|
||||
#define RETRY_IN_MATCH_ARG_INIT(msa,mpv)
|
||||
#endif
|
||||
|
||||
#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
|
||||
#if defined(USE_CALL)
|
||||
#define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
|
||||
#else
|
||||
#define POP_CALL
|
||||
@ -2541,6 +2541,7 @@ backref_check_at_nested_level(regex_t* reg,
|
||||
}
|
||||
#endif /* USE_BACKREF_WITH_LEVEL */
|
||||
|
||||
static int SubexpCallMaxNestLevel = DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL;
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
|
||||
@ -2867,7 +2868,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
OnigEncoding encode = reg->enc;
|
||||
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
|
||||
|
||||
#if defined(USE_CALL) && defined(SUBEXP_CALL_MAX_NEST_LEVEL)
|
||||
#ifdef USE_CALL
|
||||
unsigned long subexp_call_nest_counter = 0;
|
||||
#endif
|
||||
|
||||
@ -3189,7 +3190,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
CASE_OP(CCLASS)
|
||||
DATA_ENSURE(1);
|
||||
if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
|
||||
s++;
|
||||
s += enclen(encode, s);
|
||||
INC_OP;
|
||||
JUMP_OUT_WITH_SPREV_SET;
|
||||
|
||||
@ -4046,11 +4047,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
#ifdef USE_CALL
|
||||
CASE_OP(CALL)
|
||||
#ifdef SUBEXP_CALL_MAX_NEST_LEVEL
|
||||
if (subexp_call_nest_counter == SUBEXP_CALL_MAX_NEST_LEVEL)
|
||||
if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
|
||||
goto fail;
|
||||
subexp_call_nest_counter++;
|
||||
#endif
|
||||
addr = p->call.addr;
|
||||
INC_OP; STACK_PUSH_CALL_FRAME(p);
|
||||
p = reg->ops + addr;
|
||||
@ -4060,9 +4059,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
CASE_OP(RETURN)
|
||||
STACK_RETURN(p);
|
||||
STACK_PUSH_RETURN;
|
||||
#ifdef SUBEXP_CALL_MAX_NEST_LEVEL
|
||||
subexp_call_nest_counter--;
|
||||
#endif
|
||||
JUMP_OUT;
|
||||
#endif
|
||||
|
||||
@ -5687,6 +5684,19 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_get_subexp_call_max_nest_level(void)
|
||||
{
|
||||
return SubexpCallMaxNestLevel;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_set_subexp_call_max_nest_level(int level)
|
||||
{
|
||||
SubexpCallMaxNestLevel = level;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern OnigEncoding
|
||||
onig_get_encoding(regex_t* reg)
|
||||
{
|
||||
|
||||
@ -88,7 +88,7 @@
|
||||
#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
|
||||
#define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */
|
||||
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
|
||||
#define SUBEXP_CALL_MAX_NEST_LEVEL 16
|
||||
#define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20
|
||||
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
@ -3544,6 +3544,129 @@ scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
static int
|
||||
scan_number_of_base(UChar** src, UChar* end, int minlen,
|
||||
OnigEncoding enc, OnigCodePoint* rcode, int base)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (base == 16)
|
||||
r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
|
||||
else if (base == 8)
|
||||
r = scan_octal_number(src, end, minlen, 11, enc, rcode);
|
||||
else
|
||||
r = ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* True when c is a separator between code points of a sequence: a space or a newline. */
#define IS_CODE_POINT_DIVIDE(c) ((c) == ' ' || (c) == '\n')
|
||||
|
||||
/*
 * Parser state used by check_code_point_sequence().
 * Enumerators take the implicit values 0, 1, 2 in declaration order.
 */
enum CPS_STATE {
  CPS_EMPTY,  /* 0: the end value of a range was just read; '-' is rejected */
  CPS_START,  /* 1: initial state / a plain value was read; '-' may follow  */
  CPS_RANGE   /* 2: '-' was read; the end value of the range must follow    */
};
|
||||
|
||||
static int
|
||||
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc,
|
||||
int in_cc)
|
||||
{
|
||||
int r;
|
||||
int n;
|
||||
int end_digit;
|
||||
int state;
|
||||
OnigCodePoint code;
|
||||
OnigCodePoint c;
|
||||
PFETCH_READY;
|
||||
|
||||
end_digit = FALSE;
|
||||
state = CPS_START;
|
||||
n = 0;
|
||||
while (! PEND) {
|
||||
start:
|
||||
PFETCH(c);
|
||||
if (c == '}') {
|
||||
end_char:
|
||||
if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
return n;
|
||||
}
|
||||
|
||||
if (IS_CODE_POINT_DIVIDE(c)) {
|
||||
while (! PEND) {
|
||||
PFETCH(c);
|
||||
if (! IS_CODE_POINT_DIVIDE(c)) break;
|
||||
}
|
||||
if (IS_CODE_POINT_DIVIDE(c))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
else if (c == '-' && in_cc == TRUE) {
|
||||
range:
|
||||
if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
end_digit = FALSE;
|
||||
state = CPS_RANGE;
|
||||
goto start;
|
||||
}
|
||||
else if (end_digit == TRUE) {
|
||||
if (base == 16) {
|
||||
if (IS_CODE_XDIGIT_ASCII(enc, c))
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
else if (base == 8) {
|
||||
if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
|
||||
if (c == '}') goto end_char;
|
||||
if (c == '-' && in_cc == TRUE) goto range;
|
||||
|
||||
PUNFETCH;
|
||||
r = scan_number_of_base(&p, end, 1, enc, &code, base);
|
||||
if (r != 0) return r;
|
||||
n++;
|
||||
end_digit = TRUE;
|
||||
state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
|
||||
}
|
||||
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
|
||||
static int
|
||||
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
|
||||
{
|
||||
int r;
|
||||
OnigCodePoint c;
|
||||
UChar* p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
while (! PEND) {
|
||||
PFETCH(c);
|
||||
if (! IS_CODE_POINT_DIVIDE(c)) break;
|
||||
}
|
||||
if (IS_CODE_POINT_DIVIDE(c))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
|
||||
if (c == '}') {
|
||||
*src = p;
|
||||
return 1; /* end of sequence */
|
||||
}
|
||||
else if (c == '-' && in_cc == TRUE) {
|
||||
*src = p;
|
||||
return 2; /* range */
|
||||
}
|
||||
|
||||
PUNFETCH;
|
||||
r = scan_number_of_base(&p, end, 1, enc, rcode, base);
|
||||
if (r != 0) return r;
|
||||
|
||||
*src = p;
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
|
||||
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
|
||||
BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
|
||||
@ -4229,8 +4352,9 @@ enum TokenSyms {
|
||||
|
||||
typedef struct {
|
||||
enum TokenSyms type;
|
||||
int code_point_continue;
|
||||
int escaped;
|
||||
int base; /* is number: 8, 16 (used in [....]) */
|
||||
int base_num; /* is number: 8, 16 (used in [....]) */
|
||||
UChar* backp;
|
||||
union {
|
||||
UChar* s;
|
||||
@ -4267,6 +4391,11 @@ typedef struct {
|
||||
} u;
|
||||
} PToken;
|
||||
|
||||
static void
|
||||
ptoken_init(PToken* tok)
|
||||
{
|
||||
tok->code_point_continue = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
|
||||
@ -4836,7 +4965,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
|
||||
}
|
||||
|
||||
static int
|
||||
fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
{
|
||||
int r;
|
||||
OnigCodePoint code;
|
||||
@ -4847,6 +4976,24 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
UChar* p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
if (tok->code_point_continue != 0) {
|
||||
r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
|
||||
if (r == 1) {
|
||||
tok->code_point_continue = 0;
|
||||
}
|
||||
else if (r == 2) {
|
||||
tok->type = TK_CC_RANGE;
|
||||
goto end;
|
||||
}
|
||||
else if (r == 0) {
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = code;
|
||||
goto end;
|
||||
}
|
||||
else
|
||||
return r; /* error */
|
||||
}
|
||||
|
||||
if (PEND) {
|
||||
tok->type = TK_EOT;
|
||||
return tok->type;
|
||||
@ -4854,9 +5001,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
PFETCH(c);
|
||||
tok->type = TK_CHAR;
|
||||
tok->base = 0;
|
||||
tok->u.code = c;
|
||||
tok->escaped = 0;
|
||||
tok->base_num = 0;
|
||||
tok->u.code = c;
|
||||
tok->escaped = 0;
|
||||
|
||||
if (c == ']') {
|
||||
tok->type = TK_CC_CLOSE;
|
||||
@ -4953,16 +5100,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
|
||||
PINC;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 8;
|
||||
tok->u.code = code;
|
||||
}
|
||||
else {
|
||||
/* can't read nothing or invalid format */
|
||||
p = prev;
|
||||
}
|
||||
tok->base_num = 8;
|
||||
goto brace_code_point_entry;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -4980,10 +5119,20 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
|
||||
PINC;
|
||||
tok->base_num = 16;
|
||||
brace_code_point_entry:
|
||||
if ((p > prev + enclen(enc, prev))) {
|
||||
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
if (PPEEK_IS('}')) {
|
||||
PINC;
|
||||
}
|
||||
else {
|
||||
r = check_code_point_sequence(p, end, tok->base_num, enc, TRUE);
|
||||
if (r < 0) return r;
|
||||
if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
tok->code_point_continue = TRUE;
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = code;
|
||||
}
|
||||
else {
|
||||
@ -4998,8 +5147,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
tok->base_num = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5013,9 +5162,9 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = code;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base_num = 16;
|
||||
tok->u.code = code;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5031,8 +5180,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
tok->base_num = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5096,15 +5245,29 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
UChar* p = *src;
|
||||
PFETCH_READY;
|
||||
|
||||
if (tok->code_point_continue != 0) {
|
||||
r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
|
||||
if (r == 1) {
|
||||
tok->code_point_continue = 0;
|
||||
}
|
||||
else if (r == 0) {
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = code;
|
||||
goto out;
|
||||
}
|
||||
else
|
||||
return r; /* error */
|
||||
}
|
||||
|
||||
start:
|
||||
if (PEND) {
|
||||
tok->type = TK_EOT;
|
||||
return tok->type;
|
||||
}
|
||||
|
||||
tok->type = TK_STRING;
|
||||
tok->base = 0;
|
||||
tok->backp = p;
|
||||
tok->type = TK_STRING;
|
||||
tok->base_num = 0;
|
||||
tok->backp = p;
|
||||
|
||||
PFETCH(c);
|
||||
if (IS_MC_ESC_CODE(c, syn)) {
|
||||
@ -5359,15 +5522,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
|
||||
PINC;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = code;
|
||||
}
|
||||
else {
|
||||
/* can't read nothing or invalid format */
|
||||
p = prev;
|
||||
}
|
||||
tok->base_num = 8;
|
||||
goto brace_code_point_entry;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5384,8 +5540,19 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
|
||||
PINC;
|
||||
tok->base_num = 16;
|
||||
brace_code_point_entry:
|
||||
if ((p > prev + enclen(enc, prev))) {
|
||||
if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
if (PPEEK_IS('}')) {
|
||||
PINC;
|
||||
}
|
||||
else {
|
||||
r = check_code_point_sequence(p, end, tok->base_num, enc, FALSE);
|
||||
if (r < 0) return r;
|
||||
if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
tok->code_point_continue = TRUE;
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->u.code = code;
|
||||
}
|
||||
@ -5401,8 +5568,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
tok->base_num = 16;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5416,9 +5583,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (p == prev) { /* can't read nothing. */
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base = 16;
|
||||
tok->u.code = code;
|
||||
tok->type = TK_CODE_POINT;
|
||||
tok->base_num = 16;
|
||||
tok->u.code = code;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5466,8 +5633,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
code = 0; /* but, it's not error */
|
||||
}
|
||||
tok->type = TK_CRUDE_BYTE;
|
||||
tok->base = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
tok->base_num = 8;
|
||||
tok->u.byte = (UChar )code;
|
||||
}
|
||||
else if (c != '0') {
|
||||
PINC;
|
||||
@ -5863,9 +6030,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_VARIABLE_META_CHARS
|
||||
out:
|
||||
#endif
|
||||
*src = p;
|
||||
return tok->type;
|
||||
}
|
||||
@ -6416,10 +6581,10 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
INC_PARSE_DEPTH(env->parse_depth);
|
||||
|
||||
prev_cc = (CClassNode* )NULL;
|
||||
r = fetch_token_in_cc(tok, src, end, env);
|
||||
r = fetch_token_cc(tok, src, end, env);
|
||||
if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
|
||||
neg = 1;
|
||||
r = fetch_token_in_cc(tok, src, end, env);
|
||||
r = fetch_token_cc(tok, src, end, env);
|
||||
}
|
||||
else {
|
||||
neg = 0;
|
||||
@ -6461,19 +6626,19 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
break;
|
||||
|
||||
case TK_CRUDE_BYTE:
|
||||
/* tok->base != 0 : octal or hexadec. */
|
||||
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
|
||||
/* tok->base_num != 0 : octal or hexadec. */
|
||||
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
|
||||
int i, j;
|
||||
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
||||
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
|
||||
UChar* psave = p;
|
||||
int base = tok->base;
|
||||
int base_num = tok->base_num;
|
||||
|
||||
buf[0] = tok->u.byte;
|
||||
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
if (r != TK_CRUDE_BYTE || tok->base != base) {
|
||||
if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
|
||||
fetched = 1;
|
||||
break;
|
||||
}
|
||||
@ -6496,7 +6661,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
else if (i > len) { /* fetch back */
|
||||
p = psave;
|
||||
for (i = 1; i < len; i++) {
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
}
|
||||
fetched = 0;
|
||||
}
|
||||
@ -6577,7 +6742,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
case TK_CC_RANGE:
|
||||
if (state == CS_VALUE) {
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
|
||||
fetched = 1;
|
||||
@ -6604,7 +6769,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
in_code = tok->u.code;
|
||||
in_raw = 0;
|
||||
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
|
||||
fetched = 1;
|
||||
@ -6619,7 +6784,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
goto any_char_in; /* [!--] is allowed */
|
||||
}
|
||||
else { /* CS_COMPLETE */
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
|
||||
fetched = 1;
|
||||
@ -6702,7 +6867,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (fetched)
|
||||
r = tok->type;
|
||||
else {
|
||||
r = fetch_token_in_cc(tok, &p, end, env);
|
||||
r = fetch_token_cc(tok, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
}
|
||||
}
|
||||
@ -8587,6 +8752,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
|
||||
int r;
|
||||
PToken tok;
|
||||
|
||||
ptoken_init(&tok);
|
||||
r = fetch_token(&tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */
|
||||
/*-
|
||||
* Copyright (c) 2017-2019 K.Kosako
|
||||
* Copyright (c) 2017-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@ -33,6 +33,32 @@
|
||||
|
||||
/* Generated by make_unicode_property_data.py. */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2016-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */
|
||||
static const OnigCodePoint
|
||||
|
||||
@ -33,6 +33,32 @@
|
||||
|
||||
/* Generated by make_unicode_property_data.py. */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2016-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* PROPERTY: 'NEWLINE': POSIX [[:NEWLINE:]] */
|
||||
static const OnigCodePoint
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */
|
||||
/*-
|
||||
* Copyright (c) 2019 K.Kosako
|
||||
* Copyright (c) 2019-2020 K.Kosako
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
<assemblyIdentity
|
||||
name="Notepad3"
|
||||
processorArchitecture="*"
|
||||
version="5.20.330.1"
|
||||
version="5.20.331.1"
|
||||
type="win32"
|
||||
/>
|
||||
<description>Notepad3 RC3</description>
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
#define SAPPNAME "Notepad3"
|
||||
#define VERSION_MAJOR 5
|
||||
#define VERSION_MINOR 20
|
||||
#define VERSION_REV 330
|
||||
#define VERSION_REV 331
|
||||
#define VERSION_BUILD 1
|
||||
#define SCINTILLA_VER 432
|
||||
#define ONIGURUMA_REGEX_VER 6.9.4
|
||||
@ -16,4 +16,4 @@
|
||||
#define TINYEXPR_VER 2018.05.11
|
||||
#define UTHASH_VER 2.1.0
|
||||
#define VERSION_PATCH RC3
|
||||
#define VERSION_COMMIT_ID dkt1-amr
|
||||
#define VERSION_COMMIT_ID t7820-rk
|
||||
|
||||
Loading…
Reference in New Issue
Block a user