diff --git a/oniguruma/doc/RE b/oniguruma/doc/RE index 8975d9e88..4561698a7 100644 --- a/oniguruma/doc/RE +++ b/oniguruma/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.9.5 2020/01/23 +Oniguruma Regular Expressions Version 6.9.5 2020/01/28 syntax: ONIG_SYNTAX_ONIGURUMA (default) @@ -132,6 +132,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {,n} at least 0 but no more than n times ({0,n}) {n} n times + reluctant ?? 0 or 1 times @@ -141,6 +142,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {n,}? at least n times {,n}? at least 0 but not more than n times (== {0,n}?) + {n}? is reluctant operator in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL only. + (In that case, it doesn't make sense to write so.) + In default syntax, /a{n}?/ === /(?:a{n})?/ + + possessive (greedy and does not backtrack once match) ?+ 1 or 0 times @@ -148,8 +154,8 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) ++ 1 or more times {n,m} (n > m) at least m but not more than n times - {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and - ONIG_SYNTAX_PERL only. + {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and + ONIG_SYNTAX_PERL only. ex. /a*+/ === /(?>a*)/ diff --git a/oniguruma/src/make_unicode_fold_data.py b/oniguruma/src/make_unicode_fold_data.py index b9085c599..cfe7b8b4e 100644 --- a/oniguruma/src/make_unicode_fold_data.py +++ b/oniguruma/src/make_unicode_fold_data.py @@ -28,6 +28,35 @@ UNFOLDS = {} TURKISH_UNFOLDS = {} LOCALE_UNFOLDS = {} +COPYRIGHT = ''' +/*- + * Copyright (c) 2017-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +'''.strip() + + class Entry: def __init__(self, fold): self.fold = fold @@ -236,6 +265,8 @@ def output_macros(f, name): def output_fold_source(f, out_comment): print >> f, "/* This file was generated by make_unicode_fold_data.py. */" + print >> f, COPYRIGHT + print >> f, "\n" print >> f, '#include "regenc.h"' print >> f, '' if VERSION_INFO[0] < 0: @@ -249,41 +280,11 @@ def output_fold_source(f, out_comment): output_fold_data(f, DataName, out_comment) -HEAD = ''' -%{ -/* This gperf source file was generated by make_unicode_fold_data.py */ +def output_gperf_unfold_key(f): + head = "%{\n/* This gperf source file was generated by make_unicode_fold_data.py */\n\n" + COPYRIGHT + """\ -/*- - * Copyright (c) 2017-2020 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ #include "regint.h" %} -'''.strip() - -def output_gperf_unfold_key(f): - head = HEAD + """\ struct ByUnfoldKey { OnigCodePoint code; @@ -303,7 +304,10 @@ struct ByUnfoldKey { print >> f, '%%' def output_gperf_fold_key(f, key_len): - head = HEAD + """\ + head = "%{\n/* This gperf source file was generated by make_unicode_fold_data.py */\n\n" + COPYRIGHT + """\ + +#include "regint.h" +%} short int %% diff --git a/oniguruma/src/regcomp.c b/oniguruma/src/regcomp.c index 139cef6cd..5f6cdda2d 100644 --- a/oniguruma/src/regcomp.c +++ b/oniguruma/src/regcomp.c @@ -765,7 +765,11 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env, UChar *s = sn->s; if (NODE_IS_IGNORECASE(node) && ! NODE_STRING_IS_CRUDE(node)) { - r = ONIGERR_PARSER_BUG; + /* Such a case is possible. + ex. /(?i)(?<=\1)(a)/ + Backref node refer to capture group, but it doesn't tune yet. + */ + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; break; } @@ -2055,8 +2059,7 @@ compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ScanEnv* env) if (r < 0) return r; r = add_op(reg, OP_MOVE); if (r != 0) return r; - //ORIG: COP(reg)->move.n = (RelPositionType )(-ci.min); - COP(reg)->move.n = (RelPositionType )(0-ci.min); + COP(reg)->move.n = -((RelPositionType )ci.min); r = compile_tree(node->lead_node, reg, env); if (r != 0) return r; } @@ -2203,8 +2206,7 @@ compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg, if (r < 0) return r; r = add_op(reg, OP_MOVE); if (r != 0) return r; - //ORIG: COP(reg)->move.n = (RelPositionType )(-ci.min); - COP(reg)->move.n = (RelPositionType )(0-ci.min); + COP(reg)->move.n = -((RelPositionType )ci.min); r = compile_tree(node->lead_node, reg, env); if (r != 0) return r; @@ -3005,8 +3007,18 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) AbsAddrType* paddr; for (i = 0; i < uslist->num; i++) { - if (! NODE_IS_FIXED_ADDR(uslist->us[i].target)) - return ONIGERR_PARSER_BUG; + if (! NODE_IS_FIXED_ADDR(uslist->us[i].target)) { + if (NODE_IS_CALLED(uslist->us[i].target)) + return ONIGERR_PARSER_BUG; + else { + /* CASE: called node doesn't have called address. + ex. /((|a\g<1>)(.){0}){0}\g<3>/ + group-1 doesn't called, but compiled into bytecodes, + because group-3 is referred from outside. + */ + continue; + } + } en = BAG_(uslist->us[i].target); addr = en->m.called_addr; @@ -3486,7 +3498,7 @@ check_called_node_in_look_behind(Node* node, int not) static int -check_node_in_look_behind(Node* node, int not) +check_node_in_look_behind(Node* node, int not, int* used) { static unsigned int bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT }; @@ -3505,12 +3517,12 @@ check_node_in_look_behind(Node* node, int not) case NODE_LIST: case NODE_ALT: do { - r = check_node_in_look_behind(NODE_CAR(node), not); + r = check_node_in_look_behind(NODE_CAR(node), not, used); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_QUANT: - r = check_node_in_look_behind(NODE_BODY(node), not); + r = check_node_in_look_behind(NODE_BODY(node), not, used); break; case NODE_BAG: @@ -3519,14 +3531,19 @@ check_node_in_look_behind(Node* node, int not) if (((1<type) & bag_mask[not]) == 0) return 1; - r = check_node_in_look_behind(NODE_BODY(node), not); - if (r == 0 && en->type == BAG_IF_ELSE) { + r = check_node_in_look_behind(NODE_BODY(node), not, used); + if (r != 0) break; + + if (en->type == BAG_MEMORY) { + if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)) *used = TRUE; + } + else if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { - r = check_node_in_look_behind(en->te.Then, not); + r = check_node_in_look_behind(en->te.Then, not, used); if (r != 0) break; } if (IS_NOT_NULL(en->te.Else)) { - r = check_node_in_look_behind(en->te.Else, not); + r = check_node_in_look_behind(en->te.Else, not, used); } } } @@ -3538,7 +3555,7 @@ check_node_in_look_behind(Node* node, int not) return 1; if (IS_NOT_NULL(NODE_BODY(node))) - r = check_node_in_look_behind(NODE_BODY(node), not); + r = check_node_in_look_behind(NODE_BODY(node), not, used); break; case NODE_GIMMICK: @@ -4660,10 +4677,18 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) { int r; int state1; + int used; MinMaxCharLen ci; Node* body; AnchorNode* an = ANCHOR_(node); + used = FALSE; + r = check_node_in_look_behind(NODE_ANCHOR_BODY(an), + an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0, + &used); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + if (an->type == ANCR_LOOK_BEHIND_NOT) state1 = state | IN_NOT | IN_LOOK_BEHIND; else @@ -4681,7 +4706,13 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) r = node_char_len(body, reg, &ci, env); if (r >= 0) { - if (ci.min == 0 && ci.min_is_sure != 0) { + /* #177: overflow in onigenc_step_back() */ + if ((ci.max != INFINITE_LEN && ci.max > LOOK_BEHIND_MAX_CHAR_LEN) + || ci.min > LOOK_BEHIND_MAX_CHAR_LEN) { + return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + if (ci.min == 0 && ci.min_is_sure != 0 && used == FALSE) { if (an->type == ANCR_LOOK_BEHIND_NOT) r = onig_node_reset_fail(node); else @@ -4713,12 +4744,17 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) } else { Node* tail; - an->char_min_len = ci.min; - an->char_max_len = ci.max; - r = get_tree_tail_literal(body, &tail, reg); - if (r == GET_VALUE_FOUND) { - r = onig_node_copy(&(an->lead_node), tail); - if (r != 0) return r; + + /* check lead_node is already set by double call after + divide_look_behind_alternatives() */ + if (IS_NULL(an->lead_node)) { + an->char_min_len = ci.min; + an->char_max_len = ci.max; + r = get_tree_tail_literal(body, &tail, reg); + if (r == GET_VALUE_FOUND) { + r = onig_node_copy(&(an->lead_node), tail); + if (r != 0) return r; + } } r = ONIG_NORMAL; } @@ -5610,10 +5646,6 @@ tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCR_LOOK_BEHIND: case ANCR_LOOK_BEHIND_NOT: - r = check_node_in_look_behind(NODE_ANCHOR_BODY(an), - an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = tune_look_behind(node, reg, state, env); break; @@ -6170,7 +6202,7 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc) to->len = i; - if (p >= end && to->len == (int )(end - s)) + if (p >= end) to->reach_end = 1; } @@ -6623,6 +6655,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) OnigLen min, max; QuantNode* qn = QUANT_(node); + /* Issue #175 + ex. /\g<1>{0}(?<=|())/ + + Empty and unused nodes in look-behind is removed in + tune_look_behind(). + Called group nodes are assigned to be not called if the caller side is + inside of zero-repetition. + As a result, the nodes are considered unused. + */ + if (qn->upper == 0) { + mml_set_min_max(&opt->len, 0, 0); + break; + } + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r != 0) break; @@ -6795,7 +6841,7 @@ set_optimize_map(regex_t* reg, OptMap* m) reg->dist_max = m->mm.max; if (reg->dist_min != INFINITE_LEN) { - reg->threshold_len = reg->dist_min + 1; + reg->threshold_len = reg->dist_min + ONIGENC_MBC_MINLEN(reg->enc); } } @@ -6988,9 +7034,8 @@ print_anchor(FILE* f, int anchor) static void print_optimize_info(FILE* f, regex_t* reg) { - static const char* on[] = { "NONE", "STR", - "STR_FAST", "STR_FAST_STEP_FORWARD", - "STR_CASE_FOLD", "MAP" }; + static const char* on[] = + { "NONE", "STR", "STR_FAST", "STR_FAST_STEP_FORWARD", "MAP" }; fprintf(f, "optimize: %s\n", on[reg->optimize]); fprintf(f, " anchor: "); print_anchor(f, reg->anchor); @@ -7682,7 +7727,7 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, " ", node); switch (CTYPE_(node)->ctype) { case CTYPE_ANYCHAR: - fprintf(f, "", node); + fprintf(f, "anychar"); break; case ONIGENC_CTYPE_WORD: @@ -7769,9 +7814,10 @@ print_indent_tree(FILE* f, Node* node, int indent) #endif case NODE_QUANT: - fprintf(f, "{%d,%d}%s\n", node, + fprintf(f, "{%d,%d}%s%s\n", node, QUANT_(node)->lower, QUANT_(node)->upper, - (QUANT_(node)->greedy ? "" : "?")); + (QUANT_(node)->greedy ? "" : "?"), + QUANT_(node)->include_referred == 0 ? "" : " referred"); print_indent_tree(f, NODE_BODY(node), indent + add); break; @@ -7811,6 +7857,10 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case BAG_MEMORY: fprintf(f, "memory:%d", BAG_(node)->m.regnum); + if (NODE_IS_CALLED(node)) + fprintf(f, ", called"); + if (NODE_IS_FIXED_ADDR(node)) + fprintf(f, ", fixed-addr"); break; case BAG_STOP_BACKTRACK: fprintf(f, "stop-bt"); diff --git a/oniguruma/src/regexec.c b/oniguruma/src/regexec.c index 0487f88d2..76fd754fb 100644 --- a/oniguruma/src/regexec.c +++ b/oniguruma/src/regexec.c @@ -2536,9 +2536,9 @@ typedef struct { #endif /* USE_THREADED_CODE */ #define INC_OP p++ -#define NEXT_OUT SOP_OUT; NEXT_OP -#define JUMP_OUT SOP_OUT; JUMP_OP -#define BREAK_OUT SOP_OUT; BREAK_OP +#define JUMP_OUT_WITH_SPREV_SET SOP_OUT; NEXT_OP +#define JUMP_OUT SOP_OUT; JUMP_OP +#define BREAK_OUT SOP_OUT; BREAK_OP #define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP @@ -2881,7 +2881,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; s++; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(STR_2) DATA_ENSURE(2); @@ -2958,7 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; s++; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(STR_MB2N2) DATA_ENSURE(4); @@ -3043,7 +3043,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail; s++; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MB) DATA_ENSURE(1); @@ -3063,7 +3063,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MIX) DATA_ENSURE(1); @@ -3077,14 +3077,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_NOT) DATA_ENSURE(1); if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail; s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MB_NOT) DATA_ENSURE(1); @@ -3113,7 +3113,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, cc_mb_not_success: INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(CCLASS_MIX_NOT) DATA_ENSURE(1); @@ -3127,7 +3127,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR) DATA_ENSURE(1); @@ -3136,7 +3136,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR_ML) DATA_ENSURE(1); @@ -3144,7 +3144,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); s += n; INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(ANYCHAR_STAR) INC_OP; @@ -3192,7 +3192,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } } - NEXT_OUT; + JUMP_OUT; CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT) { @@ -3216,7 +3216,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - NEXT_OUT; + JUMP_OUT; CASE_OP(WORD) DATA_ENSURE(1); @@ -3225,7 +3225,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(WORD_ASCII) DATA_ENSURE(1); @@ -3234,7 +3234,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(NO_WORD) DATA_ENSURE(1); @@ -3243,7 +3243,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(NO_WORD_ASCII) DATA_ENSURE(1); @@ -3252,7 +3252,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += enclen(encode, s); INC_OP; - NEXT_OUT; + JUMP_OUT_WITH_SPREV_SET; CASE_OP(WORD_BOUNDARY) { @@ -3919,11 +3919,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else { int len; - for (tlen = 0; tlen < p->move.n; tlen++) { + for (tlen = p->move.n; tlen > 0; tlen--) { len = enclen(encode, s); - if (s + len > end) goto fail; sprev = s; s += len; + if (s > end) goto fail; + if (s == end) { + if (tlen != 1) goto fail; + else break; + } } } sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); @@ -5318,17 +5322,16 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) { + if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 && + (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { do { MATCH_AND_RETURN_CHECK(data_range); prev = s; s += enclen(reg->enc, s); - if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); - } + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; diff --git a/oniguruma/src/regint.h b/oniguruma/src/regint.h index 926e96cc9..837045c77 100644 --- a/oniguruma/src/regint.h +++ b/oniguruma/src/regint.h @@ -180,8 +180,9 @@ #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) -#define CHAR_MAP_SIZE 256 -#define INFINITE_LEN ONIG_INFINITE_DISTANCE +#define CHAR_MAP_SIZE 256 +#define INFINITE_LEN ONIG_INFINITE_DISTANCE +#define LOOK_BEHIND_MAX_CHAR_LEN INT_MAX /* escape other system UChar definition */ #ifdef ONIG_ESCAPE_UCHAR_COLLISION diff --git a/oniguruma/src/unicode_fold_data.c b/oniguruma/src/unicode_fold_data.c index 68694b0ba..f2429b056 100644 --- a/oniguruma/src/unicode_fold_data.c +++ b/oniguruma/src/unicode_fold_data.c @@ -1,4 +1,31 @@ /* This file was generated by make_unicode_fold_data.py. */ +/*- + * Copyright (c) 2017-2020 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + #include "regenc.h" #define UNICODE_CASEFOLD_VERSION 120100 diff --git a/src/Notepad3.c b/src/Notepad3.c index c98379f80..20e1c975b 100644 --- a/src/Notepad3.c +++ b/src/Notepad3.c @@ -3279,7 +3279,7 @@ LRESULT MsgInitMenu(HWND hwnd, WPARAM wParam, LPARAM lParam) i == SCLEX_PERL || i == SCLEX_PYTHON || i == SCLEX_PROPERTIES ||i == SCLEX_CONF || i == SCLEX_POWERSHELL || i == SCLEX_BATCH || i == SCLEX_DIFF || i == SCLEX_BASH || i == SCLEX_TCL || i == SCLEX_AU3 || i == SCLEX_LATEX || i == SCLEX_AHKL || i == SCLEX_RUBY || i == SCLEX_CMAKE || i == SCLEX_MARKDOWN || - i == SCLEX_YAML || i == SCLEX_REGISTRY || i == SCLEX_NIMROD || i == SCLEX_TOML) && !ro); + i == SCLEX_YAML || i == SCLEX_REGISTRY || i == SCLEX_NIM || i == SCLEX_TOML) && !ro); EnableCmd(hmenu, CMD_INSERTNEWLINE, !ro); EnableCmd(hmenu, IDM_EDIT_INSERT_TAG, !ro); @@ -4624,60 +4624,65 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) BeginWaitCursor(NULL); switch (SciCall_GetLexer()) { - default: - case SCLEX_NULL: - case SCLEX_CSS: - case SCLEX_DIFF: - case SCLEX_MARKDOWN: - case SCLEX_JSON: - break; - case SCLEX_HTML: - case SCLEX_XML: - case SCLEX_CPP: - case SCLEX_PASCAL: - EditToggleLineComments(Globals.hwndEdit, L"//", false); - break; - case SCLEX_VBSCRIPT: - case SCLEX_VB: - EditToggleLineComments(Globals.hwndEdit, L"'", false); - break; - case SCLEX_MAKEFILE: - case SCLEX_PERL: - case SCLEX_PYTHON: - case SCLEX_CONF: - case SCLEX_BASH: - case SCLEX_TCL: - case SCLEX_RUBY: - case SCLEX_POWERSHELL: - case SCLEX_CMAKE: - case SCLEX_AVS: - case SCLEX_YAML: - case SCLEX_COFFEESCRIPT: - case SCLEX_NIMROD: - case SCLEX_TOML: - EditToggleLineComments(Globals.hwndEdit, L"#", true); - break; - case SCLEX_ASM: - case SCLEX_PROPERTIES: - case SCLEX_AU3: - case SCLEX_AHKL: - case SCLEX_NSIS: // # could also be used instead - case SCLEX_INNOSETUP: - case SCLEX_REGISTRY: - EditToggleLineComments(Globals.hwndEdit, L";", true); - break; - case SCLEX_LUA: - case SCLEX_SQL: - case SCLEX_VHDL: - EditToggleLineComments(Globals.hwndEdit, L"--", true); - break; - case SCLEX_BATCH: - EditToggleLineComments(Globals.hwndEdit, L"rem ", true); - break; - case SCLEX_LATEX: - case SCLEX_MATLAB: - EditToggleLineComments(Globals.hwndEdit, L"%", true); - break; + case SCLEX_CPP: + case SCLEX_D: + case SCLEX_HTML: + case SCLEX_PASCAL: + case SCLEX_RUST: + case SCLEX_XML: + EditToggleLineComments(Globals.hwndEdit, L"//", false); + break; + case SCLEX_VB: + case SCLEX_VBSCRIPT: + EditToggleLineComments(Globals.hwndEdit, L"'", false); + break; + case SCLEX_AVS: + case SCLEX_BASH: + case SCLEX_CMAKE: + case SCLEX_COFFEESCRIPT: + case SCLEX_CONF: + case SCLEX_MAKEFILE: + case SCLEX_NIM: + case SCLEX_PERL: + case SCLEX_POWERSHELL: + case SCLEX_PYTHON: + case SCLEX_R: + case SCLEX_RUBY: + case SCLEX_TCL: + case SCLEX_TOML: + case SCLEX_YAML: + EditToggleLineComments(Globals.hwndEdit, L"#", true); + break; + case SCLEX_AHKL: + case SCLEX_ASM: + case SCLEX_AU3: + case SCLEX_INNOSETUP: + case SCLEX_NSIS: // # could also be used instead + case SCLEX_PROPERTIES: + case SCLEX_REGISTRY: + EditToggleLineComments(Globals.hwndEdit, L";", true); + break; + case SCLEX_LUA: + case SCLEX_SQL: + case SCLEX_VHDL: + EditToggleLineComments(Globals.hwndEdit, L"--", true); + break; + case SCLEX_BATCH: + EditToggleLineComments(Globals.hwndEdit, L"rem ", true); + break; + case SCLEX_LATEX: + case SCLEX_MATLAB: + EditToggleLineComments(Globals.hwndEdit, L"%", true); + break; + // ------------------ + case SCLEX_NULL: + case SCLEX_CSS: + case SCLEX_DIFF: + case SCLEX_JSON: + case SCLEX_MARKDOWN: + default: + // do nothing + break; } EndWaitCursor(); } @@ -4689,55 +4694,63 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) BeginWaitCursor(NULL); switch (SciCall_GetLexer()) { - default: - case SCLEX_NULL: - case SCLEX_VBSCRIPT: - case SCLEX_MAKEFILE: - case SCLEX_VB: - case SCLEX_ASM: - case SCLEX_PERL: - case SCLEX_PYTHON: - case SCLEX_PROPERTIES: - case SCLEX_CONF: - case SCLEX_POWERSHELL: - case SCLEX_BATCH: - case SCLEX_DIFF: - case SCLEX_BASH: - case SCLEX_TCL: - case SCLEX_AU3: - case SCLEX_LATEX: - case SCLEX_AHKL: - case SCLEX_RUBY: - case SCLEX_CMAKE: - case SCLEX_MARKDOWN: - case SCLEX_YAML: - case SCLEX_JSON: - case SCLEX_REGISTRY: - case SCLEX_NIMROD: - case SCLEX_TOML: - break; - case SCLEX_AVS: - case SCLEX_CPP: - case SCLEX_CSS: - case SCLEX_HTML: - case SCLEX_NSIS: - case SCLEX_SQL: - case SCLEX_VHDL: - case SCLEX_XML: - EditEncloseSelection(Globals.hwndEdit, L"/*", L"*/"); - break; - case SCLEX_INNOSETUP: - case SCLEX_PASCAL: - EditEncloseSelection(Globals.hwndEdit, L"{", L"}"); - break; - case SCLEX_LUA: - EditEncloseSelection(Globals.hwndEdit, L"--[[", L"]]"); - break; - case SCLEX_COFFEESCRIPT: - EditEncloseSelection(Globals.hwndEdit, L"###", L"###"); - break; - case SCLEX_MATLAB: - EditEncloseSelection(Globals.hwndEdit, L"%{", L"%}"); + case SCLEX_D: + //~EditEncloseSelection(Globals.hwndEdit, L"/+", L"+/"); + //~break; + case SCLEX_AVS: + case SCLEX_CPP: + case SCLEX_CSS: + case SCLEX_HTML: + case SCLEX_NSIS: + case SCLEX_RUST: + case SCLEX_SQL: + case SCLEX_VHDL: + case SCLEX_XML: + EditEncloseSelection(Globals.hwndEdit, L"/*", L"*/"); + break; + case SCLEX_INNOSETUP: + case SCLEX_PASCAL: + EditEncloseSelection(Globals.hwndEdit, L"{", L"}"); + break; + case SCLEX_LUA: + EditEncloseSelection(Globals.hwndEdit, L"--[[", L"]]"); + break; + case SCLEX_COFFEESCRIPT: + EditEncloseSelection(Globals.hwndEdit, L"###", L"###"); + break; + case SCLEX_MATLAB: + EditEncloseSelection(Globals.hwndEdit, L"%{", L"%}"); + break; + // ------------------ + case SCLEX_NULL: + case SCLEX_AHKL: + case SCLEX_ASM: + case SCLEX_AU3: + case SCLEX_BASH: + case SCLEX_BATCH: + case SCLEX_CMAKE: + case SCLEX_CONF: + case SCLEX_DIFF: + case SCLEX_JSON: + case SCLEX_LATEX: + case SCLEX_MAKEFILE: + case SCLEX_MARKDOWN: + case SCLEX_NIM: + case SCLEX_PERL: + case SCLEX_POWERSHELL: + case SCLEX_PROPERTIES: + case SCLEX_PYTHON: + case SCLEX_R: + case SCLEX_REGISTRY: + case SCLEX_RUBY: + case SCLEX_TCL: + case SCLEX_TOML: + case SCLEX_VB: + case SCLEX_VBSCRIPT: + case SCLEX_YAML: + default: + // do nothing + break; } EndWaitCursor(); } diff --git a/src/Styles.c b/src/Styles.c index 3e011ed02..8b632b9ce 100644 --- a/src/Styles.c +++ b/src/Styles.c @@ -2293,7 +2293,7 @@ void Style_SetIndentGuides(HWND hwnd,bool bShow) if (!Flags.SimpleIndentGuides) { switch (SciCall_GetLexer()) { case SCLEX_PYTHON: - case SCLEX_NIMROD: + case SCLEX_NIM: iIndentView = SC_IV_LOOKFORWARD; break; default: