diff --git a/language/common_res.h b/language/common_res.h index 469b3c625..cf7bb05f2 100644 --- a/language/common_res.h +++ b/language/common_res.h @@ -81,6 +81,7 @@ #define IDS_MUI_EOLMODENAME_CRLF 13017 #define IDS_MUI_EOLMODENAME_CR 13018 #define IDS_MUI_EOLMODENAME_LF 13019 +#define IDS_MUI_INF_PRSVFILEMODTM 13020 #define IDS_MUI_SELRECT 14000 #define IDS_MUI_BUFFERTOOSMALL 14001 diff --git a/language/np3_de_de/strings_de_de.rc b/language/np3_de_de/strings_de_de.rc index f8a324fe3..fab6abf75 100644 --- a/language/np3_de_de/strings_de_de.rc +++ b/language/np3_de_de/strings_de_de.rc @@ -183,6 +183,8 @@ BEGIN IDS_MUI_DROP_NO_FILE "Es wurde kein gültiger Dateiname erkannt. Falls aus einer 32-bit Applikation gedropped wurde, dann versuche die Datei auf die Werkzeugleiste zu ziehen." IDS_MUI_URL_OPEN_FILE "\nAlt + Click um die Datei zu öffnen." IDS_MUI_URL_OPEN_BROWSER "\nCtrl + Click um die URL im Browser zu öffnen." + IDS_MUI_INF_PRSVFILEMODTM + "Die Speicherung mit originalem Zeitstempel wurde aktiviert.\nDiese Option gilt nun für die gesamte Sitzung (Datei)!" END STRINGTABLE diff --git a/language/np3_en_gb/strings_en_gb.rc b/language/np3_en_gb/strings_en_gb.rc index 2390b6de3..302957a88 100644 --- a/language/np3_en_gb/strings_en_gb.rc +++ b/language/np3_en_gb/strings_en_gb.rc @@ -183,6 +183,8 @@ BEGIN IDS_MUI_DROP_NO_FILE "No valid filename retrieved.\nIf dropping from 32-bit application,\nplease drag and drop to Notepad3's tool bar." IDS_MUI_URL_OPEN_FILE "\nAlt + Click to open the file." IDS_MUI_URL_OPEN_BROWSER "\nCtrl + Click to open link in browser." + IDS_MUI_INF_PRSVFILEMODTM + "Preserving original File Modification Timestamp enabled.\nThis option will stay for this session!" END STRINGTABLE diff --git a/language/np3_en_us/strings_en_us.rc b/language/np3_en_us/strings_en_us.rc index f33ca9a2b..0c74041d3 100644 --- a/language/np3_en_us/strings_en_us.rc +++ b/language/np3_en_us/strings_en_us.rc @@ -183,6 +183,8 @@ BEGIN IDS_MUI_DROP_NO_FILE "No valid filename retrieved.\nIf dropping from 32-bit application,\nplease drag and drop to Notepad3's tool bar." IDS_MUI_URL_OPEN_FILE "\nAlt + Click to open the file." IDS_MUI_URL_OPEN_BROWSER "\nCtrl + Click to open link in browser." + IDS_MUI_INF_PRSVFILEMODTM + "Preserving original File Modification Timestamp enabled.\nThis option will stay for this session!" END STRINGTABLE diff --git a/oniguruma/doc/API b/oniguruma/doc/API index 049db02b0..0b0da54b3 100644 --- a/oniguruma/doc/API +++ b/oniguruma/doc/API @@ -168,7 +168,7 @@ Oniguruma API Version 6.9.3 2019/07/06 # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) - This function is deprecate, and it does not allow the case where + This function is deprecated, and it does not allow the case where the encoding of pattern and target is different. Create a regex object. diff --git a/oniguruma/src/regcomp.c b/oniguruma/src/regcomp.c index c6a7cfa74..03cd2e422 100644 --- a/oniguruma/src/regcomp.c +++ b/oniguruma/src/regcomp.c @@ -621,10 +621,16 @@ is_strict_real_node(Node* node) } static int -compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env) +compile_tree_empty_check(QuantNode* qn, regex_t* reg, ScanEnv* env) { int r; - int saved_num_null_check = reg->num_null_check; + int saved_num_null_check; + int emptiness; + Node* body; + + body = NODE_BODY((Node* )qn); + emptiness = qn->emptiness; + saved_num_null_check = reg->num_null_check; if (emptiness != BODY_IS_NOT_EMPTY) { r = add_op(reg, OP_EMPTY_CHECK_START); @@ -633,14 +639,18 @@ compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env) reg->num_null_check++; } - r = compile_tree(node, reg, env); + r = compile_tree(body, reg, env); if (r != 0) return r; if (emptiness != BODY_IS_NOT_EMPTY) { if (emptiness == BODY_IS_EMPTY_POSSIBILITY) r = add_op(reg, OP_EMPTY_CHECK_END); - else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) - r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); + else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) { + if (NODE_IS_EMPTY_STATUS_CHECK(qn) != 0) + r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); + else + r = add_op(reg, OP_EMPTY_CHECK_END); + } else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH); @@ -937,7 +947,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness, r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); if (r != 0) return r; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); + r = compile_tree_empty_check(qn, reg, env); if (r != 0) return r; if ( @@ -1020,7 +1030,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH; } else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?..){0}/ */ + if (qn->include_referred != 0) { /* /(?..){0}/ */ len = OPSIZE_JUMP + tlen; } else @@ -1116,7 +1126,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP; COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0]; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); + r = compile_tree_empty_check(qn, reg, env); if (r != 0) return r; addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1); @@ -1129,7 +1139,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP; COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0]; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); + r = compile_tree_empty_check(qn, reg, env); if (r != 0) return r; addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT); @@ -1139,7 +1149,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); + r = compile_tree_empty_check(qn, reg, env); if (r != 0) return r; addr = -(mod_tlen + (int )OPSIZE_PUSH); @@ -1154,7 +1164,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; COP(reg)->jump.addr = mod_tlen + SIZE_INC; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); + r = compile_tree_empty_check(qn, reg, env); if (r != 0) return r; r = add_op(reg, OP_PUSH); @@ -1163,7 +1173,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } } else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?..){0}/ */ + if (qn->include_referred != 0) { /* /(?..){0}/ */ r = add_op(reg, OP_JUMP); if (r != 0) return r; COP(reg)->jump.addr = tlen + SIZE_INC; @@ -1267,7 +1277,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg) if (NODE_IS_CALLED(node)) { len = OPSIZE_MEMORY_START_PUSH + tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN; - if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)) len += (NODE_IS_RECURSION(node) ? OPSIZE_MEMORY_END_PUSH_REC : OPSIZE_MEMORY_END_PUSH); else @@ -1276,18 +1286,18 @@ compile_length_bag_node(BagNode* node, regex_t* reg) } else if (NODE_IS_RECURSION(node)) { len = OPSIZE_MEMORY_START_PUSH; - len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) + len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum) ? OPSIZE_MEMORY_END_PUSH_REC : OPSIZE_MEMORY_END_REC); } else #endif { - if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum)) len = OPSIZE_MEMORY_START_PUSH; else len = OPSIZE_MEMORY_START; - len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) + len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum) ? OPSIZE_MEMORY_END_PUSH : OPSIZE_MEMORY_END); } break; @@ -1380,7 +1390,7 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) else { len = compile_length_tree(NODE_BAG_BODY(node), reg); len += (OPSIZE_MEMORY_START_PUSH + OPSIZE_RETURN); - if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)) len += (NODE_IS_RECURSION(node) ? OPSIZE_MEMORY_END_PUSH_REC : OPSIZE_MEMORY_END_PUSH); else @@ -1394,7 +1404,7 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) } #endif - if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum)) r = add_op(reg, OP_MEMORY_START_PUSH); else r = add_op(reg, OP_MEMORY_START); @@ -1405,7 +1415,7 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; #ifdef USE_CALL - if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)) r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else @@ -1418,7 +1428,7 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) r = add_op(reg, OP_RETURN); } #else - if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)) r = add_op(reg, OP_MEMORY_END_PUSH); else r = add_op(reg, OP_MEMORY_END); @@ -2303,11 +2313,11 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) } } - loc = env->capture_history; - MEM_STATUS_CLEAR(env->capture_history); + loc = env->cap_history; + MEM_STATUS_CLEAR(env->cap_history); for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { if (MEM_STATUS_AT(loc, i)) { - MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val); + MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val); } } @@ -2871,9 +2881,9 @@ tree_min_len(Node* node, ScanEnv* env) if (NODE_IS_RECURSION(node)) break; backs = BACKREFS_P(br); - len = tree_min_len(mem_env[backs[0]].node, env); + len = tree_min_len(mem_env[backs[0]].mem_node, env); for (i = 1; i < br->back_num; i++) { - tmin = tree_min_len(mem_env[backs[i]].node, env); + tmin = tree_min_len(mem_env[backs[i]].mem_node, env); if (len > tmin) len = tmin; } } @@ -3042,7 +3052,7 @@ tree_max_len(Node* node, ScanEnv* env) } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - tmax = tree_max_len(mem_env[backs[i]].node, env); + tmax = tree_max_len(mem_env[backs[i]].mem_node, env); if (len < tmax) len = tmax; } } @@ -3179,7 +3189,7 @@ check_backrefs(Node* node, ScanEnv* env) if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF); + NODE_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF); } r = 0; } @@ -3193,6 +3203,204 @@ check_backrefs(Node* node, ScanEnv* env) return r; } +static int +set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = set_empty_repeat_node_trav(NODE_CAR(node), empty, env); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_ANCHOR: + { + AnchorNode* an = ANCHOR_(node); + + if (! ANCHOR_HAS_BODY(an)) { + r = 0; + break; + } + + switch (an->type) { + case ANCR_PREC_READ: + case ANCR_LOOK_BEHIND: + empty = NULL_NODE; + break; + default: + break; + } + r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); + } + break; + + case NODE_QUANT: + { + QuantNode* qn = QUANT_(node); + + if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node; + r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); + } + break; + + case NODE_BAG: + if (IS_NOT_NULL(NODE_BODY(node))) { + r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env); + if (r != 0) return r; + } + { + BagNode* en = BAG_(node); + + if (en->type == BAG_MEMORY) { + if (NODE_IS_BACKREF(node)) { + if (IS_NOT_NULL(empty)) + SCANENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty; + } + } + else if (en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = set_empty_repeat_node_trav(en->te.Then, empty, env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = set_empty_repeat_node_trav(en->te.Else, empty, env); + } + } + } + break; + + default: + r = 0; + break; + } + + return r; +} + +static int +is_ancestor_node(Node* node, Node* me) +{ + Node* parent; + + while ((parent = NODE_PARENT(me)) != NULL_NODE) { + if (parent == node) return 1; + me = parent; + } + return 0; +} + +static void +set_empty_status_check_trav(Node* node, ScanEnv* env) +{ + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + set_empty_status_check_trav(NODE_CAR(node), env); + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_ANCHOR: + { + AnchorNode* an = ANCHOR_(node); + + if (! ANCHOR_HAS_BODY(an)) break; + set_empty_status_check_trav(NODE_BODY(node), env); + } + break; + + case NODE_QUANT: + set_empty_status_check_trav(NODE_BODY(node), env); + break; + + case NODE_BAG: + if (IS_NOT_NULL(NODE_BODY(node))) + set_empty_status_check_trav(NODE_BODY(node), env); + { + BagNode* en = BAG_(node); + + if (en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + set_empty_status_check_trav(en->te.Then, env); + } + if (IS_NOT_NULL(en->te.Else)) { + set_empty_status_check_trav(en->te.Else, env); + } + } + } + break; + + case NODE_BACKREF: + { + int i; + int* backs; + MemEnv* mem_env = SCANENV_MEMENV(env); + BackRefNode* br = BACKREF_(node); + backs = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + Node* ernode = mem_env[backs[i]].empty_repeat_node; + if (IS_NOT_NULL(ernode)) { + if (! is_ancestor_node(ernode, node)) { + MEM_STATUS_LIMIT_ON(env->reg->empty_status_mem, backs[i]); + NODE_STATUS_ADD(ernode, EMPTY_STATUS_CHECK); + NODE_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK); + } + } + } + } + break; + + default: + break; + } +} + +static void +set_parent_node_trav(Node* node, Node* parent) +{ + NODE_PARENT(node) = parent; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + set_parent_node_trav(NODE_CAR(node), node); + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_ANCHOR: + if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break; + set_parent_node_trav(NODE_BODY(node), node); + break; + + case NODE_QUANT: + set_parent_node_trav(NODE_BODY(node), node); + break; + + case NODE_BAG: + if (IS_NOT_NULL(NODE_BODY(node))) + set_parent_node_trav(NODE_BODY(node), node); + { + BagNode* en = BAG_(node); + + if (en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) + set_parent_node_trav(en->te.Then, node); + if (IS_NOT_NULL(en->te.Else)) { + set_parent_node_trav(en->te.Else, node); + } + } + } + break; + + default: + break; + } +} + #ifdef USE_CALL @@ -3298,6 +3506,9 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) if ((eret & RECURSION_MUST) == 0) r &= ~RECURSION_MUST; } + else { + r &= ~RECURSION_MUST; + } } else { r = infinite_recursive_call_check(NODE_BODY(node), env, head); @@ -3472,7 +3683,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) r = recursive_call_check_trav(NODE_BODY(node), env, state); if (QUANT_(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - QUANT_(node)->is_refered = 1; + QUANT_(node)->include_referred = 1; } break; @@ -3495,8 +3706,10 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) if (! NODE_IS_RECURSION(node)) { NODE_STATUS_ADD(node, MARK1); r = recursive_call_check(NODE_BODY(node)); - if (r != 0) + if (r != 0) { NODE_STATUS_ADD(node, RECURSION); + MEM_STATUS_ON(env->backtrack_mem, en->m.regnum); + } NODE_STATUS_REMOVE(node, MARK1); } @@ -4141,7 +4354,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state) } set_call_attr: - NODE_CALL_BODY(cn) = mem_env[cn->group_num].node; + NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node; if (IS_NULL(NODE_CALL_BODY(cn))) { onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); @@ -4427,6 +4640,8 @@ setup_called_state_call(Node* node, int state) } } else if (en->type == BAG_IF_ELSE) { + state |= IN_ALT; + setup_called_state_call(NODE_BODY(node), state); if (IS_NOT_NULL(en->te.Then)) { setup_called_state_call(en->te.Then, state); } @@ -4483,6 +4698,7 @@ setup_called_state(Node* node, int state) setup_called_state(NODE_BODY(node), state); break; case BAG_IF_ELSE: + state |= IN_ALT; setup_called_state(NODE_BODY(node), state); if (IS_NOT_NULL(en->te.Then)) setup_called_state(en->te.Then, state); @@ -4634,12 +4850,6 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) if (d == 0) { #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT qn->emptiness = quantifiers_memory_node_info(body); - if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) { - if (NODE_TYPE(body) == NODE_BAG && - BAG_(body)->type == BAG_MEMORY) { - MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum); - } - } #else qn->emptiness = BODY_IS_EMPTY_POSSIBILITY; #endif @@ -4739,10 +4949,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; MEM_STATUS_ON(env->backrefed_mem, p[i]); - MEM_STATUS_ON(env->bt_mem_start, p[i]); #ifdef USE_BACKREF_WITH_LEVEL if (NODE_IS_NEST_LEVEL(node)) { - MEM_STATUS_ON(env->bt_mem_end, p[i]); + MEM_STATUS_ON(env->backtrack_mem, p[i]); } #endif } @@ -4770,7 +4979,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0 || NODE_IS_RECURSION(node)) { - MEM_STATUS_ON(env->bt_mem_start, en->m.regnum); + MEM_STATUS_ON(env->backtrack_mem, en->m.regnum); } r = setup_tree(NODE_BODY(node), reg, state, env); break; @@ -5725,11 +5934,11 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) break; } backs = BACKREFS_P(br); - min = tree_min_len(mem_env[backs[0]].node, env->scan_env); - max = tree_max_len(mem_env[backs[0]].node, env->scan_env); + min = tree_min_len(mem_env[backs[0]].mem_node, env->scan_env); + max = tree_max_len(mem_env[backs[0]].mem_node, env->scan_env); for (i = 1; i < br->back_num; i++) { - tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env); - tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env); + tmin = tree_min_len(mem_env[backs[i]].mem_node, env->scan_env); + tmax = tree_max_len(mem_env[backs[i]].mem_node, env->scan_env); if (min > tmin) min = tmin; if (max < tmax) max = tmax; } @@ -6301,7 +6510,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, Node* root; ScanEnv scan_env; #ifdef USE_CALL - UnsetAddrList uslist; + UnsetAddrList uslist = {0}; #endif root = 0; @@ -6328,6 +6537,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_null_check = 0; reg->repeat_range_alloc = 0; reg->repeat_range = (OnigRepeatRange* )NULL; + reg->empty_status_mem = 0; r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; @@ -6370,20 +6580,38 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = setup_tree(root, reg, 0, &scan_env); if (r != 0) goto err_unset; + if (scan_env.backref_num != 0) { + set_parent_node_trav(root, NULL_NODE); + r = set_empty_repeat_node_trav(root, NULL_NODE, &scan_env); + if (r != 0) goto err_unset; + set_empty_status_check_trav(root, &scan_env); + } + #ifdef ONIG_DEBUG_PARSE print_tree(stderr, root); #endif - reg->capture_history = scan_env.capture_history; - reg->bt_mem_start = scan_env.bt_mem_start; - reg->bt_mem_start |= reg->capture_history; - if (IS_FIND_CONDITION(reg->options)) - MEM_STATUS_ON_ALL(reg->bt_mem_end); - else { - reg->bt_mem_end = scan_env.bt_mem_end; - reg->bt_mem_end |= reg->capture_history; + reg->capture_history = scan_env.cap_history; + reg->push_mem_start = scan_env.backtrack_mem | scan_env.cap_history; + +#ifdef USE_CALLOUT + if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) { + reg->push_mem_end = reg->push_mem_start; } - reg->bt_mem_start |= reg->bt_mem_end; + else { + if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start)) + reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history; + else + reg->push_mem_end = reg->push_mem_start & + (scan_env.backrefed_mem | scan_env.cap_history); + } +#else + if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start)) + reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history; + else + reg->push_mem_end = reg->push_mem_start & + (scan_env.backrefed_mem | scan_env.cap_history); +#endif clear_optimize_info(reg); #ifndef ONIG_DONT_OPTIMIZE @@ -6417,14 +6645,14 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #endif - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) + if ((reg->num_repeat != 0) || (reg->push_mem_end != 0) #ifdef USE_CALLOUT || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) #endif ) reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { - if (reg->bt_mem_start != 0) + if (reg->push_mem_start != 0) reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; else reg->stack_pop_level = STACK_POP_LEVEL_FREE; diff --git a/oniguruma/src/regenc.c b/oniguruma/src/regenc.c index 0892ef825..8b03bb9db 100644 --- a/oniguruma/src/regenc.c +++ b/oniguruma/src/regenc.c @@ -182,7 +182,8 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, p += enclen(enc, p); } else { - if (prev) *prev = (const UChar* )NULL; /* Sorry */ + if (prev) + *prev = onigenc_get_prev_char_head(enc, start, p); } return p; } diff --git a/oniguruma/src/regexec.c b/oniguruma/src/regexec.c index 172bfb493..e0770643e 100644 --- a/oniguruma/src/regexec.c +++ b/oniguruma/src/regexec.c @@ -39,6 +39,15 @@ #define CHECK_INTERRUPT_IN_MATCH +#define STACK_MEM_START(reg, i) \ + (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \ + STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i]))) + +#define STACK_MEM_END(reg, i) \ + (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \ + STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i]))) + + #ifdef USE_CALLOUT typedef struct { int last_match_at_call_counter; @@ -625,8 +634,8 @@ onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) Operation* start = reg->ops; Operation* end = reg->ops + reg->ops_used; - fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", - reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n", + reg->push_mem_start, reg->push_mem_end); fprintf(f, "code-length: %d\n", reg->ops_used); bp = start; @@ -1929,7 +1938,7 @@ stack_double(int is_alloca, char** arg_alloc_base, (addr) = 0;\ }\ else {\ - if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\ + if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\ (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ else\ (addr) = (UChar* )k->u.mem.prev_end;\ @@ -1950,10 +1959,10 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ else {\ UChar* endp;\ - int level = 0;\ (isnull) = 1;\ while (k < stk) {\ - if (k->type == STK_MEM_START && level == 0) {\ + if (k->type == STK_MEM_START &&\ + MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ if (endp == 0) {\ (isnull) = 0; break;\ @@ -1965,12 +1974,6 @@ stack_double(int is_alloca, char** arg_alloc_base, (isnull) = -1; /* empty, but position changed */ \ }\ }\ - else if (k->type == STK_PREC_READ_START) {\ - level++;\ - }\ - else if (k->type == STK_PREC_READ_END) {\ - level--;\ - }\ k++;\ }\ break;\ @@ -1995,11 +1998,11 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ else {\ UChar* endp;\ - int prec_level = 0;\ (isnull) = 1;\ while (k < stk) {\ if (k->type == STK_MEM_START) {\ - if (level == 0 && prec_level == 0) {\ + if (level == 0 && \ + MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ if (endp == 0) {\ (isnull) = 0; break;\ @@ -2018,12 +2021,6 @@ stack_double(int is_alloca, char** arg_alloc_base, else if (k->type == STK_EMPTY_CHECK_END) {\ if (k->zid == (sid)) level--;\ }\ - else if (k->type == STK_PREC_READ_START) {\ - prec_level++;\ - }\ - else if (k->type == STK_PREC_READ_END) {\ - prec_level--;\ - }\ k++;\ }\ break;\ @@ -2700,15 +2697,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, rmt[0].rm_eo = (regoff_t )(s - str); for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (MEM_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); - - rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - - str); + rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str); + rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str); } else { rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; @@ -2721,14 +2711,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, region->end[0] = (int )(s - str); for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (MEM_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str); - - region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str); + region->beg[i] = (int )(STACK_MEM_START(reg, i) - str); + region->end[i] = (int )(STACK_MEM_END(reg, i) - str); } else { region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; @@ -3442,7 +3426,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); - if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->push_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); @@ -3470,14 +3454,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (MEM_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); + pstart = STACK_MEM_START(reg, mem); + pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); @@ -3499,14 +3477,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (MEM_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); + pstart = STACK_MEM_START(reg, mem); + pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); @@ -3531,14 +3503,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - if (MEM_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); + pstart = STACK_MEM_START(reg, mem); + pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); @@ -3569,14 +3535,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - if (MEM_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); + pstart = STACK_MEM_START(reg, mem); + pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); @@ -4044,14 +4004,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, OnigCalloutFunc func; OnigCalloutArgs args; - of = ONIG_CALLOUT_OF_NAME; - name_id = p->callout_name.id; - mem = p->callout_name.num; + of = ONIG_CALLOUT_OF_NAME; + mem = p->callout_name.num; callout_common_entry: e = onig_reg_callout_list_at(reg, mem); in = e->in; if (of == ONIG_CALLOUT_OF_NAME) { + name_id = p->callout_name.id; func = onig_get_callout_start_func(reg, mem); } else { @@ -4506,17 +4466,17 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, } static int -forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) +forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, + UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n", - str, end, s, range); + fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n", + str, end, start, range); #endif - p = s; + p = start; if (reg->dmin > 0) { if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { p += reg->dmin; @@ -4559,7 +4519,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } if (p && p < range) { - if (p - reg->dmin < s) { + if (p - reg->dmin < start) { retry_gate: pprev = p; p += enclen(reg->enc, p); @@ -4572,8 +4532,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); + prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; } @@ -4594,6 +4553,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #endif ) goto retry_gate; + break; } } @@ -4601,8 +4561,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (reg->dmax == 0) { *low = p; if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + if (*low > start) + *low_prev = onigenc_get_prev_char_head(reg->enc, start, p); else *low_prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); @@ -4617,12 +4577,9 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + if (*low > start) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start, *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); } else { if (low_prev) @@ -4637,7 +4594,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + "forward_search success: low: %d, high: %d, dmin: %d, dmax: %d\n", (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); #endif return 1; /* success */ @@ -4648,9 +4605,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, static int -backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) +backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + const UChar* range, UChar* adjrange, UChar** low, UChar** high) { UChar *p; @@ -4731,7 +4687,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, } #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + fprintf(stderr, "backward_search: low: %d, high: %d\n", (int )(*low - str), (int )(*high - str)); #endif return 1; /* success */ @@ -4739,7 +4695,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, fail: #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: fail.\n"); + fprintf(stderr, "backward_search: fail.\n"); #endif return 0; /* fail */ } @@ -4958,8 +4914,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (reg->dmax != INFINITE_LEN) { do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high, + &low_prev)) goto mismatch; if (s < low) { s = low; prev = low_prev; @@ -4973,8 +4929,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, goto mismatch; } else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high, + (UChar** )NULL)) goto mismatch; if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) { do { @@ -5023,8 +4979,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, do { sch_start = s + reg->dmax; if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) + if (backward_search(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; if (s > high) @@ -5053,8 +5009,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, start, sch_start); } } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; + if (backward_search(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; } } @@ -5403,22 +5359,20 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i const UChar* str; StackType* stk_base; int i; + StackIndex* mem_start_stk; + StackIndex* mem_end_stk; i = mem_num; reg = a->regex; str = a->string; stk_base = a->stk_base; + mem_start_stk = a->mem_start_stk; + mem_end_stk = a->mem_end_stk; if (i > 0) { if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { - if (MEM_STATUS_AT(reg->bt_mem_start, i)) - *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); - else - *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); - - *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )a->mem_end_stk[i])) - str); + *begin = (int )(STACK_MEM_START(reg, i) - str); + *end = (int )(STACK_MEM_END(reg, i) - str); } else { *begin = *end = ONIG_REGION_NOTPOS; diff --git a/oniguruma/src/regint.h b/oniguruma/src/regint.h index 753ab608f..a53885dbf 100644 --- a/oniguruma/src/regint.h +++ b/oniguruma/src/regint.h @@ -290,6 +290,8 @@ typedef unsigned int MemStatusType; #define MEM_STATUS_AT0(stats,n) \ ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) +#define MEM_STATUS_IS_ALL_ON(stats) (((stats) & 1) != 0) + #define MEM_STATUS_ON(stats,n) do {\ if ((n) < (int )MEM_STATUS_BITS_NUM) {\ if ((n) != 0)\ @@ -304,6 +306,14 @@ typedef unsigned int MemStatusType; (stats) |= ((MemStatusType )1 << (n));\ } while (0) +#define MEM_STATUS_LIMIT_AT(stats,n) \ + ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : 0) +#define MEM_STATUS_LIMIT_ON(stats,n) do {\ + if ((n) < (int )MEM_STATUS_BITS_NUM && (n) != 0) {\ + (stats) |= ((MemStatusType )1 << (n));\ + }\ +} while (0) + #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) @@ -887,28 +897,29 @@ struct re_pattern_buffer { #ifdef USE_DIRECT_THREADED_CODE enum OpCode* ocs; #endif - Operation* ops_curr; - unsigned int ops_used; /* used space for ops */ - unsigned int ops_alloc; /* allocated space for ops */ + Operation* ops_curr; + unsigned int ops_used; /* used space for ops */ + unsigned int ops_alloc; /* allocated space for ops */ unsigned char* string_pool; unsigned char* string_pool_end; - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ + int num_call; /* number of subexp call */ + MemStatusType capture_history; /* (?@...) flag (1-31) */ + MemStatusType push_mem_start; /* need backtrack flag */ + MemStatusType push_mem_end; /* need backtrack flag */ + MemStatusType empty_status_mem; + int stack_pop_level; + int repeat_range_alloc; OnigRepeatRange* repeat_range; - OnigEncoding enc; - OnigOptionType options; - OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; /* optimization info (string search, char-map and anchors) */ int optimize; /* optimize flag */ diff --git a/oniguruma/src/regparse.c b/oniguruma/src/regparse.c index df97be9b2..91f692ae0 100644 --- a/oniguruma/src/regparse.c +++ b/oniguruma/src/regparse.c @@ -1945,9 +1945,8 @@ callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end, static void scan_env_clear(ScanEnv* env) { - MEM_STATUS_CLEAR(env->capture_history); - MEM_STATUS_CLEAR(env->bt_mem_start); - MEM_STATUS_CLEAR(env->bt_mem_end); + MEM_STATUS_CLEAR(env->cap_history); + MEM_STATUS_CLEAR(env->backtrack_mem); MEM_STATUS_CLEAR(env->backrefed_mem); env->error = (UChar* )NULL; env->error_end = (UChar* )NULL; @@ -1966,6 +1965,7 @@ scan_env_clear(ScanEnv* env) xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); env->parse_depth = 0; + env->backref_num = 0; env->keep_num = 0; env->save_num = 0; env->save_alloc_num = 0; @@ -1997,7 +1997,8 @@ scan_env_add_mem_entry(ScanEnv* env) } for (i = env->num_mem + 1; i < alloc; i++) { - p[i].node = NULL_NODE; + p[i].mem_node = NULL_NODE; + p[i].empty_repeat_node = NULL_NODE; } env->mem_env_dynamic = p; @@ -2013,7 +2014,7 @@ static int scan_env_set_mem_node(ScanEnv* env, int num, Node* node) { if (env->num_mem >= num) - SCANENV_MEMENV(env)[num].node = node; + SCANENV_MEMENV(env)[num].mem_node = node; else return ONIGERR_PARSER_BUG; return 0; @@ -2326,7 +2327,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) { + IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].mem_node)) { NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */ break; } @@ -2346,6 +2347,8 @@ node_new_backref(int back_num, int* backrefs, int by_name, for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } + + env->backref_num++; return node; } @@ -2393,13 +2396,13 @@ node_new_quantifier(int lower, int upper, int by_number) CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_QUANT); - QUANT_(node)->lower = lower; - QUANT_(node)->upper = upper; - QUANT_(node)->greedy = 1; - QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY; - QUANT_(node)->head_exact = NULL_NODE; - QUANT_(node)->next_head_exact = NULL_NODE; - QUANT_(node)->is_refered = 0; + QUANT_(node)->lower = lower; + QUANT_(node)->upper = upper; + QUANT_(node)->greedy = 1; + QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY; + QUANT_(node)->head_exact = NULL_NODE; + QUANT_(node)->next_head_exact = NULL_NODE; + QUANT_(node)->include_referred = 0; if (by_number != 0) NODE_STATUS_ADD(node, BY_NUMBER); @@ -5270,7 +5273,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) + if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].mem_node)) return ONIGERR_INVALID_BACKREF; } @@ -5341,7 +5344,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (back_num > env->num_mem || - IS_NULL(SCANENV_MEMENV(env)[back_num].node)) + IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node)) return ONIGERR_INVALID_BACKREF; } tok->type = TK_BACKREF; @@ -5358,7 +5361,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env) int i; for (i = 0; i < num; i++) { if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) + IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node)) return ONIGERR_INVALID_BACKREF; } } @@ -6045,10 +6048,12 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) { int r; OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *prev, *start, *p = *src; + OnigEncoding enc; + UChar *prev, *start, *p; - r = 0; + p = *src; + enc = env->enc; + r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; start = prev = p; while (!PEND) { @@ -6056,18 +6061,20 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) PFETCH_S(c); if (c == '}') { r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); - if (r < 0) break; + if (r >= 0) { + *src = p; + } + else { + onig_scan_env_set_error_string(env, r, *src, prev); + } - *src = p; return r; } else if (c == '(' || c == ')' || c == '{' || c == '|') { - r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; break; } } - onig_scan_env_set_error_string(env, r, *src, prev); return r; } @@ -6230,12 +6237,13 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) Node* node; CClassNode *cc, *prev_cc; CClassNode work_cc; - - enum CCSTATE state; - enum CCVALTYPE val_type, in_type; int val_israw, in_israw; + enum CCSTATE state; + enum CCVALTYPE in_type; + enum CCVALTYPE val_type; *np = NULL_NODE; + val_type = -1; env->parse_depth++; if (env->parse_depth > ParseDepthLimit) return ONIGERR_PARSE_DEPTH_LIMIT_OVER; @@ -6741,7 +6749,8 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* static int parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, - unsigned int types[], OnigValue vals[], ScanEnv* env) + int max_arg_num, unsigned int types[], OnigValue vals[], + ScanEnv* env) { #define MAX_CALLOUT_ARG_BYTE_LENGTH 128 @@ -6760,9 +6769,9 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + c = 0; n = 0; while (n < ONIG_CALLOUT_MAX_ARGS_NUM) { - c = 0; cn = 0; esc = 0; eesc = 0; @@ -6809,6 +6818,9 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, } if (cn != 0) { + if (max_arg_num >= 0 && n >= max_arg_num) + return ONIGERR_INVALID_CALLOUT_ARG; + if (skip_mode == 0) { if ((types[n] & ONIG_TYPE_LONG) != 0) { int fixed = 0; @@ -6941,7 +6953,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en /* read for single check only */ save = p; - arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env); + arg_num = parse_callout_args(1, '}', &p, end, -1, 0, 0, env); if (arg_num < 0) return arg_num; is_not_single = PPEEK_IS(cterm) ? 0 : 1; @@ -6955,7 +6967,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en types[i] = get_callout_arg_type_by_name_id(name_id, i); } - arg_num = parse_callout_args(0, '}', &p, end, types, vals, env); + arg_num = parse_callout_args(0, '}', &p, end, max_arg_num, types, vals, env); if (arg_num < 0) return arg_num; if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; @@ -7115,7 +7127,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); BAG_(*np)->m.regnum = num; if (list_capture != 0) - MEM_STATUS_ON_SIMPLE(env->capture_history, num); + MEM_STATUS_ON_SIMPLE(env->cap_history, num); env->num_named++; } else { @@ -7257,7 +7269,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (back_num > env->num_mem || - IS_NULL(SCANENV_MEMENV(env)[back_num].node)) + IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node)) return ONIGERR_INVALID_BACKREF; } @@ -7279,7 +7291,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, int i; for (i = 0; i < num; i++) { if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) + IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node)) return ONIGERR_INVALID_BACKREF; } } @@ -7434,7 +7446,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } BAG_(*np)->m.regnum = num; - MEM_STATUS_ON_SIMPLE(env->capture_history, num); + MEM_STATUS_ON_SIMPLE(env->cap_history, num); } else { return ONIGERR_UNDEFINED_GROUP_OPTION; diff --git a/oniguruma/src/regparse.h b/oniguruma/src/regparse.h index bcc25b72b..d23848db1 100644 --- a/oniguruma/src/regparse.h +++ b/oniguruma/src/regparse.h @@ -73,9 +73,12 @@ enum BodyEmptyType { BODY_IS_EMPTY_POSSIBILITY_REC = 3 }; +struct _Node; + typedef struct { NodeType node_type; int status; + struct _Node* parent; UChar* s; UChar* end; @@ -87,6 +90,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; unsigned int flags; BitSet bs; @@ -96,6 +100,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; int lower; @@ -104,12 +109,13 @@ typedef struct { enum BodyEmptyType emptiness; struct _Node* head_exact; struct _Node* next_head_exact; - int is_refered; /* include called node. don't eliminate even if {0} */ + int include_referred; /* include called node. don't eliminate even if {0} */ } QuantNode; typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; enum BagType type; @@ -152,6 +158,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; /* to BagNode : BAG_MEMORY */ int by_number; @@ -166,6 +173,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; int back_num; int back_static[NODE_BACKREFS_SIZE]; @@ -176,6 +184,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; int type; @@ -186,6 +195,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* car; struct _Node* cdr; @@ -194,6 +204,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; int ctype; int not; @@ -204,6 +215,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; enum GimmickType type; int detail_type; @@ -216,6 +228,7 @@ typedef struct _Node { struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; } base; @@ -326,6 +339,7 @@ typedef struct _Node { #define NODE_ST_FIXED_OPTION (1<<18) #define NODE_ST_PROHIBIT_RECURSION (1<<19) #define NODE_ST_SUPER (1<<20) +#define NODE_ST_EMPTY_STATUS_CHECK (1<<21) #define NODE_STATUS(node) (((Node* )node)->u.base.status) @@ -355,7 +369,10 @@ typedef struct _Node { ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) #define NODE_IS_STRICT_REAL_REPEAT(node) \ ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0) +#define NODE_IS_EMPTY_STATUS_CHECK(node) \ + ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0) +#define NODE_PARENT(node) ((node)->u.base.parent) #define NODE_BODY(node) ((node)->u.base.body) #define NODE_QUANT_BODY(node) ((node)->body) #define NODE_BAG_BODY(node) ((node)->body) @@ -368,7 +385,8 @@ typedef struct _Node { (senv)->mem_env_dynamic : (senv)->mem_env_static) typedef struct { - Node* node; + Node* mem_node; + Node* empty_repeat_node; } MemEnv; typedef struct { @@ -380,9 +398,8 @@ typedef struct { OnigCaseFoldType case_fold_flag; OnigEncoding enc; OnigSyntaxType* syntax; - MemStatusType capture_history; - MemStatusType bt_mem_start; - MemStatusType bt_mem_end; + MemStatusType cap_history; + MemStatusType backtrack_mem; /* backtrack/recursion */ MemStatusType backrefed_mem; UChar* pattern; UChar* pattern_end; @@ -400,7 +417,7 @@ typedef struct { MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; MemEnv* mem_env_dynamic; unsigned int parse_depth; - + int backref_num; int keep_num; int save_num; int save_alloc_num; diff --git a/src/Notepad3.c b/src/Notepad3.c index 401a48a23..54279a668 100644 --- a/src/Notepad3.c +++ b/src/Notepad3.c @@ -3194,7 +3194,7 @@ LRESULT MsgInitMenu(HWND hwnd, WPARAM wParam, LPARAM lParam) EnableCmd(hmenu,IDM_EDIT_STREAMCOMMENT, !(i == SCLEX_NULL || i == SCLEX_VBSCRIPT || i == SCLEX_MAKEFILE || i == SCLEX_VB || i == SCLEX_ASM || - i == SCLEX_SQL || i == SCLEX_PERL || i == SCLEX_PYTHON || i == SCLEX_PROPERTIES ||i == SCLEX_CONF || + i == SCLEX_PERL || i == SCLEX_PYTHON || i == SCLEX_PROPERTIES ||i == SCLEX_CONF || i == SCLEX_POWERSHELL || i == SCLEX_BATCH || i == SCLEX_DIFF || i == SCLEX_BASH || i == SCLEX_TCL || i == SCLEX_AU3 || i == SCLEX_LATEX || i == SCLEX_AHKL || i == SCLEX_RUBY || i == SCLEX_CMAKE || i == SCLEX_MARKDOWN || i == SCLEX_YAML || i == SCLEX_REGISTRY || i == SCLEX_NIMROD || i == SCLEX_TOML) && !ro); @@ -3530,6 +3530,9 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) case IDM_FILE_PRESERVE_FILEMODTIME: + if (!Flags.bPreserveFileModTime) { + InfoBoxLng(MB_OK, L"PreserveFileModTime", IDS_MUI_INF_PRSVFILEMODTM); + } Flags.bPreserveFileModTime = true; FileSave(true, false, false, false, Flags.bPreserveFileModTime); break; @@ -4520,8 +4523,8 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) case SCLEX_REGISTRY: EditToggleLineComments(Globals.hwndEdit, L";", true); break; - case SCLEX_SQL: case SCLEX_LUA: + case SCLEX_SQL: case SCLEX_VHDL: EditToggleLineComments(Globals.hwndEdit, L"--", true); break; @@ -4549,7 +4552,6 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) case SCLEX_MAKEFILE: case SCLEX_VB: case SCLEX_ASM: - case SCLEX_SQL: case SCLEX_PERL: case SCLEX_PYTHON: case SCLEX_PROPERTIES: @@ -4571,17 +4573,18 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam) case SCLEX_NIMROD: case SCLEX_TOML: break; - case SCLEX_HTML: - case SCLEX_XML: - case SCLEX_CSS: - case SCLEX_CPP: - case SCLEX_NSIS: case SCLEX_AVS: + case SCLEX_CPP: + case SCLEX_CSS: + case SCLEX_HTML: + case SCLEX_NSIS: + case SCLEX_SQL: case SCLEX_VHDL: + case SCLEX_XML: EditEncloseSelection(Globals.hwndEdit, L"/*", L"*/"); break; - case SCLEX_PASCAL: case SCLEX_INNOSETUP: + case SCLEX_PASCAL: EditEncloseSelection(Globals.hwndEdit, L"{", L"}"); break; case SCLEX_LUA: diff --git a/src/Styles.c b/src/Styles.c index a59ade374..4f69617d4 100644 --- a/src/Styles.c +++ b/src/Styles.c @@ -884,10 +884,6 @@ void Style_SetLexerSpecificProperties(const int lexerId) { switch (lexerId) { - case SCLEX_XML: - SciCall_SetProperty("lexer.xml.allow.scripts", "1"); - break; - case SCLEX_CPP: SciCall_SetProperty("styling.within.preprocessor", "1"); SciCall_SetProperty("lexer.cpp.track.preprocessor", "0"); @@ -922,6 +918,10 @@ void Style_SetLexerSpecificProperties(const int lexerId) SciCall_SetProperty("tab.timmy.whinge.level", "1"); break; + case SCLEX_XML: + SciCall_SetProperty("lexer.xml.allow.scripts", "1"); + break; + default: break; }