From 7523fee425fd5a05555b7ef15d0ac0f0fbbe5ec0 Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Mon, 12 Oct 2020 16:47:54 +0200 Subject: [PATCH] + fix: Oniguruma flexible EOL handling (Win CRLF, Unix LF and Mac CR) + fix: Find/Repl all in range algo --- scintilla/.editorconfig | 8 +++ scintilla/oniguruma/.clang-format | 19 +++++ .../scintilla/OnigurumaRegExEngine.cxx | 21 +++--- scintilla/oniguruma/src/ascii.c | 52 ++++++++++++++ scintilla/oniguruma/src/oniguruma.h | 8 +++ scintilla/oniguruma/src/regenc.c | 21 +++++- scintilla/oniguruma/src/regenc.h | 8 ++- scintilla/oniguruma/src/regexec.c | 70 +++++++++++++------ scintilla/oniguruma/src/regparse.c | 4 +- scintilla/oniguruma/src/regposix.c | 8 ++- scintilla/oniguruma/src/unicode.c | 4 +- scintilla/oniguruma/src/utf8.c | 70 ++++++++++++++++++- src/Edit.c | 30 ++++---- 13 files changed, 262 insertions(+), 61 deletions(-) create mode 100644 scintilla/oniguruma/.clang-format diff --git a/scintilla/.editorconfig b/scintilla/.editorconfig index 393acfb91..7cdead2d1 100644 --- a/scintilla/.editorconfig +++ b/scintilla/.editorconfig @@ -30,3 +30,11 @@ tab_width = 4 [**.{ini}] charset = utf-8-sig + +[oniguruma/**.{h,c,cpp,hpp,cxx}] +charset = utf-8 +# space (w=2) indentation +indent_style = space +indent_size = 2 +tab_width = 2 + diff --git a/scintilla/oniguruma/.clang-format b/scintilla/oniguruma/.clang-format new file mode 100644 index 000000000..f2607d307 --- /dev/null +++ b/scintilla/oniguruma/.clang-format @@ -0,0 +1,19 @@ +--- +# https://clang.llvm.org/docs/ClangFormatStyleOptions.html +BasedOnStyle: LLVM +IndentWidth: 2 +TabWidth: 2 + +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: DontAlign +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: true +ColumnLimit: 0 +Cpp11BracedListStyle: false +FixNamespaceComments: false +SortIncludes: false +UseTab: Never + +--- diff --git a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx 
b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx index da7daa67b..2a478deaf 100644 --- a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx +++ b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx @@ -59,8 +59,8 @@ using namespace Scintilla; enum class EOLmode : int { CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF }; -//static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8_CR, ONIG_ENCODING_UTF8_CRLF }; -static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 }; +//static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 }; +static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8_CR, ONIG_ENCODING_UTF8_CRLF }; // ============================================================================ // ============================================================================ @@ -282,8 +282,8 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos OnigOptionType onigOptions; SetSimpleOptions(onigOptions, eolMode, caseSensitive, findForward, searchFlags); - ONIG_OPTION_ON(onigOptions, (rangeBeg != 0) ? ONIG_OPTION_NOTBOL : ONIG_OPTION_NONE); - ONIG_OPTION_ON(onigOptions, (rangeEnd != docLen) ? ONIG_OPTION_NOTEOL : ONIG_OPTION_NONE); + ONIG_OPTION_ON(onigOptions, (rangeBeg > 0) ? ONIG_OPTION_NOTBOL : ONIG_OPTION_NONE); + ONIG_OPTION_ON(onigOptions, (rangeEnd < docLen) ? 
ONIG_OPTION_NOTEOL : ONIG_OPTION_NONE); std::string sPattern(pattern); std::string const & sRegExprStrg = translateRegExpr(sPattern, word, wordStart, doc->eolMode, onigOptions); @@ -299,9 +299,10 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos OnigErrorInfo einfo; onig_free(m_RegExpr); - OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; - - int res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()), + //OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; + OnigEncoding const onigEncType = (eolMode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : + ((eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8_CRLF); + int res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()), m_CmplOptions, onigEncType, &m_OnigSyntax, &einfo); if (res != ONIG_NORMAL) { onig_error_code_to_str(UCharPtr(m_ErrorInfo), res, &einfo); @@ -740,9 +741,9 @@ OnigPosition SimpleRegExEngine::Find(const OnigUChar* pattern, const OnigUChar* try { onig_free(m_RegExpr); - //OnigEncoding const onigEncType = (m_EOLmode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : - // ((m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8_CRLF); - OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; + //OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; + OnigEncoding const onigEncType = (m_EOLmode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : + ((m_EOLmode == EOLmode::CR) ? 
ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8_CRLF); OnigErrorInfo einfo; int res = onig_new(&m_RegExpr, pattern, (pattern + patternLen), m_Options, onigEncType, &m_OnigSyntax, &einfo); diff --git a/scintilla/oniguruma/src/ascii.c b/scintilla/oniguruma/src/ascii.c index f2dc0d316..8330b82d4 100644 --- a/scintilla/oniguruma/src/ascii.c +++ b/scintilla/oniguruma/src/ascii.c @@ -116,3 +116,55 @@ OnigEncodingType OnigEncodingASCII = { ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; + + +OnigEncodingType OnigEncodingASCII_CR = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0d, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match, + init, + 0, /* is_initialized */ + onigenc_always_true_is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, + 0, 0 +}; + + +OnigEncodingType OnigEncodingASCII_CRLF = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0d_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match, + init, + 0, /* is_initialized */ + onigenc_always_true_is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, + 0, 0 
+}; + + diff --git a/scintilla/oniguruma/src/oniguruma.h b/scintilla/oniguruma/src/oniguruma.h index efdbba43f..aac257254 100644 --- a/scintilla/oniguruma/src/oniguruma.h +++ b/scintilla/oniguruma/src/oniguruma.h @@ -189,10 +189,16 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #else // lean and mean ONIG_EXTERN OnigEncodingType OnigEncodingASCII; +ONIG_EXTERN OnigEncodingType OnigEncodingASCII_CR; +ONIG_EXTERN OnigEncodingType OnigEncodingASCII_CRLF; ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8_CR; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8_CRLF; #endif #define ONIG_ENCODING_ASCII (&OnigEncodingASCII) +#define ONIG_ENCODING_ASCII_CR (&OnigEncodingASCII_CR) +#define ONIG_ENCODING_ASCII_CRLF (&OnigEncodingASCII_CRLF) #define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) #define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) #define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) @@ -209,6 +215,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; #define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) #define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) #define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_UTF8_CR (&OnigEncodingUTF8_CR) +#define ONIG_ENCODING_UTF8_CRLF (&OnigEncodingUTF8_CRLF) #define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) #define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) #define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) diff --git a/scintilla/oniguruma/src/regenc.c b/scintilla/oniguruma/src/regenc.c index 27e45493b..3b1f842d6 100644 --- a/scintilla/oniguruma/src/regenc.c +++ b/scintilla/oniguruma/src/regenc.c @@ -692,7 +692,26 @@ extern int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) { if (p < end) { - if (*p == NEWLINE_CODE) return 1; + if (*p == NEWLINE_CODE) return 1; // LF + } + return 0; +} + +extern int +onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end) 
+{ + if (p < end) { + if (*p == CARRIAGE_RET) return 1; // CR + } + return 0; +} + +extern int +onigenc_is_mbc_newline_0x0d_0x0a(const UChar* p, const UChar* end) +{ + if (p < end) { + //~if ((*p == CARRIAGE_RET) && (*(p+1) == NEWLINE_CODE)) return 1; // CRLF + if ((*p == NEWLINE_CODE) || (*p == CARRIAGE_RET)) return 1; // LF|CR } return 0; } diff --git a/scintilla/oniguruma/src/regenc.h b/scintilla/oniguruma/src/regenc.h index 644f0f298..28e71014e 100644 --- a/scintilla/oniguruma/src/regenc.h +++ b/scintilla/oniguruma/src/regenc.h @@ -79,6 +79,7 @@ typedef struct { #define MAX_CODE_POINT (~((OnigCodePoint )0)) #define ASCII_LIMIT 127 #define NEWLINE_CODE 0x0a +#define CARRIAGE_RET 0x0d #define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) @@ -116,12 +117,12 @@ struct PropertyNameCtype { int ctype; }; -#define USE_CRNL_AS_LINE_TERMINATOR +//~#define USE_CRNL_AS_LINE_TERMINATOR ~ solved by is_new_line() fct-ptr #define USE_UNICODE_PROPERTIES #define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER #define USE_UNICODE_WORD_BREAK /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -#define USE_UNICODE_ALL_LINE_TERMINATORS /* see Unicode.org UTS #18 */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ //~#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII @@ -152,7 +153,8 @@ extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPair extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); - +extern int onigenc_is_mbc_newline_0x0d P_((const UChar* p, const UChar* end)); +extern int onigenc_is_mbc_newline_0x0d_0x0a P_((const UChar* p, const UChar* end)); /* methods 
for single byte encoding */ extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); diff --git a/scintilla/oniguruma/src/regexec.c b/scintilla/oniguruma/src/regexec.c index 3d0488138..19a6a9cba 100644 --- a/scintilla/oniguruma/src/regexec.c +++ b/scintilla/oniguruma/src/regexec.c @@ -40,10 +40,17 @@ #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ - (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == CARRIAGE_RET && \ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif +// --- flexible NP3 mode (CR|LF|CRLF) dependent ANCHOR and BOL/EOL handling --- +const OnigUChar* const _CRLF = "\r\n\0"; +#define IS_CRLF_NEWLINE(enc) ((enc)->is_mbc_newline(&_CRLF[0], &_CRLF[1]) && (enc)->is_mbc_newline(&_CRLF[1], &_CRLF[2])) +#define IS_LF_CODE(enc, s, end) (ONIGENC_MBC_TO_CODE((enc), (s), (end)) == NEWLINE_CODE) +#define IS_CR_CODE(enc, s, end) (ONIGENC_MBC_TO_CODE((enc), (s), (end)) == CARRIAGE_RET) +// ---------------------------------------------------------------------------- + #define CHECK_INTERRUPT_IN_MATCH #define STACK_MEM_START(reg, idx) \ @@ -3345,7 +3352,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p, s); n = enclen(encode, s); DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; } JUMP_OUT; @@ -3377,7 +3384,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } n = enclen(encode, s); DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; } } @@ -3587,9 +3594,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (! 
ON_STR_END(s)) { UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { - INC_OP; - JUMP_OUT; - } + if (!IS_CRLF_NEWLINE(encode) || IS_LF_CODE(encode, sprev, end)) { + INC_OP; + JUMP_OUT; + } + } } goto fail; @@ -3607,8 +3616,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - INC_OP; - JUMP_OUT; + if (!IS_CRLF_NEWLINE(encode) || IS_CR_CODE(encode, s, end)) { + INC_OP; + JUMP_OUT; + } } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { @@ -5133,8 +5144,13 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; + if (IS_NOT_NULL(prev)) { + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + goto retry_gate; + } else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) { + goto retry_gate; + } + } } break; @@ -5146,14 +5162,16 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + } else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! 
ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && !ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) + ) { goto retry_gate; - + } + else if (IS_CRLF_NEWLINE(reg->enc) && !IS_CR_CODE(reg->enc, p, end)) { + goto retry_gate; + } break; } } @@ -5228,9 +5246,14 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; + if (IS_NOT_NULL(prev)) { + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, prev, end)) { + p = prev; + goto retry; + } } } break; @@ -5245,12 +5268,15 @@ backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s, goto retry; } #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + } else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! 
ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && !ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) { + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(p)) goto fail; + goto retry; + } else if (IS_CRLF_NEWLINE(reg->enc) && !IS_LF_CODE(reg->enc, p, end)) { p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; diff --git a/scintilla/oniguruma/src/regparse.c b/scintilla/oniguruma/src/regparse.c index 90e159d7b..eadfd2b62 100644 --- a/scintilla/oniguruma/src/regparse.c +++ b/scintilla/oniguruma/src/regparse.c @@ -4350,7 +4350,7 @@ node_new_general_newline(Node** node, ScanEnv* env) Node* x; CClassNode* cc; - dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf); + dlen = ONIGENC_CODE_TO_MBC(env->enc, CARRIAGE_RET, buf); if (dlen < 0) return dlen; alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen); if (alen < 0) return alen; @@ -4363,7 +4363,7 @@ node_new_general_newline(Node** node, ScanEnv* env) cc = CCLASS_(ncc); if (dlen == 1) { - bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d); + bitset_set_range(cc->bs, NEWLINE_CODE, CARRIAGE_RET); } else { r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d); diff --git a/scintilla/oniguruma/src/regposix.c b/scintilla/oniguruma/src/regposix.c index ea29c0304..003f4cf25 100644 --- a/scintilla/oniguruma/src/regposix.c +++ b/scintilla/oniguruma/src/regposix.c @@ -379,4 +379,10 @@ reg_foreach_name(onig_posix_regex_t* reg, return onig_posix_reg_foreach_name(reg, func, arg); } -#endif +extern int +reg_number_of_names(onig_posix_regex_t* reg) +{ + return onig_posix_reg_number_of_names(reg); +} + +#endif /* USE_BINARY_COMPATIBLE_POSIX_API */ diff --git a/scintilla/oniguruma/src/unicode.c b/scintilla/oniguruma/src/unicode.c index 6703d4b9a..901f483d0 100644 --- a/scintilla/oniguruma/src/unicode.c +++ b/scintilla/oniguruma/src/unicode.c @@ -994,7 +994,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER if 
(! ONIGENC_IS_UNICODE_ENCODING(enc)) { - return from != 0x000d || to != NEWLINE_CODE; + return from != CARRIAGE_RET || to != NEWLINE_CODE; } btype = unicode_egcb_is_break_2code(from, to); @@ -1037,7 +1037,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, return 1; #else - return from != 0x000d || to != NEWLINE_CODE; + return from != CARRIAGE_RET || to != NEWLINE_CODE; #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */ } diff --git a/scintilla/oniguruma/src/utf8.c b/scintilla/oniguruma/src/utf8.c index d41cb66c9..48e0803c7 100644 --- a/scintilla/oniguruma/src/utf8.c +++ b/scintilla/oniguruma/src/utf8.c @@ -98,13 +98,14 @@ is_valid_mbc_string(const UChar* p, const UChar* end) } +#if 0 static int is_mbc_newline(const UChar * p, const UChar * end) { if (p < end) { if (*p == 0x0a) return 1; -#if defined(USE_CRNL_AS_LINE_TERMINATOR) || defined(USE_UNICODE_ALL_LINE_TERMINATORS) +#ifdef USE_CRNL_AS_LINE_TERMINATOR if (*p == 0x0d) return 1; #endif @@ -123,6 +124,7 @@ is_mbc_newline(const UChar * p, const UChar * end) return 0; } +#endif static OnigCodePoint @@ -298,8 +300,8 @@ OnigEncodingType OnigEncodingUTF8 = { 6, #endif 1, /* min enc length */ - //~onigenc_is_mbc_newline_0x0a, - is_mbc_newline, + //is_mbc_newline, + onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, code_to_mbc, @@ -317,3 +319,65 @@ OnigEncodingType OnigEncodingUTF8 = { ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0, 0, 0 }; + + +OnigEncodingType OnigEncodingUTF8_CR = { + mbc_enc_len, + "UTF-8", /* name */ +#ifdef USE_RFC3629_RANGE + 4, /* max enc length */ +#else + 6, +#endif + 1, /* min enc length */ + //is_mbc_newline, + onigenc_is_mbc_newline_0x0d, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + 
onigenc_always_true_is_allowed_reverse_match, + NULL, /* init */ + NULL, /* is_initialized */ + is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0, + 0, 0 +}; + + +OnigEncodingType OnigEncodingUTF8_CRLF = { + mbc_enc_len, + "UTF-8", /* name */ +#ifdef USE_RFC3629_RANGE + 4, /* max enc length */ +#else + 6, +#endif + 1, /* min enc length */ + //is_mbc_newline, + onigenc_is_mbc_newline_0x0d_0x0a, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match, + NULL, /* init */ + NULL, /* is_initialized */ + is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0, + 0, 0 +}; + + diff --git a/src/Edit.c b/src/Edit.c index a247926fd..8453fe957 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -7043,9 +7043,9 @@ int EditReplaceAllInRange(HWND hwnd, LPCEDITFINDREPLACE lpefr, DocPos iStartPos, DocPos const saveTargetEnd = SciCall_GetTargetEnd(); DocPos start = iStartPos; - DocPos end = iEndPos; - - DocPos iPos = _FindInTarget(szFind, slen, sFlags, &start, &end, false, FRMOD_NORM); + DocPos end_m = iEndPos; + DocPos end = end_m; + DocPos iPos = _FindInTarget(szFind, slen, sFlags, &start, &end, false, FRMOD_NORM); if ((iPos < -1) && (lpefr->fuFlags & SCFIND_REGEXP)) { InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID); @@ -7057,22 +7057,18 @@ int EditReplaceAllInRange(HWND hwnd, LPCEDITFINDREPLACE lpefr, DocPos iStartPos, _BEGIN_UNDO_ACTION_; - start = iStartPos; - DocPos iNewEndPos = iEndPos; - while ((iPos >= 0) && (start <= iEndPos)) + while ((iPos >= 0) && (start <= end_m)) { - end = iNewEndPos; - iPos = _FindInTarget(szFind, slen, sFlags, &start, &end, true, FRMOD_NORM); - if ((iPos >= 0) && (iPos <= iNewEndPos)) { - 
SciCall_SetTargetRange(iPos, end); - DocPos const replLen = Sci_ReplaceTarget(iReplaceMsg, -1, pszReplace); - start = SciCall_GetTargetEnd(); - chgLenDiff += replLen - (end - iPos); - iNewEndPos = iEndPos + chgLenDiff; - ++iCount; - } - } + SciCall_SetTargetRange(iPos, end); + DocPos const replLen = Sci_ReplaceTarget(iReplaceMsg, -1, pszReplace); + chgLenDiff += replLen - (end - iPos); + start = SciCall_PositionAfter(SciCall_GetTargetEnd()); + end_m = iEndPos + chgLenDiff; + end = end_m; + ++iCount; + iPos = _FindInTarget(szFind, slen, sFlags, &start, &end, false, FRMOD_NORM); + } _END_UNDO_ACTION_; SciCall_SetTargetRange(saveTargetBeg, saveTargetEnd + chgLenDiff); //restore