diff --git a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx index 544908a96..7667b832b 100644 --- a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx +++ b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx @@ -87,9 +87,8 @@ static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/, ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_EXTEND); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_FIND_LONGEST); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MATCH_WHOLE_STRING); - ONIG_OPTION_OFF(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR); // (!!!) - //ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE); - //ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP); + //~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE); + //~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP); // dynamic options //switch (eolMode) { @@ -101,11 +100,9 @@ static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/, //} if (FlagSet(searchFlags, FindOption::DotMatchAll)) { - //~ONIG_OPTION_ON(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR); ONIG_OPTION_ON(onigOptions, ONIG_OPTION_MULTILINE); } else { - //~ONIG_OPTION_OFF(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MULTILINE); } @@ -314,8 +311,7 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos m_ErrorInfo[0] = '\0'; try { - // OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; - OnigEncoding const onigEncType = (eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8; + OnigEncoding const onigEncType = (eolMode == EOLmode::LF) ? 
ONIG_ENCODING_UTF8 : ONIG_ENCODING_UTF8_CR; OnigErrorInfo einfo; int const res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()), @@ -548,6 +544,8 @@ void OnigurumaRegExEngine::clear() { std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart, EndOfLine eolMode, OnigOptionType & /*rxOptions*/) { + UNREFERENCED_PARAMETER(eolMode); + std::string transRegExpr; if (wholeWord || wordStart) { // push '\b' at the begin of regexpr @@ -564,11 +562,13 @@ std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprSt transRegExpr.append(regExprStr); } - //if (wholeString) { - // ONIG_OPTION_ON(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING); - //} else { - // ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING); - //} +#if 0 + if (wholeString) { + ONIG_OPTION_ON(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING); + } else { + ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING); + } +#endif // Oniguruma supports LTGT word boundary by: ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END // @@ -577,18 +577,19 @@ std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprSt //~replaceAll(transRegExpr, R"(\>)", R"((?<=\w)(?!\w))"); // word end //~replaceAll(transRegExpr, R"(\(?<=\w)(?!\w))", R"(\\>)"); // esc'd - - // EOL modes + #if 0 + // EOL modes is controlled by switch (eolMode) { case EndOfLine::Lf: case EndOfLine::Cr: - //ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR); + ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR); break; case EndOfLine::CrLf: - //ONIG_OPTION_ON(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR); + ONIG_OPTION_ON(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR); break; } + #endif return transRegExpr; } diff --git a/scintilla/oniguruma/src/regenc.c b/scintilla/oniguruma/src/regenc.c index ba1306da5..11ee50a08 100644 --- a/scintilla/oniguruma/src/regenc.c +++ 
b/scintilla/oniguruma/src/regenc.c @@ -701,8 +701,16 @@ extern int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) { if (p < end) { +#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR + if ((*p == NEWLINE_CODE)||(*p == END_OF_FILE)) return 1; // LF +#else if (*p == NEWLINE_CODE) return 1; // LF +#endif } +#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR + // if (p == end) + // return 1; +#endif return 0; } @@ -710,8 +718,16 @@ extern int onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end) { if (p < end) { +#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR + if ((*p == CARRIAGE_RET)||(*p == END_OF_FILE)) return 1; // CR +#else if (*p == CARRIAGE_RET) return 1; // CR +#endif } +#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR + // if (p == end) + // return 1; +#endif return 0; } diff --git a/scintilla/oniguruma/src/regenc.h b/scintilla/oniguruma/src/regenc.h index 7a59b47a4..e828ed81e 100644 --- a/scintilla/oniguruma/src/regenc.h +++ b/scintilla/oniguruma/src/regenc.h @@ -80,6 +80,7 @@ typedef struct { #define ASCII_LIMIT 127 #define NEWLINE_CODE 0x0a #define CARRIAGE_RET 0x0d +#define END_OF_FILE 0x00 #define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) @@ -117,6 +118,7 @@ struct PropertyNameCtype { int ctype; }; +#define USE_END_OF_FILE_AS_LINE_TERMINATOR #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES #define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER diff --git a/src/Edit.c b/src/Edit.c index 521ae8870..408c681c6 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -5918,12 +5918,17 @@ static char* _GetReplaceString(HWND hwnd, CLPCEDITFINDREPLACE lpefr, int* iRepla // // _FindInTarget() // +// ONIG_MISMATCH +#define NOT_FOUND ((DocPos)(-1LL)) +#define VALIDATE_FOUND_POS(pos, nxt, stp) (((pos) < 0) ? (pos) : (((nxt) ? ((pos) > (stp)) : ((pos) < (stp))) ? 
NOT_FOUND : (pos))) /* negative results (NOT_FOUND and the lower codes callers test with 'iPos < NOT_FOUND') pass through unchanged; a non-negative hit beyond 'stp' in search direction becomes NOT_FOUND */ + + static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags, DocPos* begin, DocPos* end, bool bForceNext, FR_UPD_MODES fMode) { static char chFind[8192] = { '\0' }; // max find buffer - DocPos iPos = -1LL; // not found + DocPos iPos = NOT_FOUND; if (StrIsEmpty(wchFind)) { return iPos; @@ -5933,18 +5938,37 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags, DocPos const saveTargetBeg = SciCall_GetTargetStart(); DocPos const saveTargetEnd = SciCall_GetTargetEnd(); - DocPos start = *begin; - DocPos stop = *end; + DocPos const start = *begin; + DocPos const stop = *end; bool const bFindNext = (start <= stop); // else find previous + DocPos const len = (DocPos)(WideCharToMultiByte(Encoding_SciCP, 0, wchFind, -1, chFind, COUNTOF(chFind), NULL, NULL) - 1); + SciCall_SetSearchFlags(sFlags); SciCall_SetTargetRange(start, stop); + iPos = SciCall_SearchInTarget(len, chFind); + iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop - DocPos const len = (DocPos)WideCharToMultiByte(Encoding_SciCP, 0, wchFind, -1, chFind, COUNTOF(chFind), NULL, NULL); - - iPos = SciCall_SearchInTarget(len - 1, chFind); - iPos = (bFindNext ? (iPos > stop) : (iPos < stop)) ? -1LL : iPos; // not found if beyond stop - +#if 1 + // handle next in case of zero-length-matches or invalid position (regex) ! + bool const bZeroLenMatch = ((iPos == start) && (start == SciCall_GetTargetEnd())); + bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsValidPos(iPos, bFindNext); + DocPos oldStart = start; + while (!bValidPos) { + DocPos const newStart = (bFindNext ? SciCall_PositionAfter(oldStart) : SciCall_PositionBefore(oldStart)); + bool const bProceed = (bFindNext ? 
(newStart < stop) : (newStart > stop)) && (newStart != oldStart); + if (bProceed) { + SciCall_SetTargetRange(newStart, stop); + iPos = SciCall_SearchInTarget(len, chFind); + iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop + } + else { + iPos = NOT_FOUND; // already at document begin, end or stuck => not found + } + bValidPos = Sci_IsValidPos(iPos, bFindNext); // NOT_FOUND is a valid pos + oldStart = newStart; + } +#else // handle next in case of zero-length-matches (regex) ! if (iPos == start) { DocPos const nstop = SciCall_GetTargetEnd(); @@ -5959,6 +5983,7 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags, } } } +#endif if (iPos >= 0) { if (fMode != FRMOD_IGNORE) { @@ -7136,7 +7161,7 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM); - if ((iPos < -1LL) && (lpefr->fuFlags & SCFIND_REGEXP)) { + if ((iPos < NOT_FOUND) && (lpefr->fuFlags & SCFIND_REGEXP)) { InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID); bSuppressNotFound = true; } else if ((iPos < 0LL) && (start >= 0LL) && !bExtendSelection) { @@ -7157,7 +7182,7 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio } else { LONG const result = InfoBoxLng(MB_OKCANCEL, L"MsgFindWrap1", IDS_MUI_FIND_WRAPFW); if (!IsYesOkay(result)) { - iPos = -1LL; + iPos = NOT_FOUND; bSuppressNotFound = true; } bFoundWrapAround = (INFOBOX_MODE(result) != 0); @@ -7228,7 +7253,7 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM); - if ((iPos < -1LL) && (sFlags & SCFIND_REGEXP)) { + if ((iPos < NOT_FOUND) && (sFlags & SCFIND_REGEXP)) { InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID); bSuppressNotFound = true; } else if ((iPos < 0LL) && (start <= iDocEndPos) && !bExtendSelection) { @@ -7242,14 +7267,14 @@ 
bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo iPos = _FindInTarget(wchFind, sFlags, &start, &end, false, FRMOD_WRAPED); if ((iPos < 0LL) || (start == _start)) { - if ((iPos < -1LL) && (sFlags & SCFIND_REGEXP)) { + if ((iPos < NOT_FOUND) && (sFlags & SCFIND_REGEXP)) { InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID); bSuppressNotFound = true; } } else { LONG const result = InfoBoxLng(MB_OKCANCEL, L"MsgFindWrap2", IDS_MUI_FIND_WRAPRE); if (!IsYesOkay(result)) { - iPos = -1LL; + iPos = NOT_FOUND; bSuppressNotFound = true; } bFoundWrapAround = (INFOBOX_MODE(result) != 0); @@ -7479,7 +7504,7 @@ DocPosU EditReplaceAllInRange(HWND hwnd, LPEDITFINDREPLACE lpefr, DocPos iStartP DocPos end = iEndPos; DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, false, FRMOD_NORM); - if ((iPos < -1LL) && bIsRegExpr) { + if ((iPos < NOT_FOUND) && bIsRegExpr) { InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID); return 0; } @@ -7508,7 +7533,7 @@ DocPosU EditReplaceAllInRange(HWND hwnd, LPEDITFINDREPLACE lpefr, DocPos iStartP } start = iStartPos; end = iEndPos; - iPos = (start <= end) ? _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM) : -1LL; + iPos = (start <= end) ? _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM) : NOT_FOUND; } EndUndoTransAction(); diff --git a/src/SciCall.h b/src/SciCall.h index 811a7ab9b..892775a2c 100644 --- a/src/SciCall.h +++ b/src/SciCall.h @@ -800,6 +800,10 @@ DeclareSciCallR0(IsSelectionRectangle, SELECTIONISRECTANGLE, bool); #define Sci_ClampAlpha(alpha) clampi((alpha), SC_ALPHA_TRANSPARENT, SC_ALPHA_OPAQUE) //~SC_ALPHA_NOALPHA +__forceinline bool Sci_IsValidPos(DocPos pos, bool fwd) /* returns true if pos <= 0 (sentinels such as NOT_FOUND count as valid) or if pos survives a PositionBefore -> PositionAfter round trip; 'fwd' is kept for existing callers and no longer changes the result */ +{ + (void)fwd; return ((pos <= 0) || (pos == SciCall_PositionAfter(SciCall_PositionBefore(pos)))); /* the former backward variant PositionBefore(PositionAfter(pos)) rejects a position at document end - NOTE(review): relies on PositionAfter being clamped to the last document position, confirm against Scintilla docs */ +} // max. 
line-breaks) inline DocPos Sci_GetRangeMaxLineLength(DocLn iBeginLine, DocLn iEndLine) diff --git a/test/test_files/regex/test_eol_eof.txt b/test/test_files/regex/test_eol_eof.txt new file mode 100644 index 000000000..e4dbec29e --- /dev/null +++ b/test/test_files/regex/test_eol_eof.txt @@ -0,0 +1,5 @@ +test + +testabc + +abctest \ No newline at end of file