From af9173e65fa9bd90ee9cc77eb0bddc9f62447a11 Mon Sep 17 00:00:00 2001 From: "METANEOCORTEX\\Kotti" Date: Tue, 11 Jul 2023 00:34:14 +0200 Subject: [PATCH] fix: regEx search with begin/end line meta chars --- .../scintilla/OnigurumaRegExEngine.cxx | 61 ++++++++++++------- scintilla/oniguruma/src/regenc.c | 4 +- src/Edit.c | 48 +++++++++++---- src/SciCall.h | 6 +- test/test_files/regex/test_eol_eof.txt | 2 + 5 files changed, 82 insertions(+), 39 deletions(-) diff --git a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx index 7667b832b..b1bb83273 100644 --- a/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx +++ b/scintilla/oniguruma/scintilla/OnigurumaRegExEngine.cxx @@ -62,9 +62,8 @@ using namespace Scintilla::Internal; // *** Oningmo configuration *** // ============================================================================ -enum class EOLmode : int { CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF }; +enum class EOLmode : int { UDEF = -1, CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF }; -//static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 }; static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8_CR }; // ============================================================================ @@ -73,22 +72,37 @@ static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING // ------------------------------------ // --- Onigmo Engine Simple Options --- // ------------------------------------ -static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/, - const bool caseSensitive, const bool forwardSearch, - const FindOption searchFlags = FindOption::None) -{ +static void SetSimpleOptions(OnigOptionType &onigOptions, EOLmode /*eolMode*/, + const bool caseSensitive, const bool forwardSearch, + const FindOption searchFlags = FindOption::None, + const bool rangeBegIsDocBeg = true, const bool rangeEndIsDocEnd = true) { // fixed options onigOptions = ONIG_OPTION_DEFAULT; // Notepad3 forced options - ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NEGATE_SINGLELINE); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_SINGLELINE); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_POSIX_REGION); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_EXTEND); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_FIND_LONGEST); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MATCH_WHOLE_STRING); + ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOTBOL); + ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOTEOL); + // ---------------------------------------------------------- + + if (rangeBegIsDocBeg) { + ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOT_BEGIN_STRING); + } else { + ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_BEGIN_STRING); + } + if (rangeEndIsDocEnd) { + ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOT_END_STRING); + } else { + ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_END_STRING); + } + //~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE); //~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP); + //~ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_BEGIN_POSITION); // dynamic options //switch (eolMode) { @@ -135,6 +149,7 @@ public: , m_CmplOptions(ONIG_OPTION_DEFAULT) , m_RegExpr(nullptr) , m_Region({0,0,nullptr,nullptr,nullptr}) + , m_EOLmode(EOLmode::UDEF) , m_RangeBeg(-1) , m_RangeEnd(-1) , m_ErrorInfo() @@ -181,6 +196,7 @@ private: OnigOptionType m_CmplOptions; OnigRegex m_RegExpr; OnigRegion m_Region; + EOLmode m_EOLmode; Sci::Position m_RangeBeg; Sci::Position m_RangeEnd; @@ -286,21 +302,19 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos int const increment = findForward ? 1 : -1; // Range endpoints should not be inside DBCS characters, but just in case, move them. - minPos = doc->MovePositionOutsideChar(minPos + (findForward ? 0 : -1), increment, false); - maxPos = doc->MovePositionOutsideChar(maxPos, increment, false); + minPos = doc->MovePositionOutsideChar(minPos, increment, true); + maxPos = doc->MovePositionOutsideChar(maxPos, increment, true); Sci::Position const rangeBeg = (findForward) ? minPos : maxPos; Sci::Position const rangeEnd = (findForward) ? maxPos : minPos; Sci::Position const rangeLen = (rangeEnd - rangeBeg); OnigOptionType onigOptions; - SetSimpleOptions(onigOptions, eolMode, caseSensitive, findForward, searchFlags); - ONIG_OPTION_ON(onigOptions, (rangeBeg > docBegPos) ? ONIG_OPTION_NOTBOL : ONIG_OPTION_NONE); - ONIG_OPTION_ON(onigOptions, (rangeEnd < docEndPos) ? ONIG_OPTION_NOTEOL : ONIG_OPTION_NONE); - + SetSimpleOptions(onigOptions, eolMode, caseSensitive, findForward, searchFlags, (rangeBeg == docBegPos), (rangeEnd == docEndPos)); + std::string const sRegExprStrg = translateRegExpr(pattern, word, wordStart, doc->eolMode, onigOptions); - bool const bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0); + bool const bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0) || (m_EOLmode != eolMode); if (bReCompile) { clear(); @@ -308,11 +322,13 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos m_CmplOptions = onigOptions; m_RangeBeg = rangeBeg; m_RangeEnd = rangeEnd; + m_EOLmode = eolMode; m_ErrorInfo[0] = '\0'; + try { - OnigEncoding const onigEncType = (eolMode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : ONIG_ENCODING_UTF8_CR; - + OnigEncoding const onigEncType = ((eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8); + OnigErrorInfo einfo; int const res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()), m_CmplOptions, onigEncType, &m_OnigSyntax, &einfo); @@ -714,11 +730,11 @@ class SimpleRegExEngine public: explicit SimpleRegExEngine(const EOLmode eolMode) - : m_EOLmode(eolMode) - , m_OnigSyntax(*NP3_ONIG_SYNTAX_FLAVOR) + : m_OnigSyntax(*NP3_ONIG_SYNTAX_FLAVOR) , m_Options(ONIG_OPTION_DEFAULT) , m_RegExpr(nullptr) , m_Region({ 0,0,nullptr,nullptr,nullptr }) + , m_EOLmode(eolMode) , m_ErrorInfo() , m_MatchPos(ONIG_MISMATCH) , m_MatchLen(0) @@ -750,11 +766,11 @@ private: private: - EOLmode m_EOLmode; OnigSyntaxType m_OnigSyntax; OnigOptionType m_Options; OnigRegex m_RegExpr; OnigRegion m_Region; + EOLmode m_EOLmode; OnigUChar m_ErrorInfo[ONIG_MAX_ERROR_MESSAGE_LEN]; @@ -777,15 +793,16 @@ OnigPos SimpleRegExEngine::Find(const OnigUChar* pattern, const OnigUChar* docum return OnigPos(-1); } + bool const findForward = true; + // init search options - SetSimpleOptions(m_Options, m_EOLmode, caseSensitive, true); + SetSimpleOptions(m_Options, m_EOLmode, caseSensitive, findForward); m_ErrorInfo[0] = '\0'; try { onig_free(m_RegExpr); - //OnigEncoding const onigEncType = ONIG_ENCODING_UTF8; - OnigEncoding const onigEncType = (m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8; + OnigEncoding const onigEncType = ((m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8); OnigErrorInfo einfo; int res = onig_new(&m_RegExpr, pattern, (pattern + patternLen), m_Options, onigEncType, &m_OnigSyntax, &einfo); diff --git a/scintilla/oniguruma/src/regenc.c b/scintilla/oniguruma/src/regenc.c index 11ee50a08..650454787 100644 --- a/scintilla/oniguruma/src/regenc.c +++ b/scintilla/oniguruma/src/regenc.c @@ -708,7 +708,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) #endif } #ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR - // if (p == end) + //if (p == end) // return 1; #endif return 0; @@ -725,7 +725,7 @@ onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end) #endif } #ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR - // if (p == end) + //if (p == end) // return 1; #endif return 0; diff --git a/src/Edit.c b/src/Edit.c index 151bdac7c..a11de8f6c 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -5897,7 +5897,18 @@ static char* _GetReplaceString(HWND hwnd, CLPCEDITFINDREPLACE lpefr, int* iRepla // // ONIG_MISMATCH #define NOT_FOUND ((DocPos)(-1LL)) -#define VALIDATE_FOUND_POS(pos, nxt, stp) (((nxt) ? ((pos) > (stp)) : ((pos) < (stp))) ? NOT_FOUND : (pos)) + +__forceinline DocPos validate_found_pos(DocPos pos, const DocPos rbeg, const DocPos rend) +{ + if (pos >= 0LL) { + if (rbeg <= rend) { // forward search + if ((pos < rbeg) || (pos > rend)) { pos = NOT_FOUND; } + } else { + if ((pos < rend) || (pos > rbeg)) { pos = NOT_FOUND; } + } + } + return pos; +} static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags, @@ -5923,27 +5934,23 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags, SciCall_SetSearchFlags(sFlags); SciCall_SetTargetRange(start, stop); - iPos = SciCall_SearchInTarget(len, chFind); - iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop + iPos = validate_found_pos(SciCall_SearchInTarget(len, chFind), start, stop); // not found if beyond stop #if 1 // handle next in case of zero-length-matches or invalid position (regex) ! bool const bZeroLenMatch = ((iPos == start) && (start == SciCall_GetTargetEnd())); - bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsValidPos(iPos, bFindNext); - DocPos oldStart = start; + bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsPosValid(iPos); while (!bValidPos) { - DocPos const newStart = (bFindNext ? SciCall_PositionAfter(oldStart) : SciCall_PositionBefore(oldStart)); - bool const bProceed = (bFindNext ? (newStart < stop) : (newStart > stop)) && (newStart != oldStart); + DocPos const newStart = (bFindNext ? SciCall_PositionAfter(iPos) : SciCall_PositionBefore(iPos)); + bool const bProceed = (bFindNext ? (newStart < stop) : (newStart > stop)) && (newStart != iPos); if (bProceed) { SciCall_SetTargetRange(newStart, stop); - iPos = SciCall_SearchInTarget(len, chFind); - iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop + iPos = validate_found_pos(SciCall_SearchInTarget(len, chFind), newStart, stop); // not found if beyond stop } else { iPos = NOT_FOUND; // already at document begin, end or stuck => not found } - bValidPos = Sci_IsValidPos(iPos, bFindNext); // NOT_FOUND is a valid pos - oldStart = newStart; + bValidPos = Sci_IsPosValid(iPos); // NOT_FOUND is a valid pos } #else // handle next in case of zero-length-matches (regex) ! @@ -7192,6 +7199,13 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio if (iPos == end) { _ShowZeroLengthCallTip(iPos); } + if ((iPos+1) == end) { + char const p = SciCall_GetCharAt(iPos); + char const e = SciCall_GetCharAt(end); + if (p == 0x0d && e == 0x0a) { + _ShowZeroLengthCallTip(iPos); + } + } if (bFoundWrapAround) { ShowWrapAroundCallTip(true); } @@ -7223,8 +7237,9 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo DocPos const iDocEndPos = Sci_GetDocEndPosition(); EditSetCaretToSelectionStart(); // fluent switch between Next/Prev - DocPos start = SciCall_GetCurrentPos(); - DocPos end = 0LL; + DocPos const curPos = SciCall_GetCurrentPos(); + DocPos start = (curPos > 0) ? SciCall_PositionBefore(curPos) : SciCall_PositionBefore(iDocEndPos); + DocPos end = 0LL; Sci_CallTipCancelEx(); @@ -7284,6 +7299,13 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo if (iPos == end) { _ShowZeroLengthCallTip(iPos); } + if ((iPos + 1) == end) { + char const p = SciCall_GetCharAt(iPos); + char const e = SciCall_GetCharAt(end); + if (p == 0x0d && e == 0x0a) { + _ShowZeroLengthCallTip(iPos); + } + } if (bFoundWrapAround) { ShowWrapAroundCallTip(false); } diff --git a/src/SciCall.h b/src/SciCall.h index 91827a9c2..1fe1f0d41 100644 --- a/src/SciCall.h +++ b/src/SciCall.h @@ -800,9 +800,11 @@ DeclareSciCallR0(IsSelectionRectangle, SELECTIONISRECTANGLE, bool); #define Sci_ClampAlpha(alpha) clampi((alpha), SC_ALPHA_TRANSPARENT, SC_ALPHA_OPAQUE) //~SC_ALPHA_NOALPHA -__forceinline bool Sci_IsValidPos(DocPos pos, bool fwd) +// ---------------------------------------------------------------------------- + +__forceinline bool Sci_IsPosValid(const DocPos pos) { - return (pos == ((pos > 0) ? (fwd ? SciCall_PositionAfter(SciCall_PositionBefore(pos)) : SciCall_PositionBefore(SciCall_PositionAfter(pos))) : pos)); + return (pos == ((pos > 0) ? SciCall_PositionAfter(SciCall_PositionBefore(pos)) : pos)); } // ---------------------------------------------------------------------------- diff --git a/test/test_files/regex/test_eol_eof.txt b/test/test_files/regex/test_eol_eof.txt index e4dbec29e..5d468d34f 100644 --- a/test/test_files/regex/test_eol_eof.txt +++ b/test/test_files/regex/test_eol_eof.txt @@ -1,5 +1,7 @@ test +Pattern: [^],[$],[t],[test],[t.*$],[.*$],[^.*],[^.*$] + testabc abctest \ No newline at end of file