fix: regEx search with begin/end line meta chars

This commit is contained in:
METANEOCORTEX\Kotti 2023-07-11 00:34:14 +02:00
parent e615332401
commit af9173e65f
5 changed files with 82 additions and 39 deletions

View File

@ -62,9 +62,8 @@ using namespace Scintilla::Internal;
// *** Onigmo configuration ***
// ============================================================================
enum class EOLmode : int { CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF };
enum class EOLmode : int { UDEF = -1, CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF };
//static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 };
static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8_CR };
// ============================================================================
@ -73,22 +72,37 @@ static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING
// ------------------------------------
// --- Onigmo Engine Simple Options ---
// ------------------------------------
static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/,
const bool caseSensitive, const bool forwardSearch,
const FindOption searchFlags = FindOption::None)
{
static void SetSimpleOptions(OnigOptionType &onigOptions, EOLmode /*eolMode*/,
const bool caseSensitive, const bool forwardSearch,
const FindOption searchFlags = FindOption::None,
const bool rangeBegIsDocBeg = true, const bool rangeEndIsDocEnd = true) {
// fixed options
onigOptions = ONIG_OPTION_DEFAULT;
// Notepad3 forced options
ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NEGATE_SINGLELINE);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_SINGLELINE);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_POSIX_REGION);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_EXTEND);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_FIND_LONGEST);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOTBOL);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOTEOL);
// ----------------------------------------------------------
if (rangeBegIsDocBeg) {
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOT_BEGIN_STRING);
} else {
ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_BEGIN_STRING);
}
if (rangeEndIsDocEnd) {
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_NOT_END_STRING);
} else {
ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_END_STRING);
}
//~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE);
//~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP);
//~ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NOT_BEGIN_POSITION);
// dynamic options
//switch (eolMode) {
@ -135,6 +149,7 @@ public:
, m_CmplOptions(ONIG_OPTION_DEFAULT)
, m_RegExpr(nullptr)
, m_Region({0,0,nullptr,nullptr,nullptr})
, m_EOLmode(EOLmode::UDEF)
, m_RangeBeg(-1)
, m_RangeEnd(-1)
, m_ErrorInfo()
@ -181,6 +196,7 @@ private:
OnigOptionType m_CmplOptions;
OnigRegex m_RegExpr;
OnigRegion m_Region;
EOLmode m_EOLmode;
Sci::Position m_RangeBeg;
Sci::Position m_RangeEnd;
@ -286,21 +302,19 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos
int const increment = findForward ? 1 : -1;
// Range endpoints should not be inside DBCS characters, but just in case, move them.
minPos = doc->MovePositionOutsideChar(minPos + (findForward ? 0 : -1), increment, false);
maxPos = doc->MovePositionOutsideChar(maxPos, increment, false);
minPos = doc->MovePositionOutsideChar(minPos, increment, true);
maxPos = doc->MovePositionOutsideChar(maxPos, increment, true);
Sci::Position const rangeBeg = (findForward) ? minPos : maxPos;
Sci::Position const rangeEnd = (findForward) ? maxPos : minPos;
Sci::Position const rangeLen = (rangeEnd - rangeBeg);
OnigOptionType onigOptions;
SetSimpleOptions(onigOptions, eolMode, caseSensitive, findForward, searchFlags);
ONIG_OPTION_ON(onigOptions, (rangeBeg > docBegPos) ? ONIG_OPTION_NOTBOL : ONIG_OPTION_NONE);
ONIG_OPTION_ON(onigOptions, (rangeEnd < docEndPos) ? ONIG_OPTION_NOTEOL : ONIG_OPTION_NONE);
SetSimpleOptions(onigOptions, eolMode, caseSensitive, findForward, searchFlags, (rangeBeg == docBegPos), (rangeEnd == docEndPos));
std::string const sRegExprStrg = translateRegExpr(pattern, word, wordStart, doc->eolMode, onigOptions);
bool const bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0);
bool const bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0) || (m_EOLmode != eolMode);
if (bReCompile) {
clear();
@ -308,11 +322,13 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos
m_CmplOptions = onigOptions;
m_RangeBeg = rangeBeg;
m_RangeEnd = rangeEnd;
m_EOLmode = eolMode;
m_ErrorInfo[0] = '\0';
try {
OnigEncoding const onigEncType = (eolMode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : ONIG_ENCODING_UTF8_CR;
OnigEncoding const onigEncType = ((eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8);
OnigErrorInfo einfo;
int const res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()),
m_CmplOptions, onigEncType, &m_OnigSyntax, &einfo);
@ -714,11 +730,11 @@ class SimpleRegExEngine
public:
explicit SimpleRegExEngine(const EOLmode eolMode)
: m_EOLmode(eolMode)
, m_OnigSyntax(*NP3_ONIG_SYNTAX_FLAVOR)
: m_OnigSyntax(*NP3_ONIG_SYNTAX_FLAVOR)
, m_Options(ONIG_OPTION_DEFAULT)
, m_RegExpr(nullptr)
, m_Region({ 0,0,nullptr,nullptr,nullptr })
, m_EOLmode(eolMode)
, m_ErrorInfo()
, m_MatchPos(ONIG_MISMATCH)
, m_MatchLen(0)
@ -750,11 +766,11 @@ private:
private:
EOLmode m_EOLmode;
OnigSyntaxType m_OnigSyntax;
OnigOptionType m_Options;
OnigRegex m_RegExpr;
OnigRegion m_Region;
EOLmode m_EOLmode;
OnigUChar m_ErrorInfo[ONIG_MAX_ERROR_MESSAGE_LEN];
@ -777,15 +793,16 @@ OnigPos SimpleRegExEngine::Find(const OnigUChar* pattern, const OnigUChar* docum
return OnigPos(-1);
}
bool const findForward = true;
// init search options
SetSimpleOptions(m_Options, m_EOLmode, caseSensitive, true);
SetSimpleOptions(m_Options, m_EOLmode, caseSensitive, findForward);
m_ErrorInfo[0] = '\0';
try {
onig_free(m_RegExpr);
//OnigEncoding const onigEncType = ONIG_ENCODING_UTF8;
OnigEncoding const onigEncType = (m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8;
OnigEncoding const onigEncType = ((m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8);
OnigErrorInfo einfo;
int res = onig_new(&m_RegExpr, pattern, (pattern + patternLen), m_Options, onigEncType, &m_OnigSyntax, &einfo);

View File

@ -708,7 +708,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
#endif
}
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
// if (p == end)
//if (p == end)
// return 1;
#endif
return 0;
@ -725,7 +725,7 @@ onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end)
#endif
}
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
// if (p == end)
//if (p == end)
// return 1;
#endif
return 0;

View File

@ -5897,7 +5897,18 @@ static char* _GetReplaceString(HWND hwnd, CLPCEDITFINDREPLACE lpefr, int* iRepla
//
// ONIG_MISMATCH
#define NOT_FOUND ((DocPos)(-1LL))
#define VALIDATE_FOUND_POS(pos, nxt, stp) (((nxt) ? ((pos) > (stp)) : ((pos) < (stp))) ? NOT_FOUND : (pos))
// Checks a search result against the range that was searched.
//   pos  : position reported by the search (negative values pass through unchanged)
//   rbeg : position where the search started
//   rend : position where the search stopped; rbeg > rend denotes a backward search
// Returns pos when it lies within the closed interval spanned by rbeg and rend,
// otherwise NOT_FOUND.
__forceinline DocPos validate_found_pos(DocPos pos, const DocPos rbeg, const DocPos rend)
{
    if (pos < 0LL) {
        return pos; // nothing to validate for negative results
    }
    // normalize the bounds so that forward and backward searches share one check
    DocPos const lower = (rbeg <= rend) ? rbeg : rend;
    DocPos const upper = (rbeg <= rend) ? rend : rbeg;
    return ((pos < lower) || (pos > upper)) ? NOT_FOUND : pos;
}
static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags,
@ -5923,27 +5934,23 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags,
SciCall_SetSearchFlags(sFlags);
SciCall_SetTargetRange(start, stop);
iPos = SciCall_SearchInTarget(len, chFind);
iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop
iPos = validate_found_pos(SciCall_SearchInTarget(len, chFind), start, stop); // not found if beyond stop
#if 1
// handle next in case of zero-length-matches or invalid position (regex) !
bool const bZeroLenMatch = ((iPos == start) && (start == SciCall_GetTargetEnd()));
bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsValidPos(iPos, bFindNext);
DocPos oldStart = start;
bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsPosValid(iPos);
while (!bValidPos) {
DocPos const newStart = (bFindNext ? SciCall_PositionAfter(oldStart) : SciCall_PositionBefore(oldStart));
bool const bProceed = (bFindNext ? (newStart < stop) : (newStart > stop)) && (newStart != oldStart);
DocPos const newStart = (bFindNext ? SciCall_PositionAfter(iPos) : SciCall_PositionBefore(iPos));
bool const bProceed = (bFindNext ? (newStart < stop) : (newStart > stop)) && (newStart != iPos);
if (bProceed) {
SciCall_SetTargetRange(newStart, stop);
iPos = SciCall_SearchInTarget(len, chFind);
iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop
iPos = validate_found_pos(SciCall_SearchInTarget(len, chFind), newStart, stop); // not found if beyond stop
}
else {
iPos = NOT_FOUND; // already at document begin, end or stuck => not found
}
bValidPos = Sci_IsValidPos(iPos, bFindNext); // NOT_FOUND is a valid pos
oldStart = newStart;
bValidPos = Sci_IsPosValid(iPos); // NOT_FOUND is a valid pos
}
#else
// handle next in case of zero-length-matches (regex) !
@ -7192,6 +7199,13 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio
if (iPos == end) {
_ShowZeroLengthCallTip(iPos);
}
if ((iPos+1) == end) {
char const p = SciCall_GetCharAt(iPos);
char const e = SciCall_GetCharAt(end);
if (p == 0x0d && e == 0x0a) {
_ShowZeroLengthCallTip(iPos);
}
}
if (bFoundWrapAround) {
ShowWrapAroundCallTip(true);
}
@ -7223,8 +7237,9 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo
DocPos const iDocEndPos = Sci_GetDocEndPosition();
EditSetCaretToSelectionStart(); // fluent switch between Next/Prev
DocPos start = SciCall_GetCurrentPos();
DocPos end = 0LL;
DocPos const curPos = SciCall_GetCurrentPos();
DocPos start = (curPos > 0) ? SciCall_PositionBefore(curPos) : SciCall_PositionBefore(iDocEndPos);
DocPos end = 0LL;
Sci_CallTipCancelEx();
@ -7284,6 +7299,13 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo
if (iPos == end) {
_ShowZeroLengthCallTip(iPos);
}
if ((iPos + 1) == end) {
char const p = SciCall_GetCharAt(iPos);
char const e = SciCall_GetCharAt(end);
if (p == 0x0d && e == 0x0a) {
_ShowZeroLengthCallTip(iPos);
}
}
if (bFoundWrapAround) {
ShowWrapAroundCallTip(false);
}

View File

@ -800,9 +800,11 @@ DeclareSciCallR0(IsSelectionRectangle, SELECTIONISRECTANGLE, bool);
#define Sci_ClampAlpha(alpha) clampi((alpha), SC_ALPHA_TRANSPARENT, SC_ALPHA_OPAQUE) //~SC_ALPHA_NOALPHA
__forceinline bool Sci_IsValidPos(DocPos pos, bool fwd)
// ----------------------------------------------------------------------------
__forceinline bool Sci_IsPosValid(const DocPos pos)
{
return (pos == ((pos > 0) ? (fwd ? SciCall_PositionAfter(SciCall_PositionBefore(pos)) : SciCall_PositionBefore(SciCall_PositionAfter(pos))) : pos));
return (pos == ((pos > 0) ? SciCall_PositionAfter(SciCall_PositionBefore(pos)) : pos));
}
// ----------------------------------------------------------------------------

View File

@ -1,5 +1,7 @@
test
Pattern: [^],[$],[t],[test],[t.*$],[.*$],[^.*],[^.*$]
testabc
abctest