Merge pull request #4914 from RaiKoHoff/Dev_Master

Change: patch Oniguruma engine to accept EOF as line-terminator
This commit is contained in:
Rainer Kottenhoff 2023-07-07 15:36:21 +02:00 committed by GitHub
commit 3400d33aad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 84 additions and 31 deletions

View File

@ -87,9 +87,8 @@ static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/,
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_EXTEND);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_FIND_LONGEST);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
ONIG_OPTION_OFF(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR); // (!!!)
//ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE);
//ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP);
//~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE);
//~ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP);
// dynamic options
//switch (eolMode) {
@ -101,11 +100,9 @@ static void SetSimpleOptions(OnigOptionType& onigOptions, EOLmode /*eolMode*/,
//}
if (FlagSet(searchFlags, FindOption::DotMatchAll)) {
//~ONIG_OPTION_ON(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR);
ONIG_OPTION_ON(onigOptions, ONIG_OPTION_MULTILINE);
}
else {
//~ONIG_OPTION_OFF(onigOptions, ONIG_SYN_OP_DOT_ANYCHAR);
ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_MULTILINE);
}
@ -314,8 +311,7 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos
m_ErrorInfo[0] = '\0';
try {
// OnigEncoding const onigEncType = ONIG_ENCODING_UTF8;
OnigEncoding const onigEncType = (eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8;
OnigEncoding const onigEncType = (eolMode == EOLmode::LF) ? ONIG_ENCODING_UTF8 : ONIG_ENCODING_UTF8_CR;
OnigErrorInfo einfo;
int const res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()),
@ -548,6 +544,8 @@ void OnigurumaRegExEngine::clear() {
std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart, EndOfLine eolMode, OnigOptionType & /*rxOptions*/)
{
UNREFERENCED_PARAMETER(eolMode);
std::string transRegExpr;
if (wholeWord || wordStart) { // push '\b' at the begin of regexpr
@ -564,11 +562,13 @@ std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprSt
transRegExpr.append(regExprStr);
}
//if (wholeString) {
// ONIG_OPTION_ON(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
//} else {
// ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
//}
#if 0
if (wholeString) {
ONIG_OPTION_ON(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
} else {
ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_MATCH_WHOLE_STRING);
}
#endif
// Oniguruma supports LTGT word boundary by: ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
//
@ -577,18 +577,19 @@ std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprSt
//~replaceAll(transRegExpr, R"(\>)", R"((?<=\w)(?!\w))"); // word end
//~replaceAll(transRegExpr, R"(\(?<=\w)(?!\w))", R"(\\>)"); // esc'd
// EOL modes
#if 0
// EOL mode is controlled by the Oniguruma encoding selected in FindText() (ONIG_ENCODING_UTF8 vs. ONIG_ENCODING_UTF8_CR)
switch (eolMode) {
case EndOfLine::Lf:
case EndOfLine::Cr:
//ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR);
ONIG_OPTION_OFF(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR);
break;
case EndOfLine::CrLf:
//ONIG_OPTION_ON(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR);
ONIG_OPTION_ON(rxOptions, ONIG_OPTION_CRLF_AS_LINE_SEPARATOR);
break;
}
#endif
return transRegExpr;
}

View File

@ -701,8 +701,16 @@ extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
  /* Newline predicate for LF-terminated text.
   * Returns 1 when the byte at p counts as a line terminator, 0 otherwise.
   * Nothing is read when p is at or beyond end; p == end itself is not
   * reported as a newline (the variant "if (p == end) return 1;" stays disabled). */
  if (p >= end) {
    return 0;
  }
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
  /* LF, or the END_OF_FILE byte standing in for a terminator */
  return ((*p == NEWLINE_CODE) || (*p == END_OF_FILE)) ? 1 : 0;
#else
  /* LF only */
  return (*p == NEWLINE_CODE) ? 1 : 0;
#endif
}
@ -710,8 +718,16 @@ extern int
onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end)
{
  /* Newline predicate for CR-terminated text.
   * Returns 1 when the byte at p counts as a line terminator, 0 otherwise.
   * Nothing is read when p is at or beyond end; p == end itself is not
   * reported as a newline (the variant "if (p == end) return 1;" stays disabled). */
  if (p >= end) {
    return 0;
  }
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
  /* CR, or the END_OF_FILE byte standing in for a terminator */
  return ((*p == CARRIAGE_RET) || (*p == END_OF_FILE)) ? 1 : 0;
#else
  /* CR only */
  return (*p == CARRIAGE_RET) ? 1 : 0;
#endif
}

View File

@ -80,6 +80,7 @@ typedef struct {
/* Byte-value constants and shorthands used by the encoding helpers. */
#define ASCII_LIMIT 127
/* LF byte, tested by onigenc_is_mbc_newline_0x0a() */
#define NEWLINE_CODE 0x0a
/* CR byte, tested by onigenc_is_mbc_newline_0x0d() */
#define CARRIAGE_RET 0x0d
/* NUL byte; the newline predicates compare *p against it only when
 * USE_END_OF_FILE_AS_LINE_TERMINATOR is defined */
#define END_OF_FILE 0x00
/* Shorthand that forwards to ONIGENC_MBC_ENC_LEN */
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
@ -117,6 +118,7 @@ struct PropertyNameCtype {
int ctype;
};
/* Compile-time feature switches. */
/* When defined, onigenc_is_mbc_newline_0x0a() / onigenc_is_mbc_newline_0x0d()
 * additionally accept the END_OF_FILE byte as a line terminator. */
#define USE_END_OF_FILE_AS_LINE_TERMINATOR
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER

View File

@ -5918,12 +5918,17 @@ static char* _GetReplaceString(HWND hwnd, CLPCEDITFINDREPLACE lpefr, int* iRepla
//
// _FindInTarget()
//
// "No match" sentinel (-1), cf. ONIG_MISMATCH.
// Callers treat results below NOT_FOUND as an invalid-regex error when a regex search was requested.
#define NOT_FOUND ((DocPos)(-1LL))
// Maps a search result to NOT_FOUND when it lies beyond the stop position:
//   nxt != 0 (searching forward):  pos > stp  => NOT_FOUND
//   nxt == 0 (searching backward): pos < stp  => NOT_FOUND
// Otherwise yields pos unchanged (including negative values).
// Note: pos is expanded twice, so pass an expression without side effects.
#define VALIDATE_FOUND_POS(pos, nxt, stp) (((nxt) ? ((pos) > (stp)) : ((pos) < (stp))) ? NOT_FOUND : (pos))
static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags,
DocPos* begin, DocPos* end, bool bForceNext, FR_UPD_MODES fMode)
{
static char chFind[8192] = { '\0' }; // max find buffer
DocPos iPos = -1LL; // not found
DocPos iPos = NOT_FOUND;
if (StrIsEmpty(wchFind)) {
return iPos;
@ -5933,18 +5938,37 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags,
DocPos const saveTargetBeg = SciCall_GetTargetStart();
DocPos const saveTargetEnd = SciCall_GetTargetEnd();
DocPos start = *begin;
DocPos stop = *end;
DocPos const start = *begin;
DocPos const stop = *end;
bool const bFindNext = (start <= stop); // else find previous
DocPos const len = (DocPos)(WideCharToMultiByte(Encoding_SciCP, 0, wchFind, -1, chFind, COUNTOF(chFind), NULL, NULL) - 1);
SciCall_SetSearchFlags(sFlags);
SciCall_SetTargetRange(start, stop);
iPos = SciCall_SearchInTarget(len, chFind);
iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop
DocPos const len = (DocPos)WideCharToMultiByte(Encoding_SciCP, 0, wchFind, -1, chFind, COUNTOF(chFind), NULL, NULL);
iPos = SciCall_SearchInTarget(len - 1, chFind);
iPos = (bFindNext ? (iPos > stop) : (iPos < stop)) ? -1LL : iPos; // not found if beyond stop
#if 1
// handle next in case of zero-length-matches or invalid position (regex) !
bool const bZeroLenMatch = ((iPos == start) && (start == SciCall_GetTargetEnd()));
bool bValidPos = !(bForceNext && bZeroLenMatch) && Sci_IsValidPos(iPos, bFindNext);
DocPos oldStart = start;
while (!bValidPos) {
DocPos const newStart = (bFindNext ? SciCall_PositionAfter(oldStart) : SciCall_PositionBefore(oldStart));
bool const bProceed = (bFindNext ? (newStart < stop) : (newStart > stop)) && (newStart != oldStart);
if (bProceed) {
SciCall_SetTargetRange(newStart, stop);
iPos = SciCall_SearchInTarget(len, chFind);
iPos = VALIDATE_FOUND_POS(iPos, bFindNext, stop); // not found if beyond stop
}
else {
iPos = NOT_FOUND; // already at document begin, end or stuck => not found
}
bValidPos = Sci_IsValidPos(iPos, bFindNext); // NOT_FOUND is a valid pos
oldStart = newStart;
}
#else
// handle next in case of zero-length-matches (regex) !
if (iPos == start) {
DocPos const nstop = SciCall_GetTargetEnd();
@ -5959,6 +5983,7 @@ static DocPos _FindInTarget(LPCWSTR wchFind, int sFlags,
}
}
}
#endif
if (iPos >= 0) {
if (fMode != FRMOD_IGNORE) {
@ -7136,7 +7161,7 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio
DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM);
if ((iPos < -1LL) && (lpefr->fuFlags & SCFIND_REGEXP)) {
if ((iPos < NOT_FOUND) && (lpefr->fuFlags & SCFIND_REGEXP)) {
InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID);
bSuppressNotFound = true;
} else if ((iPos < 0LL) && (start >= 0LL) && !bExtendSelection) {
@ -7157,7 +7182,7 @@ bool EditFindNext(HWND hwnd, const LPEDITFINDREPLACE lpefr, bool bExtendSelectio
} else {
LONG const result = InfoBoxLng(MB_OKCANCEL, L"MsgFindWrap1", IDS_MUI_FIND_WRAPFW);
if (!IsYesOkay(result)) {
iPos = -1LL;
iPos = NOT_FOUND;
bSuppressNotFound = true;
}
bFoundWrapAround = (INFOBOX_MODE(result) != 0);
@ -7228,7 +7253,7 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo
DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM);
if ((iPos < -1LL) && (sFlags & SCFIND_REGEXP)) {
if ((iPos < NOT_FOUND) && (sFlags & SCFIND_REGEXP)) {
InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID);
bSuppressNotFound = true;
} else if ((iPos < 0LL) && (start <= iDocEndPos) && !bExtendSelection) {
@ -7242,14 +7267,14 @@ bool EditFindPrev(HWND hwnd, LPEDITFINDREPLACE lpefr, bool bExtendSelection, boo
iPos = _FindInTarget(wchFind, sFlags, &start, &end, false, FRMOD_WRAPED);
if ((iPos < 0LL) || (start == _start)) {
if ((iPos < -1LL) && (sFlags & SCFIND_REGEXP)) {
if ((iPos < NOT_FOUND) && (sFlags & SCFIND_REGEXP)) {
InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID);
bSuppressNotFound = true;
}
} else {
LONG const result = InfoBoxLng(MB_OKCANCEL, L"MsgFindWrap2", IDS_MUI_FIND_WRAPRE);
if (!IsYesOkay(result)) {
iPos = -1LL;
iPos = NOT_FOUND;
bSuppressNotFound = true;
}
bFoundWrapAround = (INFOBOX_MODE(result) != 0);
@ -7479,7 +7504,7 @@ DocPosU EditReplaceAllInRange(HWND hwnd, LPEDITFINDREPLACE lpefr, DocPos iStartP
DocPos end = iEndPos;
DocPos iPos = _FindInTarget(wchFind, sFlags, &start, &end, false, FRMOD_NORM);
if ((iPos < -1LL) && bIsRegExpr) {
if ((iPos < NOT_FOUND) && bIsRegExpr) {
InfoBoxLng(MB_ICONWARNING, L"MsgInvalidRegex", IDS_MUI_REGEX_INVALID);
return 0;
}
@ -7508,7 +7533,7 @@ DocPosU EditReplaceAllInRange(HWND hwnd, LPEDITFINDREPLACE lpefr, DocPos iStartP
}
start = iStartPos;
end = iEndPos;
iPos = (start <= end) ? _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM) : -1LL;
iPos = (start <= end) ? _FindInTarget(wchFind, sFlags, &start, &end, true, FRMOD_NORM) : NOT_FOUND;
}
EndUndoTransAction();

View File

@ -800,6 +800,10 @@ DeclareSciCallR0(IsSelectionRectangle, SELECTIONISRECTANGLE, bool);
#define Sci_ClampAlpha(alpha) clampi((alpha), SC_ALPHA_TRANSPARENT, SC_ALPHA_OPAQUE) //~SC_ALPHA_NOALPHA
// Reports whether 'pos' survives a round trip through the position-stepping calls:
//   fwd:  PositionAfter(PositionBefore(pos)) == pos
//   !fwd: PositionBefore(PositionAfter(pos)) == pos
// Positions <= 0 (this includes the -1 "not found" value) are always accepted.
// NOTE(review): presumably this detects positions that fall inside a multi-byte
// character or line-break sequence - confirm against the Scintilla documentation.
__forceinline bool Sci_IsValidPos(DocPos pos, bool fwd)
{
    if (pos <= 0) {
        return true; // nothing to step over; accepted as-is
    }
    DocPos const roundTrip = fwd ? SciCall_PositionAfter(SciCall_PositionBefore(pos))
                                 : SciCall_PositionBefore(SciCall_PositionAfter(pos));
    return (pos == roundTrip);
}
// max. line length in range (incl. line-breaks)
inline DocPos Sci_GetRangeMaxLineLength(DocLn iBeginLine, DocLn iEndLine)

View File

@ -0,0 +1,5 @@
test
testabc
abctest