mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+ fix: Onigmo backward search
+ fix: treat all ASCII line breaks (LF, VT, FF, CR) as line terminators for the end-of-line ($) metacharacter
This commit is contained in:
parent
cce69112ca
commit
85dd425893
@ -252,17 +252,21 @@ static int
|
||||
is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc)
|
||||
{
|
||||
if (p < end) {
|
||||
if ((*p == 0x0a) || (*p == 0x0d)) return 1; // LF or CR
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (*p == 0x0b || *p == 0x0c || *p == 0x0d) return 1;
|
||||
if (*p == 0x0a) return 1; // LF
|
||||
|
||||
#if defined(USE_ASCII_ALL_LINE_BREAKS) || defined(USE_UNICODE_ALL_LINE_TERMINATORS)
|
||||
if (*p == 0x0b || *p == 0x0c || *p == 0x0d) return 1; // VT FF CR
|
||||
#endif
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (p + 1 < end) {
|
||||
if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
|
||||
return 1;
|
||||
if (*(p + 1) == 0x85 && *p == 0xc2) /* U+0085 */
|
||||
return 1;
|
||||
if (p + 2 < end) {
|
||||
if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
|
||||
&& *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
|
||||
return 1;
|
||||
if ((*(p + 2) == 0xa8 || *(p + 2) == 0xa9)
|
||||
&& *(p + 1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -359,7 +363,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end, UChar* fold, OnigEncoding enc)
|
||||
const UChar* end, UChar* fold, OnigEncoding enc)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
@ -367,10 +371,10 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*p == 0x49) {
|
||||
*fold++ = 0xc4;
|
||||
*fold = 0xb1;
|
||||
(*pp)++;
|
||||
return 2;
|
||||
*fold++ = 0xc4;
|
||||
*fold = 0xb1;
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -387,7 +391,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
|
||||
static int
|
||||
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
|
||||
const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
|
||||
const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
*sb_out = 0x80;
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
|
||||
@ -133,6 +133,7 @@ typedef struct {
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
#define USE_UNICODE_AGE_PROPERTIES
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
#define USE_ASCII_ALL_LINE_BREAKS // LF, VT, FF, CR
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
|
||||
|
||||
@ -187,8 +188,8 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigA
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||
#define UNICODE_VALID_CODEPOINT_P(c) ( \
|
||||
((c) <= 0x10ffff) && \
|
||||
!((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
|
||||
((c) <= 0x10ffff) && \
|
||||
!((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
|
||||
|
||||
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToLowerCaseTable[c]
|
||||
@ -234,8 +235,8 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
|
||||
# define OnigEncodingDefine(f,n) \
|
||||
OnigEncodingDeclare(n); \
|
||||
void Init_##f(void) { \
|
||||
ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
|
||||
&OnigEncodingName(n)); \
|
||||
ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
|
||||
&OnigEncodingName(n)); \
|
||||
} \
|
||||
OnigEncodingDeclare(n)
|
||||
#else
|
||||
|
||||
@ -205,7 +205,7 @@ static void replaceAll(std::string& source, const std::string& from, const std::
|
||||
* Has not been tested with backwards DBCS searches yet.
|
||||
*/
|
||||
long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern,
|
||||
bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length)
|
||||
bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length)
|
||||
{
|
||||
if (!(pattern && (strlen(pattern) > 0))) {
|
||||
*length = 0;
|
||||
@ -214,15 +214,18 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit
|
||||
|
||||
Sci::Position docLen = SciPos(doc->Length());
|
||||
|
||||
const bool findForward = (minPos <= maxPos);
|
||||
const int increment = findForward ? 1 : -1;
|
||||
|
||||
// Range endpoints should not be inside DBCS characters, but just in case, move them.
|
||||
minPos = doc->MovePositionOutsideChar(minPos, 1, false);
|
||||
maxPos = doc->MovePositionOutsideChar(maxPos, 1, false);
|
||||
const bool findprevious = (minPos > maxPos);
|
||||
Sci::Position rangeBeg = (findprevious) ? maxPos : minPos;
|
||||
Sci::Position rangeEnd = (findprevious) ? minPos : maxPos;
|
||||
minPos = doc->MovePositionOutsideChar(minPos, increment, false);
|
||||
maxPos = doc->MovePositionOutsideChar(maxPos, increment, false);
|
||||
|
||||
Sci::Position rangeBeg = (findForward) ? minPos : maxPos;
|
||||
Sci::Position rangeEnd = (findForward) ? maxPos : minPos;
|
||||
Sci::Position rangeLen = (rangeEnd - rangeBeg);
|
||||
|
||||
|
||||
|
||||
// -----------------------------
|
||||
// --- Onigmo Engine Options ---
|
||||
// -----------------------------
|
||||
@ -241,7 +244,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit
|
||||
else {
|
||||
ONIG_OPTION_OFF(onigmoOptions, ONIG_OPTION_DOTALL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//ONIG_OPTION_ON(onigmoOptions, ONIG_OPTION_SINGLELINE);
|
||||
ONIG_OPTION_ON(onigmoOptions, ONIG_OPTION_NEGATE_SINGLELINE);
|
||||
@ -257,8 +260,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit
|
||||
|
||||
bool bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigmoOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0);
|
||||
|
||||
if (bReCompile)
|
||||
{
|
||||
if (bReCompile) {
|
||||
m_RegExprStrg.clear();
|
||||
m_RegExprStrg = sRegExprStrg;
|
||||
m_CmplOptions = onigmoOptions;
|
||||
@ -289,12 +291,14 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit
|
||||
UChar* docBegPtr = (UChar*)doc->RangePointer(0, docLen);
|
||||
UChar* docSEndPtr = (UChar*)doc->RangePointer(docLen, 0);
|
||||
UChar* rangeBegPtr = (UChar*)doc->RangePointer(rangeBeg, rangeLen);
|
||||
UChar* rangeEndPtr = (UChar*)doc->RangePointer(rangeEnd, rangeLen);
|
||||
|
||||
UChar* rangeEndPtr = (UChar*)doc->RangePointer(rangeEnd, 0);
|
||||
|
||||
OnigPosition result = ONIG_MISMATCH;
|
||||
try {
|
||||
result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions);
|
||||
if (findForward)
|
||||
result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions);
|
||||
else // X //
|
||||
result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeEndPtr, rangeBegPtr, &m_Region, onigmoOptions);
|
||||
}
|
||||
catch (...) {
|
||||
return Cast2long(-3); // -1 is normally used for not found, -3 is used here for exception
|
||||
@ -305,26 +309,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit
|
||||
return Cast2long(-3);
|
||||
}
|
||||
|
||||
if (findprevious) // search for last occurrence in range
|
||||
{
|
||||
//SPEEDUP: onig_scan() ???
|
||||
|
||||
while ((result >= 0) && (rangeBegPtr <= rangeEndPtr))
|
||||
{
|
||||
m_MatchPos = SciPos(result); //SciPos(m_Region.beg[0]);
|
||||
m_MatchLen = SciPos(m_Region.end[0] - result);
|
||||
|
||||
rangeBegPtr = docBegPtr + (m_MatchPos + max(1,m_MatchLen));
|
||||
|
||||
try {
|
||||
result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions);
|
||||
}
|
||||
catch (...) {
|
||||
return Cast2long(-3);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((result >= 0) && (rangeBegPtr <= rangeEndPtr))
|
||||
if ((result >= 0) && (rangeBegPtr <= rangeEndPtr))
|
||||
{
|
||||
m_MatchPos = SciPos(result); //SciPos(m_Region.beg[0]);
|
||||
m_MatchLen = SciPos(m_Region.end[0] - result);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user