diff --git a/onigmo/enc/utf_8.c b/onigmo/enc/utf_8.c index fae48adca..f5ec6b9ea 100644 --- a/onigmo/enc/utf_8.c +++ b/onigmo/enc/utf_8.c @@ -252,17 +252,21 @@ static int is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc) { if (p < end) { - if ((*p == 0x0a) || (*p == 0x0d)) return 1; // LF or CR -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if (*p == 0x0b || *p == 0x0c || *p == 0x0d) return 1; + if (*p == 0x0a) return 1; // LF + +#if defined(USE_ASCII_ALL_LINE_BREAKS) || defined(USE_UNICODE_ALL_LINE_TERMINATORS) + if (*p == 0x0b || *p == 0x0c || *p == 0x0d) return 1; // VT FF CR +#endif + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS if (p + 1 < end) { - if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ - return 1; + if (*(p + 1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; if (p + 2 < end) { - if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) - && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ - return 1; + if ((*(p + 2) == 0xa8 || *(p + 2) == 0xa9) + && *(p + 1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; } } #endif @@ -359,7 +363,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) static int mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, - const UChar* end, UChar* fold, OnigEncoding enc) + const UChar* end, UChar* fold, OnigEncoding enc) { const UChar* p = *pp; @@ -367,10 +371,10 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { if (*p == 0x49) { - *fold++ = 0xc4; - *fold = 0xb1; - (*pp)++; - return 2; + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; } } #endif @@ -387,7 +391,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, static int get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, - const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) + const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) { *sb_out = 0x80; return onigenc_unicode_ctype_code_range(ctype, ranges); diff --git a/onigmo/regenc.h b/onigmo/regenc.h index 95f235dc4..b4f57732d 100644 --- a/onigmo/regenc.h +++ b/onigmo/regenc.h @@ -133,6 +133,7 @@ typedef struct { #define USE_UNICODE_PROPERTIES #define USE_UNICODE_AGE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ +#define USE_ASCII_ALL_LINE_BREAKS // LF, VT, FF, CR /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ @@ -187,8 +188,8 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigA #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) #define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) #define UNICODE_VALID_CODEPOINT_P(c) ( \ - ((c) <= 0x10ffff) && \ - !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8))) + ((c) <= 0x10ffff) && \ + !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8))) #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ OnigEncISO_8859_1_ToLowerCaseTable[c] @@ -234,8 +235,8 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding); # define OnigEncodingDefine(f,n) \ OnigEncodingDeclare(n); \ void Init_##f(void) { \ - ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ - &OnigEncodingName(n)); \ + ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ + &OnigEncodingName(n)); \ } \ OnigEncodingDeclare(n) #else diff --git a/scionigmo/OnigmoRegExEngine.cxx b/scionigmo/OnigmoRegExEngine.cxx index e8f22b9d3..ab24b8e0e 100644 --- a/scionigmo/OnigmoRegExEngine.cxx +++ b/scionigmo/OnigmoRegExEngine.cxx @@ -205,7 +205,7 @@ static void replaceAll(std::string& source, const std::string& from, const std:: * Has not been tested with backwards DBCS searches yet. */ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern, - bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length) + bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length) { if (!(pattern && (strlen(pattern) > 0))) { *length = 0; @@ -214,15 +214,18 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit Sci::Position docLen = SciPos(doc->Length()); + const bool findForward = (minPos <= maxPos); + const int increment = findForward ? 1 : -1; + // Range endpoints should not be inside DBCS characters, but just in case, move them. - minPos = doc->MovePositionOutsideChar(minPos, 1, false); - maxPos = doc->MovePositionOutsideChar(maxPos, 1, false); - const bool findprevious = (minPos > maxPos); - Sci::Position rangeBeg = (findprevious) ? maxPos : minPos; - Sci::Position rangeEnd = (findprevious) ? minPos : maxPos; + minPos = doc->MovePositionOutsideChar(minPos, increment, false); + maxPos = doc->MovePositionOutsideChar(maxPos, increment, false); + + Sci::Position rangeBeg = (findForward) ? minPos : maxPos; + Sci::Position rangeEnd = (findForward) ? maxPos : minPos; Sci::Position rangeLen = (rangeEnd - rangeBeg); - + // ----------------------------- // --- Onigmo Engine Options --- // ----------------------------- @@ -241,7 +244,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit else { ONIG_OPTION_OFF(onigmoOptions, ONIG_OPTION_DOTALL); } - + //ONIG_OPTION_ON(onigmoOptions, ONIG_OPTION_SINGLELINE); ONIG_OPTION_ON(onigmoOptions, ONIG_OPTION_NEGATE_SINGLELINE); @@ -257,8 +260,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit bool bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigmoOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0); - if (bReCompile) - { + if (bReCompile) { m_RegExprStrg.clear(); m_RegExprStrg = sRegExprStrg; m_CmplOptions = onigmoOptions; @@ -289,12 +291,14 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit UChar* docBegPtr = (UChar*)doc->RangePointer(0, docLen); UChar* docSEndPtr = (UChar*)doc->RangePointer(docLen, 0); UChar* rangeBegPtr = (UChar*)doc->RangePointer(rangeBeg, rangeLen); - UChar* rangeEndPtr = (UChar*)doc->RangePointer(rangeEnd, rangeLen); - + UChar* rangeEndPtr = (UChar*)doc->RangePointer(rangeEnd, 0); OnigPosition result = ONIG_MISMATCH; try { - result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions); + if (findForward) + result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions); + else // X // + result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeEndPtr, rangeBegPtr, &m_Region, onigmoOptions); } catch (...) { return Cast2long(-3); // -1 is normally used for not found, -3 is used here for exception @@ -305,26 +309,7 @@ long OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Posit return Cast2long(-3); } - if (findprevious) // search for last occurrence in range - { - //SPEEDUP: onig_scan() ??? - - while ((result >= 0) && (rangeBegPtr <= rangeEndPtr)) - { - m_MatchPos = SciPos(result); //SciPos(m_Region.beg[0]); - m_MatchLen = SciPos(m_Region.end[0] - result); - - rangeBegPtr = docBegPtr + (m_MatchPos + max(1,m_MatchLen)); - - try { - result = onig_search(m_RegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, &m_Region, onigmoOptions); - } - catch (...) { - return Cast2long(-3); - } - } - } - else if ((result >= 0) && (rangeBegPtr <= rangeEndPtr)) + if ((result >= 0) && (rangeBegPtr <= rangeEndPtr)) { m_MatchPos = SciPos(result); //SciPos(m_Region.beg[0]); m_MatchLen = SciPos(m_Region.end[0] - result);