From ebcf9f0aa63abfe139dd30d500e36a9c4c41c1dc Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Fri, 17 Nov 2017 11:42:10 +0100 Subject: [PATCH] + optimized DeelX regex interface --- scintilla/deelx/DeelxRegexSearch.cxx | 144 +++++++++++++-------------- scintilla/deelx/deelx64.h | 17 ++-- 2 files changed, 80 insertions(+), 81 deletions(-) diff --git a/scintilla/deelx/DeelxRegexSearch.cxx b/scintilla/deelx/DeelxRegexSearch.cxx index 0757feb26..e702550c6 100644 --- a/scintilla/deelx/DeelxRegexSearch.cxx +++ b/scintilla/deelx/DeelxRegexSearch.cxx @@ -47,9 +47,17 @@ #include "deelx64.h" // DEELX - Regular Expression Engine (v1.3) // --------------------------------------------------------------- - using namespace Scintilla; +#define SciPos(pos) static_cast(pos) +#define SciLn(line) static_cast(line) +#define SciPosExt(pos) static_cast(pos) + +#define DeelXPos(pos) static_cast(pos) +#define Cast2int(n) static_cast(n) + +// --------------------------------------------------------------- + class DeelxRegexSearch : public RegexSearchBase { public: @@ -61,33 +69,25 @@ public: , m_Match() , m_MatchPos(-1) , m_MatchLength(0) - , m_pContext(nullptr) , m_SubstitutionBuffer(nullptr) {} virtual ~DeelxRegexSearch() { ReleaseSubstitutionBuffer(); - ReleaseContext(); +// ReleaseContext(SciPos(-1), SciPos(-1)); + m_RegExprStrg.clear(); } - virtual long FindText(Document* doc,int minPos,int maxPos,const char* pattern, - bool caseSensitive,bool word,bool wordStart,int flags,int* length) override; + virtual long FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char* pattern, + bool caseSensitive, bool word, bool wordStart, int flags, Sci::Position* length) override; - virtual const char* SubstituteByPosition(Document* doc,const char* text,int* length) override; + virtual const char* SubstituteByPosition(Document* doc, const char* text, Sci::Position* length) override; private: - inline void ReleaseContext() - { - if (m_pContext != nullptr) { - m_RegExpr.ReleaseContext(m_pContext); - m_pContext = nullptr; - } - } - - inline void ReleaseSubstitutionBuffer() + __inline void ReleaseSubstitutionBuffer() { if (m_SubstitutionBuffer) { m_RegExpr.ReleaseString(m_SubstitutionBuffer); @@ -95,14 +95,17 @@ private: } } + std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart); + std::string& convertReplExpr(std::string& replStr); + private: + std::string m_RegExprStrg; int m_CompileFlags; deelx::CRegexpT m_RegExpr; deelx::MatchResult m_Match; - deelx::index_t m_MatchPos; - deelx::index_t m_MatchLength; - deelx::CContext* m_pContext; + Sci::Position m_MatchPos; + Sci::Position m_MatchLength; char* m_SubstitutionBuffer; }; // ============================================================================ @@ -113,15 +116,6 @@ RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) return new DeelxRegexSearch(charClassTable); } -// ============================================================================ - -/** - * forward declaration of utility functions - */ -std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart); -std::string& convertReplExpr(std::string& replStr); - - // ============================================================================ @@ -130,8 +124,8 @@ std::string& convertReplExpr(std::string& replStr); * searches (just pass minPos > maxPos to do a backward search) * Has not been tested with backwards DBCS searches yet. */ -long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char *pattern, - bool caseSensitive,bool word,bool wordStart,int searchFlags,int *length) +long DeelxRegexSearch::FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern, + bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length) { const bool right2left = false; // always left-to-right match mode const bool extended = false; // ignore spaces and use '#' as line-comment) @@ -146,14 +140,14 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char * //compileFlags |= (deelx::SINGLELINE | deelx::MULTILINE | deelx::GLOBAL); // the .(dot) also matches line-breaks compileFlags |= (extended) ? deelx::EXTENDED : deelx::NO_FLAG; - compileFlags |= (!caseSensitive) ? deelx::IGNORECASE : deelx::NO_FLAG; + compileFlags |= (caseSensitive) ? deelx::NO_FLAG : deelx::IGNORECASE; compileFlags |= (right2left) ? deelx::RIGHTTOLEFT : deelx::NO_FLAG; std::string sRegExprStrg = translateRegExpr(std::string(pattern),word,wordStart); bool bReCompile = (m_CompileFlags != compileFlags) || (m_RegExprStrg.compare(sRegExprStrg) != 0); + if (bReCompile) { - m_RegExprStrg.clear(); m_RegExprStrg = sRegExprStrg; m_CompileFlags = compileFlags; try { @@ -161,29 +155,33 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char * } catch (...) { return -2; // -1 is normally used for not found, -2 is used here for invalid regex + // DeelX is very fault tolerant and assumes what the user may want ... :-/ + // so -2 may not occur! } } - int rangeBegin = (findprevious) ? maxPos : minPos; - int rangeEnd = (findprevious) ? minPos : maxPos; - int rangeLength = abs(maxPos - minPos); - + Sci::Position rangeBegin = (findprevious) ? maxPos : minPos; + Sci::Position rangeEnd = (findprevious) ? minPos : maxPos; - Sci_Position linesTotal = doc->LinesTotal(); - Sci_Position fileLastPos = doc->Length(); + Sci::Line linesTotal = doc->LinesTotal(); + Sci::Position fileLastPos = SciPos(doc->Length()); - Sci_Position lineOfBegPos = doc->LineFromPosition(static_cast(rangeBegin)); - Sci_Position lineOfEndPos = doc->LineFromPosition(static_cast(rangeEnd)); + Sci::Line lineOfBegPos = SciLn(doc->LineFromPosition(SciPosExt(rangeBegin))); + Sci::Line lineOfEndPos = SciLn(doc->LineFromPosition(SciPosExt(rangeEnd))); - Sci_Position lineStartOfBegPos = doc->LineStart(lineOfBegPos); - Sci_Position lineEndOfEndPos = doc->LineEnd(lineOfEndPos); + Sci::Position lineStartOfBegPos = SciPos(doc->LineStart(SciPosExt(lineOfBegPos))); + Sci::Position lineEndOfEndPos = SciPos(doc->LineEnd(SciPosExt(lineOfEndPos))); + + // --- adapt range start/end according to search pattern --- size_t begMetaPos = m_RegExprStrg.find_first_of('^'); bool bFoundBegMeta = (begMetaPos != std::string::npos) && ((begMetaPos == 0) || (m_RegExprStrg.find_first_of('\\') != (begMetaPos - 1))); if (bFoundBegMeta) { - if (lineStartOfBegPos != static_cast(rangeBegin)) { - rangeBegin = (lineOfBegPos < linesTotal) ? doc->LineStart(lineOfBegPos + 1) : doc->LineEnd(linesTotal); + if (lineStartOfBegPos != rangeBegin) { + rangeBegin = SciPos((lineOfBegPos < linesTotal) ? + doc->LineStart(SciPosExt(lineOfBegPos + 1)) : + doc->LineEnd(SciPosExt(linesTotal))); rangeEnd = (rangeBegin <= rangeEnd) ? rangeEnd : rangeBegin; } } @@ -192,65 +190,67 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char * bool bFoundEndMeta = (endMetaPos != std::string::npos) && ((endMetaPos == 0) || (m_RegExprStrg.find_last_of('\\') != (endMetaPos - 1))); if (bFoundEndMeta) { - if (lineEndOfEndPos != static_cast(rangeEnd)) { - rangeEnd = (0 < lineOfEndPos) ? doc->LineEnd(lineOfEndPos - 1) : 0; + if (lineEndOfEndPos != rangeEnd) { + rangeEnd = SciPos((0 < lineOfEndPos) ? doc->LineEnd(SciPosExt(lineOfEndPos - 1)) : 0); rangeBegin = (rangeBegin <= rangeEnd) ? rangeBegin : rangeEnd; } } - ReleaseContext(); - m_pContext = m_RegExpr.PrepareMatch(doc->RangePointer(rangeBegin,rangeLength)); +//@@@ Sci::Position rangeLength = rangeEnd - rangeBegin; - m_MatchPos = -1; // not found - m_MatchLength = 0; - m_Match = m_RegExpr.Match(m_pContext); + // --- start search --- + + m_MatchPos = SciPos(-1); // not found + m_MatchLength = SciPos(0); + const deelx::index_t searchStop = DeelXPos(rangeEnd); if (findprevious) // search previous { + deelx::CContext* pContext = m_RegExpr.PrepareMatch(doc->RangePointer(0, fileLastPos), 0); + m_Match = m_RegExpr.Match(pContext); // search for last occurrence in range - while (m_Match.IsMatched() && (m_Match.GetStart() < rangeLength)) + while (m_Match.IsMatched() && (m_Match.GetStart() < searchStop)) { - m_MatchPos = rangeBegin + m_Match.GetStart(); - m_MatchLength = (m_Match.GetEnd() - m_Match.GetStart()); - - m_Match = m_RegExpr.Match(m_pContext); //next + m_MatchPos = SciPos(m_Match.GetStart()); + m_MatchLength = SciPos(m_Match.GetEnd() - m_Match.GetStart()); + m_Match = m_RegExpr.Match(pContext); //next } + m_RegExpr.ReleaseContext(pContext); } - else - { - if (m_Match.IsMatched() && (m_Match.GetStart() < rangeLength)) - { - m_MatchPos = rangeBegin + m_Match.GetStart(); - m_MatchLength = (m_Match.GetEnd() - m_Match.GetStart()); + else { + m_Match = m_RegExpr.Match(doc->RangePointer(0, fileLastPos), + DeelXPos(fileLastPos), DeelXPos(rangeBegin)); + if (m_Match.IsMatched() && (m_Match.GetStart() < searchStop)) { + m_MatchPos = SciPos(m_Match.GetStart()); + m_MatchLength = SciPos(m_Match.GetEnd() - m_Match.GetStart()); } } //NOTE: potential 64-bit-size issue at interface here: - *length = static_cast(m_MatchLength); + *length = SciPos(m_MatchLength); return static_cast(m_MatchPos); } // ============================================================================ -const char* DeelxRegexSearch::SubstituteByPosition(Document* doc,const char* text,int* length) +const char* DeelxRegexSearch::SubstituteByPosition(Document* doc, const char* text, Sci::Position* length) { if (!m_Match.IsMatched() || (m_MatchPos < 0)) { - *length = 0; + *length = SciPos(0); return nullptr; } std::string sReplStrg = convertReplExpr(std::string(text,*length)); + deelx::index_t replLength = DeelXPos(sReplStrg.length()); - //NOTE: potential 64-bit-size issue at interface here: - const char* pString = doc->RangePointer(static_cast(m_MatchPos),static_cast(m_MatchLength)); + const char* pString = doc->RangePointer(m_MatchPos,m_MatchLength); - deelx::index_t resLength; ReleaseSubstitutionBuffer(); - m_SubstitutionBuffer = m_RegExpr.Replace(pString,m_MatchLength,sReplStrg.c_str(), - static_cast(sReplStrg.length()),resLength); + deelx::index_t resLength; + m_SubstitutionBuffer = m_RegExpr.Replace(pString,m_MatchLength,sReplStrg.c_str(),replLength,resLength); //NOTE: potential 64-bit-size issue at interface here: - *length = static_cast(resLength); + *length = SciPos(resLength); return m_SubstitutionBuffer; } @@ -286,7 +286,7 @@ void replaceAll(std::string& source,const std::string& from,const std::string& t -std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart) +std::string& DeelxRegexSearch::translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart) { std::string tmpStr; @@ -310,7 +310,7 @@ std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordSt -std::string& convertReplExpr(std::string& replStr) +std::string& DeelxRegexSearch::convertReplExpr(std::string& replStr) { std::string tmpStr; for (size_t i = 0; i < replStr.length(); ++i) { diff --git a/scintilla/deelx/deelx64.h b/scintilla/deelx/deelx64.h index b27d19725..a0b4b6845 100644 --- a/scintilla/deelx/deelx64.h +++ b/scintilla/deelx/deelx64.h @@ -29,14 +29,13 @@ #include #include #include - -#include +#include namespace deelx { // integer type for pointer arithmetic & casts (64-bit aware) //typedef int index_t; // preserve original "deelx.h" v1.3 behavior - using index_t = INT_PTR; + using index_t = ptrdiff_t; //INT_PTR extern "C" { using POSIX_FUNC = int(*)(int); @@ -3612,9 +3611,9 @@ public: public: MatchResult MatchExact(const CHART * tstring, CContext * pContext = nullptr) const; - MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = nullptr) const; - MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = nullptr) const; - MatchResult Match(const CHART * tstring, int length, int start, CContext * pContext = nullptr) const; + MatchResult MatchExact(const CHART * tstring, index_t length, CContext * pContext = nullptr) const; + MatchResult Match(const CHART * tstring, index_t start = -1, CContext * pContext = nullptr) const; + MatchResult Match(const CHART * tstring, index_t length, index_t start, CContext * pContext = nullptr) const; MatchResult Match(CContext * pContext) const; CContext * PrepareMatch(const CHART * tstring, index_t start = -1, CContext * pContext = nullptr) const; CContext * PrepareMatch(const CHART * tstring, index_t length, index_t start, CContext * pContext = nullptr) const; @@ -3659,7 +3658,7 @@ template inline MatchResult CRegexpT ::MatchExact(const CH return MatchExact(tstring, CBufferRefT(tstring).GetSize(), pContext); } -template MatchResult CRegexpT ::MatchExact(const CHART * tstring, int length, CContext * pContext) const +template MatchResult CRegexpT ::MatchExact(const CHART * tstring, index_t length, CContext * pContext) const { if (m_builder.m_pTopElx == 0) return nullptr; @@ -3725,12 +3724,12 @@ template MatchResult CRegexpT ::MatchExact(const CHART * t } } -template MatchResult CRegexpT ::Match(const CHART * tstring, int start, CContext * pContext) const +template MatchResult CRegexpT ::Match(const CHART * tstring, index_t start, CContext * pContext) const { return Match(tstring, CBufferRefT(tstring).GetSize(), start, pContext); } -template MatchResult CRegexpT ::Match(const CHART * tstring, int length, int start, CContext * pContext) const +template MatchResult CRegexpT ::Match(const CHART * tstring, index_t length, index_t start, CContext * pContext) const { if (m_builder.m_pTopElx == 0) return nullptr;