+ optimized DeelX regex interface

This commit is contained in:
Rainer Kottenhoff 2017-11-17 11:42:10 +01:00
parent f711dc4a25
commit ebcf9f0aa6
2 changed files with 80 additions and 81 deletions

View File

@ -47,9 +47,17 @@
#include "deelx64.h" // DEELX - Regular Expression Engine (v1.3)
// ---------------------------------------------------------------
using namespace Scintilla;
// Cast helper macros: each one wraps its argument in a static_cast to the
// named integer type, so conversions between Scintilla position/line values
// and DeelX indices are spelled out at the call site.
// SciPos:    cast to Sci::Position
#define SciPos(pos) static_cast<Sci::Position>(pos)
// SciLn:     cast to Sci::Line
#define SciLn(line) static_cast<Sci::Line>(line)
// SciPosExt: cast to Sci_Position (global-scope type, distinct spelling from
//            Sci::Position; presumably the external interface type - TODO confirm)
#define SciPosExt(pos) static_cast<Sci_Position>(pos)
// DeelXPos:  cast to deelx::index_t
#define DeelXPos(pos) static_cast<deelx::index_t>(pos)
// Cast2int:  cast to plain int
#define Cast2int(n) static_cast<int>(n)
// ---------------------------------------------------------------
class DeelxRegexSearch : public RegexSearchBase
{
public:
@ -61,33 +69,25 @@ public:
, m_Match()
, m_MatchPos(-1)
, m_MatchLength(0)
, m_pContext(nullptr)
, m_SubstitutionBuffer(nullptr)
{}
virtual ~DeelxRegexSearch()
{
ReleaseSubstitutionBuffer();
ReleaseContext();
// ReleaseContext(SciPos(-1), SciPos(-1));
m_RegExprStrg.clear();
}
virtual long FindText(Document* doc,int minPos,int maxPos,const char* pattern,
bool caseSensitive,bool word,bool wordStart,int flags,int* length) override;
virtual long FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char* pattern,
bool caseSensitive, bool word, bool wordStart, int flags, Sci::Position* length) override;
virtual const char* SubstituteByPosition(Document* doc,const char* text,int* length) override;
virtual const char* SubstituteByPosition(Document* doc, const char* text, Sci::Position* length) override;
private:
// Hands the stored match context (m_pContext), if one is set, back to
// m_RegExpr via ReleaseContext() and resets the pointer to nullptr, so a
// second call finds nothing to release.
inline void ReleaseContext()
{
if (m_pContext != nullptr) {
m_RegExpr.ReleaseContext(m_pContext);
m_pContext = nullptr;
}
}
inline void ReleaseSubstitutionBuffer()
__inline void ReleaseSubstitutionBuffer()
{
if (m_SubstitutionBuffer) {
m_RegExpr.ReleaseString(m_SubstitutionBuffer);
@ -95,14 +95,17 @@ private:
}
}
std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart);
std::string& convertReplExpr(std::string& replStr);
private:
std::string m_RegExprStrg;
int m_CompileFlags;
deelx::CRegexpT<char> m_RegExpr;
deelx::MatchResult m_Match;
deelx::index_t m_MatchPos;
deelx::index_t m_MatchLength;
deelx::CContext* m_pContext;
Sci::Position m_MatchPos;
Sci::Position m_MatchLength;
char* m_SubstitutionBuffer;
};
// ============================================================================
@ -113,15 +116,6 @@ RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable)
return new DeelxRegexSearch(charClassTable);
}
// ============================================================================
/**
* forward declaration of utility functions
*/
std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart);
std::string& convertReplExpr(std::string& replStr);
// ============================================================================
@ -130,8 +124,8 @@ std::string& convertReplExpr(std::string& replStr);
* searches (just pass minPos > maxPos to do a backward search)
* Has not been tested with backwards DBCS searches yet.
*/
long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char *pattern,
bool caseSensitive,bool word,bool wordStart,int searchFlags,int *length)
long DeelxRegexSearch::FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern,
bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length)
{
const bool right2left = false; // always left-to-right match mode
const bool extended = false; // ignore spaces and use '#' as line-comment)
@ -146,14 +140,14 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char *
//compileFlags |= (deelx::SINGLELINE | deelx::MULTILINE | deelx::GLOBAL); // the .(dot) also matches line-breaks
compileFlags |= (extended) ? deelx::EXTENDED : deelx::NO_FLAG;
compileFlags |= (!caseSensitive) ? deelx::IGNORECASE : deelx::NO_FLAG;
compileFlags |= (caseSensitive) ? deelx::NO_FLAG : deelx::IGNORECASE;
compileFlags |= (right2left) ? deelx::RIGHTTOLEFT : deelx::NO_FLAG;
std::string sRegExprStrg = translateRegExpr(std::string(pattern),word,wordStart);
bool bReCompile = (m_CompileFlags != compileFlags) || (m_RegExprStrg.compare(sRegExprStrg) != 0);
if (bReCompile) {
m_RegExprStrg.clear();
m_RegExprStrg = sRegExprStrg;
m_CompileFlags = compileFlags;
try {
@ -161,29 +155,33 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char *
}
catch (...) {
return -2; // -1 is normally used for not found, -2 is used here for invalid regex
// DeelX is very fault tolerant and assumes what the user may want ... :-/
// so -2 may not occur!
}
}
int rangeBegin = (findprevious) ? maxPos : minPos;
int rangeEnd = (findprevious) ? minPos : maxPos;
int rangeLength = abs(maxPos - minPos);
Sci::Position rangeBegin = (findprevious) ? maxPos : minPos;
Sci::Position rangeEnd = (findprevious) ? minPos : maxPos;
Sci_Position linesTotal = doc->LinesTotal();
Sci_Position fileLastPos = doc->Length();
Sci::Line linesTotal = doc->LinesTotal();
Sci::Position fileLastPos = SciPos(doc->Length());
Sci_Position lineOfBegPos = doc->LineFromPosition(static_cast<Sci_Position>(rangeBegin));
Sci_Position lineOfEndPos = doc->LineFromPosition(static_cast<Sci_Position>(rangeEnd));
Sci::Line lineOfBegPos = SciLn(doc->LineFromPosition(SciPosExt(rangeBegin)));
Sci::Line lineOfEndPos = SciLn(doc->LineFromPosition(SciPosExt(rangeEnd)));
Sci_Position lineStartOfBegPos = doc->LineStart(lineOfBegPos);
Sci_Position lineEndOfEndPos = doc->LineEnd(lineOfEndPos);
Sci::Position lineStartOfBegPos = SciPos(doc->LineStart(SciPosExt(lineOfBegPos)));
Sci::Position lineEndOfEndPos = SciPos(doc->LineEnd(SciPosExt(lineOfEndPos)));
// --- adapt range start/end according to search pattern ---
size_t begMetaPos = m_RegExprStrg.find_first_of('^');
bool bFoundBegMeta = (begMetaPos != std::string::npos) &&
((begMetaPos == 0) || (m_RegExprStrg.find_first_of('\\') != (begMetaPos - 1)));
if (bFoundBegMeta) {
if (lineStartOfBegPos != static_cast<Sci_Position>(rangeBegin)) {
rangeBegin = (lineOfBegPos < linesTotal) ? doc->LineStart(lineOfBegPos + 1) : doc->LineEnd(linesTotal);
if (lineStartOfBegPos != rangeBegin) {
rangeBegin = SciPos((lineOfBegPos < linesTotal) ?
doc->LineStart(SciPosExt(lineOfBegPos + 1)) :
doc->LineEnd(SciPosExt(linesTotal)));
rangeEnd = (rangeBegin <= rangeEnd) ? rangeEnd : rangeBegin;
}
}
@ -192,65 +190,67 @@ long DeelxRegexSearch::FindText(Document* doc,int minPos,int maxPos,const char *
bool bFoundEndMeta = (endMetaPos != std::string::npos) &&
((endMetaPos == 0) || (m_RegExprStrg.find_last_of('\\') != (endMetaPos - 1)));
if (bFoundEndMeta) {
if (lineEndOfEndPos != static_cast<Sci_Position>(rangeEnd)) {
rangeEnd = (0 < lineOfEndPos) ? doc->LineEnd(lineOfEndPos - 1) : 0;
if (lineEndOfEndPos != rangeEnd) {
rangeEnd = SciPos((0 < lineOfEndPos) ? doc->LineEnd(SciPosExt(lineOfEndPos - 1)) : 0);
rangeBegin = (rangeBegin <= rangeEnd) ? rangeBegin : rangeEnd;
}
}
ReleaseContext();
m_pContext = m_RegExpr.PrepareMatch(doc->RangePointer(rangeBegin,rangeLength));
//@@@ Sci::Position rangeLength = rangeEnd - rangeBegin;
m_MatchPos = -1; // not found
m_MatchLength = 0;
m_Match = m_RegExpr.Match(m_pContext);
// --- start search ---
m_MatchPos = SciPos(-1); // not found
m_MatchLength = SciPos(0);
const deelx::index_t searchStop = DeelXPos(rangeEnd);
if (findprevious) // search previous
{
deelx::CContext* pContext = m_RegExpr.PrepareMatch(doc->RangePointer(0, fileLastPos), 0);
m_Match = m_RegExpr.Match(pContext);
// search for last occurrence in range
while (m_Match.IsMatched() && (m_Match.GetStart() < rangeLength))
while (m_Match.IsMatched() && (m_Match.GetStart() < searchStop))
{
m_MatchPos = rangeBegin + m_Match.GetStart();
m_MatchLength = (m_Match.GetEnd() - m_Match.GetStart());
m_Match = m_RegExpr.Match(m_pContext); //next
m_MatchPos = SciPos(m_Match.GetStart());
m_MatchLength = SciPos(m_Match.GetEnd() - m_Match.GetStart());
m_Match = m_RegExpr.Match(pContext); //next
}
m_RegExpr.ReleaseContext(pContext);
}
else
{
if (m_Match.IsMatched() && (m_Match.GetStart() < rangeLength))
{
m_MatchPos = rangeBegin + m_Match.GetStart();
m_MatchLength = (m_Match.GetEnd() - m_Match.GetStart());
else {
m_Match = m_RegExpr.Match(doc->RangePointer(0, fileLastPos),
DeelXPos(fileLastPos), DeelXPos(rangeBegin));
if (m_Match.IsMatched() && (m_Match.GetStart() < searchStop)) {
m_MatchPos = SciPos(m_Match.GetStart());
m_MatchLength = SciPos(m_Match.GetEnd() - m_Match.GetStart());
}
}
//NOTE: potential 64-bit-size issue at interface here:
*length = static_cast<int>(m_MatchLength);
*length = SciPos(m_MatchLength);
return static_cast<long>(m_MatchPos);
}
// ============================================================================
/**
 * Builds the replacement string for the match recorded in m_Match.
 *
 * The replacement expression is first passed through convertReplExpr(), then
 * m_RegExpr.Replace() is run on the text obtained from
 * doc->RangePointer(m_MatchPos, m_MatchLength). The result is kept in
 * m_SubstitutionBuffer; any previous buffer is released beforehand through
 * ReleaseSubstitutionBuffer().
 *
 * @param doc     document that supplies the matched text via RangePointer()
 * @param text    replacement expression
 * @param length  in: number of characters of `text` to use;
 *                out: set from the resLength value passed to Replace(),
 *                or 0 when there is no current match
 * @return m_SubstitutionBuffer, or nullptr when m_Match is not matched or
 *         m_MatchPos is negative
 *
 * NOTE(review): this span comes from a diff view with the +/- markers removed,
 * so each changed statement appears twice (the int-based line followed by its
 * Sci::Position / deelx::index_t counterpart).
 */
const char* DeelxRegexSearch::SubstituteByPosition(Document* doc,const char* text,int* length)
const char* DeelxRegexSearch::SubstituteByPosition(Document* doc, const char* text, Sci::Position* length)
{
// Nothing to substitute without a valid match: report zero length and no string.
if (!m_Match.IsMatched() || (m_MatchPos < 0)) {
*length = 0;
*length = SciPos(0);
return nullptr;
}
// *length still holds the input length of `text` at this point.
std::string sReplStrg = convertReplExpr(std::string(text,*length));
deelx::index_t replLength = DeelXPos(sReplStrg.length());
//NOTE: potential 64-bit-size issue at interface here:
const char* pString = doc->RangePointer(static_cast<int>(m_MatchPos),static_cast<int>(m_MatchLength));
const char* pString = doc->RangePointer(m_MatchPos,m_MatchLength);
deelx::index_t resLength;
// Release the previous result before the member is overwritten below.
ReleaseSubstitutionBuffer();
m_SubstitutionBuffer = m_RegExpr.Replace(pString,m_MatchLength,sReplStrg.c_str(),
static_cast<deelx::index_t>(sReplStrg.length()),resLength);
deelx::index_t resLength;
m_SubstitutionBuffer = m_RegExpr.Replace(pString,m_MatchLength,sReplStrg.c_str(),replLength,resLength);
//NOTE: potential 64-bit-size issue at interface here:
*length = static_cast<int>(resLength);
*length = SciPos(resLength);
return m_SubstitutionBuffer;
}
@ -286,7 +286,7 @@ void replaceAll(std::string& source,const std::string& from,const std::string& t
std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart)
std::string& DeelxRegexSearch::translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart)
{
std::string tmpStr;
@ -310,7 +310,7 @@ std::string& translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordSt
std::string& convertReplExpr(std::string& replStr)
std::string& DeelxRegexSearch::convertReplExpr(std::string& replStr)
{
std::string tmpStr;
for (size_t i = 0; i < replStr.length(); ++i) {

View File

@ -29,14 +29,13 @@
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <basetsd.h>
#include <stddef.h>
namespace deelx
{
// integer type for pointer arithmetic & casts (64-bit aware)
//typedef int index_t; // preserve original "deelx.h" v1.3 behavior
using index_t = INT_PTR;
using index_t = ptrdiff_t; //INT_PTR
extern "C" {
using POSIX_FUNC = int(*)(int);
@ -3612,9 +3611,9 @@ public:
public:
MatchResult MatchExact(const CHART * tstring, CContext * pContext = nullptr) const;
MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = nullptr) const;
MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = nullptr) const;
MatchResult Match(const CHART * tstring, int length, int start, CContext * pContext = nullptr) const;
MatchResult MatchExact(const CHART * tstring, index_t length, CContext * pContext = nullptr) const;
MatchResult Match(const CHART * tstring, index_t start = -1, CContext * pContext = nullptr) const;
MatchResult Match(const CHART * tstring, index_t length, index_t start, CContext * pContext = nullptr) const;
MatchResult Match(CContext * pContext) const;
CContext * PrepareMatch(const CHART * tstring, index_t start = -1, CContext * pContext = nullptr) const;
CContext * PrepareMatch(const CHART * tstring, index_t length, index_t start, CContext * pContext = nullptr) const;
@ -3659,7 +3658,7 @@ template <class CHART> inline MatchResult CRegexpT <CHART> ::MatchExact(const CH
return MatchExact(tstring, CBufferRefT<CHART>(tstring).GetSize(), pContext);
}
template <class CHART> MatchResult CRegexpT <CHART> ::MatchExact(const CHART * tstring, int length, CContext * pContext) const
template <class CHART> MatchResult CRegexpT <CHART> ::MatchExact(const CHART * tstring, index_t length, CContext * pContext) const
{
if (m_builder.m_pTopElx == 0)
return nullptr;
@ -3725,12 +3724,12 @@ template <class CHART> MatchResult CRegexpT <CHART> ::MatchExact(const CHART * t
}
}
template <class CHART> MatchResult CRegexpT <CHART> ::Match(const CHART * tstring, int start, CContext * pContext) const
template <class CHART> MatchResult CRegexpT <CHART> ::Match(const CHART * tstring, index_t start, CContext * pContext) const
{
return Match(tstring, CBufferRefT<CHART>(tstring).GetSize(), start, pContext);
}
template <class CHART> MatchResult CRegexpT <CHART> ::Match(const CHART * tstring, int length, int start, CContext * pContext) const
template <class CHART> MatchResult CRegexpT <CHART> ::Match(const CHART * tstring, index_t length, index_t start, CContext * pContext) const
{
if (m_builder.m_pTopElx == 0)
return nullptr;