diff --git a/scintilla/Scintilla.vcxproj b/scintilla/Scintilla.vcxproj index 4e7822ee6..cd26e7b95 100644 --- a/scintilla/Scintilla.vcxproj +++ b/scintilla/Scintilla.vcxproj @@ -121,7 +121,7 @@ true Disabled NotUsing - _SCL_SECURE_NO_WARNINGS;WIN32;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) + _SCL_SECURE_NO_WARNINGS;WIN32;SCI_OWNREGEX;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) MultiThreadedDebug Level3 @@ -134,7 +134,7 @@ true Disabled NotUsing - _SCL_SECURE_NO_WARNINGS;_WIN64;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) + _SCL_SECURE_NO_WARNINGS;_WIN64;SCI_OWNREGEX;_DEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) MultiThreadedDebug Level3 @@ -150,7 +150,7 @@ true MaxSpeed NotUsing - _SCL_SECURE_NO_WARNINGS;WIN32;NDEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) + _SCL_SECURE_NO_WARNINGS;WIN32;SCI_OWNREGEX;NDEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) MultiThreaded Level3 @@ -162,7 +162,7 @@ true MaxSpeed NotUsing - _SCL_SECURE_NO_WARNINGS;_WIN64;NDEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) + _SCL_SECURE_NO_WARNINGS;_WIN64;SCI_OWNREGEX;NDEBUG;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES;STATIC_BUILD;SCI_LEXER;USE_D2D;%(PreprocessorDefinitions) MultiThreaded Level3 @@ -171,6 +171,7 @@ + @@ -249,6 +250,7 @@ + @@ -304,6 +306,9 @@ + + + diff --git a/scintilla/Scintilla.vcxproj.filters 
b/scintilla/Scintilla.vcxproj.filters index e49186e26..a944b9d5d 100644 --- a/scintilla/Scintilla.vcxproj.filters +++ b/scintilla/Scintilla.vcxproj.filters @@ -16,6 +16,12 @@ {afe7e35e-cd81-406c-a770-df29d2b3fc95} + + {67242aad-9133-44e7-9774-c36f5a9194bc} + + + {4e167b73-0447-4a31-a66b-64c2d684516d} + @@ -237,6 +243,18 @@ win32 + + deelx + + + lexers + + + lexers + + + lexers + @@ -398,6 +416,16 @@ win32 - + + deelx + + + include + + + + + deelx\doc + \ No newline at end of file diff --git a/scintilla/deelx/DeelxRegexSearch.cxx b/scintilla/deelx/DeelxRegexSearch.cxx new file mode 100644 index 000000000..d7a3875fb --- /dev/null +++ b/scintilla/deelx/DeelxRegexSearch.cxx @@ -0,0 +1,330 @@ +/** + * @file DeelxRegexSearch.cxx + * @brief integrate DeelX regex searching for Scintilla library + * (Scintilla Lib is copyright 1998-2016 by Neil Hodgson ) + * + * uses DEELX - Regular Expression Engine (v1.3) (deelx.h) - http://www.regexlab.com/deelx/ + * download: http://www.regexlab.com/download/deelx/deelx.zip (v1.2) + * or : https://github.com/AndreasMartin72/mksqlite/blob/master/deelx/deelx.h (v1.3) + * (Copyright Announcement: Free to use/redistribute. Provenance must be declared when redistributed) + * API documentation see accompanying "deelx_en.chm" HTML Help. 
+ *
+ * @author Rainer Kottenhoff (RaPeHoff)
+ *
+ * Install:
+ *   - place files (deelx64.h, DeelxRegexSearch.cxx, deelx_en.chm)
+ *       in a directory (deelx) within the scintilla project (.../scintilla/deelx/)
+ *   - add source files to scintilla project (Scintilla.vcxproj in VS)
+ *   - define compiler (preprocessor) macro for scintilla project named "SCI_OWNREGEX"
+ *       -> this will switch from scintilla's built-in regex engine to deelx's regex engine
+ *   - recompile and link scintilla library
+ *   - build application
+ */
+
+#ifdef SCI_OWNREGEX
+
+#include <stdlib.h>
+#include <string>
+#include <utility>
+#include <vector>
+
+#pragma warning( push )
+#pragma warning( disable : 4996 ) // Scintilla's "unsafe" use of std::copy() (SplitVector.h)
+//                                // or use -D_SCL_SECURE_NO_WARNINGS preprocessor define
+
+#include "Platform.h"
+#include "Scintilla.h"
+#include "ILexer.h"
+#include "SplitVector.h"
+#include "Partitioning.h"
+#include "CellBuffer.h"
+#include "CaseFolder.h"
+#include "RunStyles.h"
+#include "Decoration.h"
+#include "CharClassify.h"
+#include "Document.h"
+// ---------------------------------------------------------------
+#include "deelx64.h"   // DEELX - Regular Expression Engine (v1.3)
+// ---------------------------------------------------------------
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+/**
+ * RegexSearchBase implementation backed by the DEELX engine.
+ *
+ * The object keeps the compiled expression, the last match and the engine
+ * context between FindText() and SubstituteByPosition(), and owns the
+ * substitution buffer whose pointer SubstituteByPosition() hands out.
+ */
+class DeelxRegexSearch : public RegexSearchBase
+{
+public:
+
+  explicit DeelxRegexSearch(CharClassify* /*charClassTable*/)
+    : m_RegExpr()
+    , m_Match()
+    , m_MatchPos(-1)
+    , m_MatchLength(0)
+    , m_pContext(nullptr)
+    , m_SubstitutionBuffer(nullptr)
+  {}
+
+  virtual ~DeelxRegexSearch()
+  {
+    ReleaseSubstitutionBuffer();
+    ReleaseContext();
+  }
+
+  // The object owns m_pContext and m_SubstitutionBuffer through raw pointers;
+  // a member-wise copy would release both of them twice.
+  DeelxRegexSearch(const DeelxRegexSearch&) = delete;
+  DeelxRegexSearch& operator=(const DeelxRegexSearch&) = delete;
+
+  virtual long FindText(Document* doc, int minPos, int maxPos, const char* pattern,
+                        bool caseSensitive, bool word, bool wordStart, int flags, int* length) override;
+
+  virtual const char* SubstituteByPosition(Document* doc, const char* text, int* length) override;
+
+
+private:
+
+  /// Hands the match context back to the engine (no-op when there is none).
+  inline void ReleaseContext()
+  {
+    if (m_pContext != nullptr) {
+      m_RegExpr.ReleaseContext(m_pContext);
+      m_pContext = nullptr;
+    }
+  }
+
+  /// Hands the last substitution result back to the engine (no-op when there is none).
+  inline void ReleaseSubstitutionBuffer()
+  {
+    if (m_SubstitutionBuffer) {
+      m_RegExpr.ReleaseString(m_SubstitutionBuffer);
+      m_SubstitutionBuffer = nullptr;
+    }
+  }
+
+private:
+  deelx::CRegexpT<char> m_RegExpr;      ///< compiled expression of the last FindText()
+  deelx::MatchResult m_Match;           ///< result of the last FindText()
+  deelx::index_t m_MatchPos;            ///< document position of the match, -1 if none
+  deelx::index_t m_MatchLength;         ///< length of the match, 0 if none
+  deelx::CContext* m_pContext;          ///< engine context of the last FindText()
+  char* m_SubstitutionBuffer;           ///< result of the last SubstituteByPosition()
+};
+// ============================================================================
+
+
+#ifdef SCI_NAMESPACE
+RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable)
+{
+  return new DeelxRegexSearch(charClassTable);
+}
+#else
+RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable)
+{
+  return new DeelxRegexSearch(charClassTable);
+}
+#endif
+
+// ============================================================================
+
+/**
+ * forward declaration of utility functions
+ */
+std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart);
+std::string& convertReplExpr(std::string& replStr);
+
+
+// ============================================================================
+
+
+/**
+ * Find text in document, supporting both forward and backward
+ * searches (just pass minPos > maxPos to do a backward search)
+ * Has not been tested with backwards DBCS searches yet.
+ *
+ * @param pattern  regular expression, *length bytes long
+ * @param length   in: length of pattern, out: length of the match (0 if none)
+ * @return position of the match, -1 if nothing matched, -2 if the
+ *         expression could not be compiled (*length is left untouched then)
+ */
+long DeelxRegexSearch::FindText(Document* doc, int minPos, int maxPos, const char *pattern,
+                                bool caseSensitive, bool word, bool wordStart, int searchFlags, int *length)
+{
+  // Forget the previous result before anything can fail, so that a later
+  // SubstituteByPosition() never works on the match of an older search.
+  m_MatchPos = -1; // not found
+  m_MatchLength = 0;
+
+  const bool left2right = (minPos <= maxPos); // minPos > maxPos requests a backward search
+  int startPos = left2right ? minPos : maxPos;
+  int endPos = left2right ? maxPos : minPos;
+
+  // Range endpoints should not be inside DBCS characters, but just in case, move them.
+  startPos = doc->MovePositionOutsideChar(startPos, 1, false);
+  endPos = doc->MovePositionOutsideChar(endPos, 1, false);
+
+  int compileFlags(deelx::MULTILINE | deelx::GLOBAL | deelx::EXTENDED); // the .(dot) does not match line-breaks
+  //int compileFlags(deelx::SINGLELINE | deelx::MULTILINE | deelx::GLOBAL | deelx::EXTENDED);  // the .(dot) also matches line-breaks
+  compileFlags |= (caseSensitive) ? deelx::NO_FLAG : deelx::IGNORECASE;
+  compileFlags |= (left2right) ? deelx::NO_FLAG : deelx::RIGHTTOLEFT;
+
+  // translateRegExpr() takes a non-const reference: it needs a named string,
+  // binding a temporary to it only compiles as a Microsoft extension.
+  std::string sRegExprStrg(pattern, *length);
+  translateRegExpr(sRegExprStrg, word, wordStart);
+
+  try {
+    m_RegExpr.Compile(sRegExprStrg.c_str(), compileFlags);
+  }
+  catch (...) {
+    return -2;  // -1 is normally used for not found, -2 is used here for invalid regex
+  }
+
+  const int rangeLen = endPos - startPos;
+  const int searchStartPos = left2right ? 0 : rangeLen;
+  ReleaseContext();
+  // Pass the range length explicitly: without it the engine has to find the
+  // end of the text on its own and is not confined to [startPos, endPos).
+  m_pContext = m_RegExpr.PrepareMatch(doc->RangePointer(startPos, rangeLen),
+                                      static_cast<deelx::index_t>(rangeLen),
+                                      static_cast<deelx::index_t>(searchStartPos));
+
+  m_Match = m_RegExpr.Match(m_pContext);
+
+  if (m_Match.IsMatched()) {
+    m_MatchPos = startPos + m_Match.GetStart();
+    m_MatchLength = (m_Match.GetEnd() - m_Match.GetStart());
+  }
+
+  //NOTE: potential 64-bit-size issue at interface here:
+  *length = static_cast<int>(m_MatchLength);
+  return static_cast<long>(m_MatchPos);
+}
+// ============================================================================
+
+
+/**
+ * Build the replacement for the match found by the last FindText().
+ *
+ * @param text    replacement expression, *length bytes long
+ * @param length  in: length of text, out: length of the result (0 if there is no match)
+ * @return buffer owned by this object (valid until the next call or destruction),
+ *         nullptr if there is no current match
+ */
+const char* DeelxRegexSearch::SubstituteByPosition(Document* doc, const char* text, int* length)
+{
+  if (!m_Match.IsMatched() || (m_MatchPos < 0)) {
+    *length = 0;
+    return nullptr;
+  }
+
+  // named string for the same reason as in FindText()
+  std::string sReplStrg(text, *length);
+  convertReplExpr(sReplStrg);
+
+  //NOTE: potential 64-bit-size issue at interface here:
+  const char* pString = doc->RangePointer(static_cast<int>(m_MatchPos), static_cast<int>(m_MatchLength));
+
+  deelx::index_t resLength = 0;
+  ReleaseSubstitutionBuffer();
+  m_SubstitutionBuffer = m_RegExpr.Replace(pString, m_MatchLength, sReplStrg.c_str(),
+                                           static_cast<deelx::index_t>(sReplStrg.length()), resLength);
+
+  //NOTE: potential 64-bit-size issue at interface here:
+  *length = static_cast<int>(resLength);
+
+  return m_SubstitutionBuffer;
+}
+// ============================================================================
+
+
+
+
+// ============================================================================
+//   Some Helpers
+// ============================================================================
+
+
+/**
+ * Replace every occurrence of 'from' in 'source' by 'to' (in place).
+ * An empty 'from' leaves 'source' unchanged.
+ */
+void replaceAll(std::string& source, const std::string& from, const std::string& to)
+{
+  if (from.empty()) {
+    return; // find("") matches everywhere without advancing: would never terminate
+  }
+
+  std::string newString;
+  newString.reserve(source.length() * 2);  // avoids a few memory allocations
+
+  std::string::size_type lastPos = 0;
+  std::string::size_type findPos;
+
+  while (std::string::npos != (findPos = source.find(from, lastPos))) {
+    newString.append(source, lastPos, findPos - lastPos);
+    newString += to;
+    lastPos = findPos + from.length();
+  }
+  // Care for the rest after last occurrence
+  newString += source.substr(lastPos);
+
+  source.swap(newString);
+}
+// ----------------------------------------------------------------------------
+
+
+
+/**
+ * Adapt the expression to the "whole word" / "word start" search options.
+ *
+ * If one of the options is set, the expression is anchored with '\b' in front
+ * (and behind for wholeWord) and every '.' wildcard becomes '\w', so that it
+ * cannot run across a word boundary. Escaped dots ("\.") and dots inside a
+ * bracket expression are literal dots, not wildcards, and are kept as they are.
+ * Bracket expressions are tracked by counting unescaped '[' and ']' only.
+ *
+ * @return regExprStr, modified in place
+ */
+std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart)
+{
+  if (!(wholeWord || wordStart)) {
+    return regExprStr; // nothing to translate
+  }
+
+  std::string tmpStr;
+  tmpStr.reserve(regExprStr.length() + 8);
+  tmpStr.append("\\b"); // push '\b' at the begin of regexpr
+
+  bool escaped = false; // previous char was an unescaped backslash
+  int classDepth = 0;   // > 0 while inside [...]
+
+  for (std::string::size_type i = 0; i < regExprStr.length(); ++i) {
+    const char ch = regExprStr[i];
+    if (escaped) {
+      escaped = false;
+      tmpStr.push_back(ch);
+      continue;
+    }
+    if (ch == '\\') {
+      escaped = true;
+    }
+    else if (ch == '[') {
+      ++classDepth;
+    }
+    else if (ch == ']') {
+      if (classDepth > 0) { --classDepth; }
+    }
+    else if ((ch == '.') && (classDepth == 0)) {
+      tmpStr.append("\\w");
+      continue;
+    }
+    tmpStr.push_back(ch);
+  }
+
+  if (wholeWord) { // push '\b' at the end of regexpr
+    tmpStr.append("\\b");
+  }
+
+  std::swap(regExprStr, tmpStr);
+  return regExprStr;
+}
+// ----------------------------------------------------------------------------
+
+
+
+/**
+ * Convert a replacement expression to what deelx's Replace() expects.
+ *
+ * A backslash (or a doubled backslash) introduces an escape sequence:
+ *  - followed by 1..9        : group reference, written as "$<n>"
+ *  - followed by a,b,f,n,r,t,v : the corresponding control character
+ *  - followed by anything else : that character itself
+ * An escape sequence that is cut off by the end of the string is dropped.
+ *
+ * @return replStr, modified in place
+ */
+std::string& convertReplExpr(std::string& replStr)
+{
+  const size_t len = replStr.length();
+  std::string tmpStr;
+  tmpStr.reserve(len);
+
+  for (size_t i = 0; i < len; ++i) {
+    char ch = replStr[i];
+    if (ch != '\\') {
+      tmpStr.push_back(ch);
+      continue;
+    }
+
+    if (++i >= len) { break; } // dangling backslash at the end
+    ch = replStr[i]; // next char
+    if (ch == '\\') {
+      // skip 2nd backslash ("\\")
+      if (++i >= len) { break; }
+      ch = replStr[i];
+    }
+    if (ch >= '1' && ch <= '9') {
+      // former behavior convenience:
+      // change "\\<n>" to deelx's group reference ($<n>)
+      tmpStr.push_back('$');
+    }
+    switch (ch) {
+      // check for escape seq:
+    case 'a':
+      tmpStr.push_back('\a');
+      break;
+    case 'b':
+      tmpStr.push_back('\b');
+      break;
+    case 'f':
+      tmpStr.push_back('\f');
+      break;
+    case 'n':
+      tmpStr.push_back('\n');
+      break;
+    case 'r':
+      tmpStr.push_back('\r');
+      break;
+    case 't':
+      tmpStr.push_back('\t');
+      break;
+    case 'v':
+      tmpStr.push_back('\v');
+      break;
+    case '\\':
+      tmpStr.push_back('\\');
+      break;
+    default:
+      // unknown ctrl seq
+      tmpStr.push_back(ch);
+      break;
+    }
+  } //for
+
+  std::swap(replStr, tmpStr);
+  return replStr;
+}
+// ============================================================================
+
+#pragma warning( pop )
+
+#endif //SCI_OWNREGEX
diff --git a/scintilla/deelx/deelx64.h b/scintilla/deelx/deelx64.h
new file mode 100644
index 000000000..a63a33648
--- /dev/null
+++ b/scintilla/deelx/deelx64.h
@@ -0,0 +1,4830 @@
+// deelx64.h
+//
+// DEELX Regular Expression Engine (v1.3)
+//
+// Copyright 2006 ~ 2013 (c) RegExLab.com
+// All Rights Reserved.
+// +// http://www.regexlab.com/deelx/ +// +// Author: Ê·ÊÙΰ (sswater shi) +// sswater@gmail.com +// +// $Revision $ +// +// + adaption for 64-bit usage: "basetsd : INT_PTR" replaces int-pointer arithmetic and buffer indexes +// + Cppcheck cleanup + +#ifndef __DEELX_REGEXP64__H__ +#define __DEELX_REGEXP64__H__ + +#include +#include +#include +#include +#include + +#include + +namespace deelx +{ + // integer type for pointer arithmetic & casts (64-bit aware) + //typedef int index_t; // preserve original "deelx.h" v1.3 behavior + typedef INT_PTR index_t; + +extern "C" { + typedef int(*POSIX_FUNC)(int); + int isblank(int c); +} + +// +// Data Reference +// +template class CBufferRefT +{ +public: + explicit CBufferRefT(const ELT * pcsz, index_t length); + explicit CBufferRefT(const ELT * pcsz); + +public: + int nCompare(const ELT * pcsz) const; + int nCompareNoCase(const ELT * pcsz) const; + int Compare(const ELT * pcsz) const; + int CompareNoCase(const ELT * pcsz) const; + int Compare(const CBufferRefT &) const; + int CompareNoCase(const CBufferRefT &) const; + + ELT At(index_t nIndex, ELT def = 0) const; + ELT operator [] (index_t nIndex) const; + + const ELT * GetBuffer() const; + index_t GetSize() const; + +public: + virtual ~CBufferRefT(); + + // Content +protected: + ELT * m_pBuffer; + index_t m_nSize; +}; + +// +// Implemenation +// +template CBufferRefT ::CBufferRefT(const ELT * pcsz, index_t length) + : m_pBuffer((ELT *)pcsz) + , m_nSize(length) +{ +} + +template CBufferRefT ::CBufferRefT(const ELT * pcsz) + : m_pBuffer((ELT *)pcsz) + , m_nSize(0) +{ + if (pcsz != 0) while (m_pBuffer[m_nSize] != 0) m_nSize++; +} + +template int CBufferRefT ::nCompare(const ELT * pcsz) const +{ + for (index_t i = 0; i < m_nSize; i++) + { + if (m_pBuffer[i] != pcsz[i]) + return m_pBuffer[i] - pcsz[i]; + } + return 0; +} + +template int CBufferRefT ::nCompareNoCase(const ELT * pcsz) const +{ + for (index_t i = 0; i < m_nSize; i++) + { + if (m_pBuffer[i] != pcsz[i]) + { + if 
(toupper((int)m_pBuffer[i]) != toupper((int)pcsz[i])) + return m_pBuffer[i] - pcsz[i]; + } + } + + return 0; +} + +template inline int CBufferRefT ::Compare(const ELT * pcsz) const +{ + return nCompare(pcsz) ? 1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT ::CompareNoCase(const ELT * pcsz) const +{ + return nCompareNoCase(pcsz) ? 1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT ::Compare(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompare(cref.GetBuffer()) : 1; +} + +template inline int CBufferRefT ::CompareNoCase(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompareNoCase(cref.GetBuffer()) : 1; +} + +template inline ELT CBufferRefT ::At(index_t nIndex, ELT def) const +{ + return nIndex >= m_nSize ? def : m_pBuffer[nIndex]; +} + +template inline ELT CBufferRefT :: operator [] (index_t nIndex) const +{ + return nIndex >= m_nSize ? 0 : m_pBuffer[nIndex]; +} + +template const ELT * CBufferRefT ::GetBuffer() const +{ + static const ELT _def[] = { 0 }; return m_pBuffer ? 
m_pBuffer : _def; +} + +template inline index_t CBufferRefT ::GetSize() const +{ + return m_nSize; +} + +template CBufferRefT :: ~CBufferRefT() +{} + +// +// Data Buffer +// +template class CBufferT : public CBufferRefT +{ +public: + explicit CBufferT(const ELT * pcsz, index_t length); + explicit CBufferT(const ELT * pcsz); + CBufferT(); + +public: + ELT & operator [] (index_t nIndex); + const ELT & operator [] (index_t nIndex) const; + void Append(const ELT * pcsz, index_t length, index_t eol = 0); + void Append(ELT el, index_t eol = 0); + +public: + void Push(ELT el); + void Push(const CBufferRefT & buf); + int Pop(ELT & el); + int Pop(CBufferT & buf); + int Peek(ELT & el) const; + +public: + const ELT * GetBuffer() const; + ELT * GetBuffer(); + ELT * Detach(); + void Release(); + void Prepare(index_t index, int fill = 0); + void Restore(index_t size); + + ELT * PrepareInsert(index_t nPos, index_t nSize) + { + index_t nOldSize = CBufferRefT::m_nSize; + Restore(nPos > CBufferRefT::m_nSize ? 
nPos : CBufferRefT::m_nSize + nSize); + + if (nPos < nOldSize) + { + ELT * from = CBufferRefT::m_pBuffer + nPos, *to = CBufferRefT::m_pBuffer + nPos + nSize; + memmove(to, from, sizeof(ELT) * (nOldSize - nPos)); + } + + return CBufferRefT::m_pBuffer + nPos; + } + + void Insert(index_t nIndex, const ELT & rT) + { + Insert(nIndex, &rT, 1); + } + + void Insert(index_t nIndex, const ELT * pT, index_t nSize) + { + memcpy(PrepareInsert(nIndex, nSize), pT, sizeof(ELT) * nSize); + } + + void Remove(index_t nIndex) + { + Remove(nIndex, 1); + } + + void Remove(index_t nIndex, index_t nSize) + { + if (nIndex < CBufferRefT ::m_nSize) + { + if (nIndex + nSize >= CBufferRefT ::m_nSize) + { + Restore(nIndex); + } + else + { + memmove(CBufferRefT ::m_pBuffer + nIndex, CBufferRefT ::m_pBuffer + nIndex + nSize, sizeof(ELT) * (CBufferRefT ::m_nSize - nIndex - nSize)); + Restore(CBufferRefT ::m_nSize - nSize); + } + } + } + + void SetMaxLength(index_t nSize) + { + if (nSize > m_nMaxLength) + { + if (m_nMaxLength < 8) + m_nMaxLength = 8; + + if (nSize > m_nMaxLength) + m_nMaxLength *= 2; + + if (nSize > m_nMaxLength) + { + m_nMaxLength = nSize + 11; + m_nMaxLength -= m_nMaxLength & 0x07; + } + + CBufferRefT ::m_pBuffer = (ELT *)realloc(CBufferRefT ::m_pBuffer, sizeof(ELT) * m_nMaxLength); + } + } + +public: + virtual ~CBufferT(); + + // Content +protected: + index_t m_nMaxLength; +}; + +// +// Implemenation +// +template CBufferT ::CBufferT(const ELT * pcsz, index_t length) : CBufferRefT (0, length) +{ + m_nMaxLength = CBufferRefT ::m_nSize + 1; + + CBufferRefT ::m_pBuffer = (ELT *)malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT ::m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT ::m_nSize] = 0; +} + +template CBufferT ::CBufferT(const ELT * pcsz) : CBufferRefT (pcsz) +{ + m_nMaxLength = CBufferRefT ::m_nSize + 1; + + CBufferRefT ::m_pBuffer = (ELT *)malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) 
* CBufferRefT ::m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT ::m_nSize] = 0; +} + +template CBufferT ::CBufferT() : CBufferRefT (0, 0) +{ + m_nMaxLength = 0; + CBufferRefT::m_pBuffer = 0; +} + +template inline ELT & CBufferT :: operator [] (index_t nIndex) +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template inline const ELT & CBufferT :: operator [] (index_t nIndex) const +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template void CBufferT ::Append(const ELT * pcsz, index_t length, index_t eol) +{ + index_t nNewLength = m_nMaxLength; + + // Check length + if (nNewLength < 8) + nNewLength = 8; + + if (CBufferRefT ::m_nSize + length + eol > nNewLength) + nNewLength *= 2; + + if (CBufferRefT ::m_nSize + length + eol > nNewLength) + { + nNewLength = CBufferRefT ::m_nSize + length + eol + 11; + nNewLength -= nNewLength % 8; + } + + // Realloc + if (nNewLength > m_nMaxLength) + { + CBufferRefT ::m_pBuffer = (ELT *)realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + memcpy(CBufferRefT::m_pBuffer + CBufferRefT ::m_nSize, pcsz, sizeof(ELT) * length); + CBufferRefT ::m_nSize += length; + + if (eol > 0) CBufferRefT::m_pBuffer[CBufferRefT ::m_nSize] = 0; +} + +template inline void CBufferT ::Append(ELT el, index_t eol) +{ + Append(&el, 1, eol); +} + +template void CBufferT ::Push(ELT el) +{ + // Realloc + if (CBufferRefT ::m_nSize >= m_nMaxLength) + { + index_t nNewLength = m_nMaxLength * 2; + if (nNewLength < 8) nNewLength = 8; + + CBufferRefT ::m_pBuffer = (ELT *)realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + CBufferRefT::m_pBuffer[CBufferRefT ::m_nSize++] = el; +} + +template void CBufferT ::Push(const CBufferRefT & buf) +{ + for (index_t i = 0; i < buf.GetSize(); i++) + { + Push(buf[i]); + } + + Push((ELT)buf.GetSize()); +} + +template inline int CBufferT ::Pop(ELT & el) +{ + if (CBufferRefT ::m_nSize > 0) + { + el = 
CBufferRefT::m_pBuffer[--CBufferRefT ::m_nSize]; + return 1; + } + else + { + return 0; + } +} + +template int CBufferT ::Pop(CBufferT & buf) +{ + index_t size; + int res = 1; + res = res && Pop(*(ELT*)&size); + buf.Restore(size); + + for (index_t i = size - 1; i >= 0; i--) + { + res = res && Pop(buf[i]); + } + + return res; +} + +template inline int CBufferT ::Peek(ELT & el) const +{ + if (CBufferRefT ::m_nSize > 0) + { + el = CBufferRefT::m_pBuffer[CBufferRefT ::m_nSize - 1]; + return 1; + } + else + { + return 0; + } +} + +template const ELT * CBufferT ::GetBuffer() const +{ + static const ELT _def[] = { 0 }; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : _def; +} + +template ELT * CBufferT ::GetBuffer() +{ + static const ELT _def[] = { 0 }; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : (ELT *)_def; +} + +template ELT * CBufferT ::Detach() +{ + ELT * pBuffer = CBufferRefT::m_pBuffer; + + CBufferRefT ::m_pBuffer = 0; + CBufferRefT ::m_nSize = m_nMaxLength = 0; + + return pBuffer; +} + +template void CBufferT ::Release() +{ + ELT * pBuffer = Detach(); + + if (pBuffer != 0) free(pBuffer); +} + +template void CBufferT ::Prepare(index_t index, int fill) +{ + index_t nNewSize = index + 1; + + // Realloc + if (nNewSize > m_nMaxLength) + { + index_t nNewLength = m_nMaxLength; + + if (nNewLength < 8) + nNewLength = 8; + + if (nNewSize > nNewLength) + nNewLength *= 2; + + if (nNewSize > nNewLength) + { + nNewLength = nNewSize + 11; + nNewLength -= nNewLength % 8; + } + + CBufferRefT ::m_pBuffer = (ELT *)realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // size + if (CBufferRefT ::m_nSize < nNewSize) + { + memset(CBufferRefT::m_pBuffer + CBufferRefT ::m_nSize, fill, sizeof(ELT) * (nNewSize - CBufferRefT ::m_nSize)); + CBufferRefT ::m_nSize = nNewSize; + } +} + +template inline void CBufferT ::Restore(index_t size) +{ + SetMaxLength(size); + CBufferRefT ::m_nSize = size; +} + +template CBufferT :: 
~CBufferT() +{ + if (CBufferRefT::m_pBuffer != 0) free(CBufferRefT::m_pBuffer); +} + +template class CSortedBufferT : public CBufferT +{ +public: + explicit CSortedBufferT(int reverse = 0); + explicit CSortedBufferT(int(*)(const void *, const void *)); + +public: + void Add(const T & rT); + void Add(const T * pT, index_t nSize); + int Remove(const T & rT); + static void RemoveAll(); + + void SortFreeze() { m_bSortFreezed = 1; } + void SortUnFreeze(); + +public: + int Find(const T & rT, int(*compare)(const void *, const void *) = 0) { return FindAs(*(T*)&rT, compare); } + int FindAs(const T & rT, int(*)(const void *, const void *) = 0); + index_t GetSize() const { return CBufferRefT::m_nSize; } + T & operator [] (index_t nIndex) { return CBufferT :: operator [] (nIndex); } + +protected: + int(*m_fncompare)(const void *, const void *); + static int compareT(const void *, const void *); + static int compareReverseT(const void *, const void *); + + int m_bSortFreezed; +}; + +template CSortedBufferT ::CSortedBufferT(int reverse) +{ + m_fncompare = reverse ? 
compareReverseT : compareT; + m_bSortFreezed = 0; +} + +template CSortedBufferT ::CSortedBufferT(int(*compare)(const void *, const void *)) +{ + m_fncompare = compare; + m_bSortFreezed = 0; +} + +template void CSortedBufferT ::Add(const T & rT) +{ + if (m_bSortFreezed != 0) + { + CBufferT ::Append(rT); + return; + } + + index_t a = 0, b = CBufferRefT::m_nSize - 1, c = CBufferRefT::m_nSize / 2; + + while (a <= b) + { + int r = m_fncompare(&rT, &CBufferRefT::m_pBuffer[c]); + + if (r < 0) b = c - 1; + else if (r > 0) a = c + 1; + else break; + + c = (a + b + 1) / 2; + } + + CBufferT ::Insert(c, rT); +} + +template void CSortedBufferT ::Add(const T * pT, index_t nSize) +{ + CBufferT ::Append(pT, nSize); + + if (m_bSortFreezed == 0) + { + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT ::FindAs(const T & rT, int(*compare)(const void *, const void *)) +{ + const T * pT = (const T *)bsearch(&rT, CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), compare == 0 ? m_fncompare : compare); + + if (pT != NULL) + return static_cast(pT - CBufferRefT::m_pBuffer); //TODO: x64bit > 4GB ? 
+ else + return -1; +} + +template int CSortedBufferT ::Remove(const T & rT) +{ + int pos = Find(rT); + if (pos >= 0) CBufferT ::Remove(pos); + return pos; +} + +template inline void CSortedBufferT ::RemoveAll() +{ + CBufferT::Restore(0); +} + +template void CSortedBufferT ::SortUnFreeze() +{ + if (m_bSortFreezed != 0) + { + m_bSortFreezed = 0; + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT ::compareT(const void * elem1, const void * elem2) +{ + if (*(const T *)elem1 == *(const T *)elem2) + return 0; + else if (*(const T *)elem1 < *(const T *)elem2) + return -1; + else + return 1; +} + +template int CSortedBufferT ::compareReverseT(const void * elem1, const void * elem2) +{ + if (*(const T *)elem1 == *(const T *)elem2) + return 0; + else if (*(const T *)elem1 > *(const T *)elem2) + return -1; + else + return 1; +} + +// +// Context +// +class CContext +{ +public: + CBufferT m_stack; + CBufferT m_capturestack, m_captureindex; + +public: + index_t m_nCurrentPos; + index_t m_nBeginPos; + index_t m_nLastBeginPos; + index_t m_nParenZindex; + index_t m_nCursiveLimit; + + void * m_pMatchString; + index_t m_pMatchStringLength; +}; + +class CContextShot +{ +public: + explicit CContextShot(CContext * pContext) + { + m_nCurrentPos = pContext->m_nCurrentPos; + nsize = pContext->m_stack.GetSize(); + ncsize = pContext->m_capturestack.GetSize(); + } + + void Restore(CContext * pContext) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = m_nCurrentPos; + } + +public: + index_t m_nCurrentPos; + index_t nsize; + index_t ncsize; +}; + +// +// Interface +// +class ElxInterface +{ +public: + virtual int Match(CContext * pContext) const = 0; + virtual int MatchNext(CContext * pContext) const = 0; + +public: + virtual ~ElxInterface() {}; +}; + +// +// Alternative +// +template class CAlternativeElxT : public ElxInterface +{ +public: + int Match(CContext * 
pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAlternativeElxT(); + +public: + CBufferT m_elxlist; +}; + +typedef CAlternativeElxT <0> CAlternativeElx; + +// +// Assert +// +template class CAssertElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAssertElxT(ElxInterface * pelx, int byes = 1); + +public: + ElxInterface * m_pelx; + int m_byes; +}; + +typedef CAssertElxT <0> CAssertElx; + +// +// Back reference elx +// +template class CBackrefElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBackrefElxT(int nnumber, int brightleft, int bignorecase); + +public: + index_t m_nnumber; + int m_brightleft; + int m_bignorecase; + + CBufferT m_szNamed; +}; + +// +// Implementation +// +template CBackrefElxT ::CBackrefElxT(int nnumber, int brightleft, int bignorecase) +{ + m_nnumber = nnumber; + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CBackrefElxT ::Match(CContext * pContext) const +{ + // check number, for named + if (m_nnumber < 0 || m_nnumber >= pContext->m_captureindex.GetSize()) return 0; + + index_t index = pContext->m_captureindex[m_nnumber]; + if (index < 0) return 0; + + // check enclosed + index_t pos1 = pContext->m_capturestack[index + 1]; + index_t pos2 = pContext->m_capturestack[index + 2]; + + if (pos2 < 0) pos2 = pContext->m_nCurrentPos; + + // info + index_t lpos = pos1 < pos2 ? pos1 : pos2; + index_t rpos = pos1 < pos2 ? 
pos2 : pos1; + index_t slen = rpos - lpos; + + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + index_t npos = pContext->m_nCurrentPos; + index_t tlen = pContext->m_pMatchStringLength; + + // compare + int bsucc; + CBufferRefT refstr(pcsz + lpos, slen); + + if (m_brightleft) + { + if (npos < slen) + return 0; + + if (m_bignorecase) + bsucc = !refstr.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = !refstr.nCompare(pcsz + (npos - slen)); + + if (bsucc) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos -= slen; + } + } + else + { + if (npos + slen > tlen) + return 0; + + if (m_bignorecase) + bsucc = !refstr.nCompareNoCase(pcsz + npos); + else + bsucc = !refstr.nCompare(pcsz + npos); + + if (bsucc) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos += slen; + } + } + + return bsucc; +} + +template int CBackrefElxT ::MatchNext(CContext * pContext) const +{ + index_t npos = 0; + + pContext->m_stack.Pop(npos); + pContext->m_nCurrentPos = npos; + + return 0; +} + +// RCHART +#ifndef RCHART +#define RCHART(ch) ((CHART)ch) +#endif + +// BOUNDARY_TYPE +enum BOUNDARY_TYPE +{ + BOUNDARY_FILE_BEGIN, // begin of whole text + BOUNDARY_FILE_END, // end of whole text + BOUNDARY_FILE_END_N, // end of whole text, or before newline at the end + BOUNDARY_LINE_BEGIN, // begin of line + BOUNDARY_LINE_END, // end of line + BOUNDARY_WORD_BEGIN, // begin of word + BOUNDARY_WORD_END, // end of word + BOUNDARY_WORD_EDGE +}; + +// +// Boundary Elx +// +template class CBoundaryElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBoundaryElxT(int ntype, int byes = 1); + +protected: + static int IsWordChar(CHART ch); + +public: + int m_ntype; + int m_byes; +}; + +// +// Implementation +// +template CBoundaryElxT ::CBoundaryElxT(int ntype, int byes) +{ + m_ntype = ntype; + m_byes = byes; +} + +template int CBoundaryElxT ::Match(CContext * pContext) const +{ + const CHART 
* pcsz = (const CHART *)pContext->m_pMatchString; + index_t npos = pContext->m_nCurrentPos; + index_t tlen = pContext->m_pMatchStringLength; + + CHART chL = npos > 0 ? pcsz[npos - 1] : 0; + CHART chR = npos < tlen ? pcsz[npos] : 0; + + int bsucc = 0; + + switch (m_ntype) + { + case BOUNDARY_FILE_BEGIN: + bsucc = (npos <= 0); + break; + + case BOUNDARY_FILE_END: + bsucc = (npos >= tlen); + break; + + case BOUNDARY_FILE_END_N: + bsucc = (npos >= tlen) || (pcsz[tlen - 1] == RCHART('\n') && (npos == tlen - 1 || (pcsz[tlen - 2] == RCHART('\r') && npos == tlen - 2))); + break; + + case BOUNDARY_LINE_BEGIN: + bsucc = (npos <= 0) || (chL == RCHART('\n')) || ((chL == RCHART('\r')) && (chR != RCHART('\n'))); + break; + + case BOUNDARY_LINE_END: + bsucc = (npos >= tlen) || (chR == RCHART('\r')) || ((chR == RCHART('\n')) && (chL != RCHART('\r'))); + break; + + case BOUNDARY_WORD_BEGIN: + bsucc = !IsWordChar(chL) && IsWordChar(chR); + break; + + case BOUNDARY_WORD_END: + bsucc = IsWordChar(chL) && !IsWordChar(chR); + break; + + case BOUNDARY_WORD_EDGE: + bsucc = IsWordChar(chL) ? !IsWordChar(chR) : IsWordChar(chR); + break; + } + + return m_byes ? 
bsucc : !bsucc; +} + +template int CBoundaryElxT ::MatchNext(CContext *) const +{ + return 0; +} + +template inline int CBoundaryElxT ::IsWordChar(CHART ch) +{ + return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_')); +} + +// +// Bracket +// +template class CBracketElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBracketElxT(index_t nnumber, int bright); + static int CheckCaptureIndex(index_t & index, CContext * pContext, index_t number); + +public: + index_t m_nnumber; + index_t m_balancing; + int m_bright; + + CBufferT m_szNamed; + CBufferT m_szBalancing; +}; + +template CBracketElxT ::CBracketElxT(index_t nnumber, int bright) +{ + m_nnumber = nnumber; + m_bright = bright; + m_balancing = -1; +} + +template inline int CBracketElxT ::CheckCaptureIndex(index_t & index, CContext * pContext, index_t number) +{ + if (index >= pContext->m_capturestack.GetSize()) + index = pContext->m_capturestack.GetSize() - 4; + + while (index >= 0) + { + if (pContext->m_capturestack[index] == number) + { + return 1; + } + + index -= 4; + } + return 0; +} + +// +// capturestack[index+0] => Group number +// capturestack[index+1] => Capture start pos +// capturestack[index+2] => Capture end pos +// capturestack[index+3] => Capture enclose z-index, zindex<0 means inner group with same name +// +template int CBracketElxT ::Match(CContext * pContext) const +{ + // check, for named + if (m_nnumber < 0) return 0; + + if (!m_bright) + { + pContext->m_captureindex.Prepare(m_nnumber, -1); + index_t index = pContext->m_captureindex[m_nnumber]; + + // check + if (CheckCaptureIndex(index, pContext, m_nnumber) && pContext->m_capturestack[index + 2] < 0) + { + pContext->m_capturestack[index + 3] --; + return 1; + } + + // balancing left + if (m_balancing >= 0) + { + index_t balancing_index = 
pContext->m_captureindex[m_balancing]; + if (!CheckCaptureIndex(balancing_index, pContext, m_balancing) || + pContext->m_capturestack[balancing_index + 2] < 0) + { + return 0; + } + } + + // save + pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize(); + + pContext->m_capturestack.Push(m_nnumber); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(0); // z-index + } + else + { + // check + index_t index = pContext->m_captureindex[m_nnumber]; + + if (CheckCaptureIndex(index, pContext, m_nnumber)) + { + if (pContext->m_capturestack[index + 3] < 0) // check inner group with same name + { + pContext->m_capturestack[index + 3] ++; + return 1; + } + + // balancing right + index_t balancing_index = -1; + if (m_balancing >= 0) + { + balancing_index = pContext->m_captureindex[m_balancing]; + if (!CheckCaptureIndex(balancing_index, pContext, m_balancing)) + { + // TODO ERROR + return 0; + } + } + + // save + pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos; + pContext->m_capturestack[index + 3] = pContext->m_nParenZindex++; + + // balancing right + if (m_balancing >= 0) + { + // backup index + pContext->m_stack.Push(balancing_index); + + if (balancing_index >= 0) + { + pContext->m_capturestack[index + 2] = pContext->m_capturestack[index + 1]; + pContext->m_capturestack[index + 1] = pContext->m_capturestack[balancing_index + 2]; + + // destopy capture + pContext->m_capturestack[balancing_index] = -1; + balancing_index -= 4; + CheckCaptureIndex(balancing_index, pContext, m_balancing); + pContext->m_captureindex[m_balancing] = balancing_index; + } + } + } + } + + return 1; +} + +template int CBracketElxT ::MatchNext(CContext * pContext) const +{ + index_t index = pContext->m_captureindex[m_nnumber]; + if (!CheckCaptureIndex(index, pContext, m_nnumber)) + { + return 0; + } + + if (!m_bright) + { + if (pContext->m_capturestack[index + 3] < 0) + { + 
pContext->m_capturestack[index + 3] ++; + return 0; + } + + pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4); + + // to find + CheckCaptureIndex(index, pContext, m_nnumber); + + // new index + pContext->m_captureindex[m_nnumber] = index; + } + else + { + if (pContext->m_capturestack[index + 2] >= 0) + { + // balancing right + if (m_balancing >= 0) + { + index_t balancing_index = -1; + pContext->m_stack.Pop(balancing_index); + + if (balancing_index >= 0) + { + pContext->m_capturestack[balancing_index] = m_balancing; + pContext->m_captureindex[m_balancing] = balancing_index; + } + } + pContext->m_capturestack[index + 2] = -1; + pContext->m_capturestack[index + 3] = 0; + } + else + { + pContext->m_capturestack[index + 3] --; + } + } + + return 0; +} + +// +// Deletage +// +template class CDelegateElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + explicit CDelegateElxT(int ndata = 0); + +public: + ElxInterface * m_pelx; + index_t m_ndata; // +0 : recursive to + // -3 : named recursive + + CBufferT m_szNamed; +}; + +template CDelegateElxT ::CDelegateElxT(int ndata) +{ + m_pelx = 0; + m_ndata = ndata; +} + +template int CDelegateElxT ::Match(CContext * pContext) const +{ + if (m_pelx != 0) + { + if (pContext->m_nCursiveLimit > 0) + { + pContext->m_nCursiveLimit--; + int result = m_pelx->Match(pContext); + pContext->m_nCursiveLimit++; + return result; + } + else + return 0; + } + else + return 1; +} + +template int CDelegateElxT ::MatchNext(CContext * pContext) const +{ + if (m_pelx != 0) + return m_pelx->MatchNext(pContext); + else + return 0; +} + +// +// Empty +// +template class CEmptyElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CEmptyElxT(); +}; + +typedef CEmptyElxT <0> CEmptyElx; + +// +// Global +// +template class CGlobalElxT : public ElxInterface +{ +public: + int 
Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGlobalElxT(); +}; + +typedef CGlobalElxT <0> CGlobalElx; + +// +// Repeat +// +template class CRepeatElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRepeatElxT(ElxInterface * pelx, int ntimes); + +protected: + int MatchFixed(CContext * pContext) const; + int MatchNextFixed(CContext * pContext) const; + int MatchForward(CContext * pContext) const + { + CContextShot shot(pContext); + + if (!m_pelx->Match(pContext)) + return 0; + + if (pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + if (!m_pelx->MatchNext(pContext)) + return 0; + + if (pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + shot.Restore(pContext); + return 0; + } + +public: + ElxInterface * m_pelx; + int m_nfixed; +}; + +typedef CRepeatElxT <0> CRepeatElx; + +// +// Greedy +// +template class CGreedyElxT : public CRepeatElxT +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + int MatchVart(CContext * pContext) const; + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CGreedyElxT <0> CGreedyElx; + +// +// Independent +// +template class CIndependentElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + explicit CIndependentElxT(ElxInterface * pelx); + +public: + ElxInterface * m_pelx; +}; + +typedef CIndependentElxT <0> CIndependentElx; + +// +// List +// +template class CListElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + explicit CListElxT(int brightleft); + +public: + CBufferT m_elxlist; + int m_brightleft; +}; + +typedef CListElxT <0> CListElx; + +// +// 
Posix Elx +// +template class CPosixElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPosixElxT(const char * posix, int brightleft); + +public: + POSIX_FUNC m_posixfun; + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CPosixElxT ::CPosixElxT(const char * posix, int brightleft) +{ + m_brightleft = brightleft; + + if (posix[1] == '^') + { + m_byes = 0; + posix += 2; + } + else + { + m_byes = 1; + posix += 1; + } + + if (!strncmp(posix, "alnum:", 6)) m_posixfun = ::isalnum; + else if (!strncmp(posix, "alpha:", 6)) m_posixfun = ::isalpha; + else if (!strncmp(posix, "ascii:", 6)) m_posixfun = ::isascii; + else if (!strncmp(posix, "cntrl:", 6)) m_posixfun = ::iscntrl; + else if (!strncmp(posix, "digit:", 6)) m_posixfun = ::isdigit; + else if (!strncmp(posix, "graph:", 6)) m_posixfun = ::isgraph; + else if (!strncmp(posix, "lower:", 6)) m_posixfun = ::islower; + else if (!strncmp(posix, "print:", 6)) m_posixfun = ::isprint; + else if (!strncmp(posix, "punct:", 6)) m_posixfun = ::ispunct; + else if (!strncmp(posix, "space:", 6)) m_posixfun = ::isspace; + else if (!strncmp(posix, "upper:", 6)) m_posixfun = ::isupper; + else if (!strncmp(posix, "xdigit:", 7)) m_posixfun = ::isxdigit; + else if (!strncmp(posix, "blank:", 6)) m_posixfun = isblank; + else m_posixfun = 0; +} + +inline int isblank(int c) +{ + return c == 0x20 || c == '\t'; +} + +template int CPosixElxT ::Match(CContext * pContext) const +{ + if (m_posixfun == 0) return 0; + + index_t tlen = pContext->m_pMatchStringLength; + index_t npos = pContext->m_nCurrentPos; + + // check + index_t at = m_brightleft ? npos - 1 : npos; + if (at < 0 || at >= tlen) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + + int bsucc = (*m_posixfun)(ch); + + if (!m_byes) + bsucc = !bsucc; + + if (bsucc) + pContext->m_nCurrentPos += m_brightleft ? 
-1 : 1; + + return bsucc; +} + +template int CPosixElxT ::MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? -1 : 1; + return 0; +} + +// +// Possessive +// +template class CPossessiveElxT : public CGreedyElxT +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); +}; + +typedef CPossessiveElxT <0> CPossessiveElx; + +// +// Range Elx +// +template class CRangeElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRangeElxT(int brightleft, int byes); + +public: + int IsContainChar(CHART ch) const; + +public: + CBufferT m_ranges; + CBufferT m_chars; + CBufferT m_embeds; + +public: + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CRangeElxT ::CRangeElxT(int brightleft, int byes) +{ + m_brightleft = brightleft; + m_byes = byes; +} + +template int CRangeElxT ::Match(CContext * pContext) const +{ + index_t tlen = pContext->m_pMatchStringLength; + index_t npos = pContext->m_nCurrentPos; + + // check + index_t at = m_brightleft ? npos - 1 : npos; + if (at < 0 || at >= tlen) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + int bsucc = 0, i; + + // compare + for (i = 0; !bsucc && i < m_ranges.GetSize(); i += 2) + { + if (m_ranges[i] <= ch && ch <= m_ranges[i + 1]) bsucc = 1; + } + + for (i = 0; !bsucc && i < m_chars.GetSize(); i++) + { + if (m_chars[i] == ch) bsucc = 1; + } + + for (i = 0; !bsucc && i < m_embeds.GetSize(); i++) + { + if (m_embeds[i]->Match(pContext)) + { + pContext->m_nCurrentPos = npos; + bsucc = 1; + } + } + + if (!m_byes) + bsucc = !bsucc; + + if (bsucc) + pContext->m_nCurrentPos += m_brightleft ? 
-1 : 1; + + return bsucc; +} + +template int CRangeElxT ::IsContainChar(CHART ch) const +{ + int bsucc = 0, i; + + // compare + for (i = 0; !bsucc && i < m_ranges.GetSize(); i += 2) + { + if (m_ranges[i] <= ch && ch <= m_ranges[i + 1]) bsucc = 1; + } + + for (i = 0; !bsucc && i < m_chars.GetSize(); i++) + { + if (m_chars[i] == ch) bsucc = 1; + } + + return bsucc; +} + +template int CRangeElxT ::MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? -1 : 1; + return 0; +} + +// +// Reluctant +// +template class CReluctantElxT : public CRepeatElxT +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + static int MatchVart(CContext * pContext); + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CReluctantElxT <0> CReluctantElx; + +// +// String Elx +// +template class CStringElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CStringElxT(const CHART * fixed, index_t nlength, int brightleft, int bignorecase); + +public: + CBufferT m_szPattern; + int m_brightleft; + int m_bignorecase; +}; + +// +// Implementation +// +template CStringElxT ::CStringElxT(const CHART * fixed, index_t nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength) +{ + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CStringElxT ::Match(CContext * pContext) const +{ + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + index_t npos = pContext->m_nCurrentPos; + index_t tlen = pContext->m_pMatchStringLength; + index_t slen = m_szPattern.GetSize(); + + int bsucc; + + if (m_brightleft) + { + if (npos < slen) + return 0; + + if (m_bignorecase) + bsucc = !m_szPattern.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = !m_szPattern.nCompare(pcsz + (npos - 
slen)); + + if (bsucc) + pContext->m_nCurrentPos -= slen; + } + else + { + if (npos + slen > tlen) + return 0; + + if (m_bignorecase) + bsucc = !m_szPattern.nCompareNoCase(pcsz + npos); + else + bsucc = !m_szPattern.nCompare(pcsz + npos); + + if (bsucc) + pContext->m_nCurrentPos += slen; + } + + return bsucc; +} + +template int CStringElxT ::MatchNext(CContext * pContext) const +{ + index_t slen = m_szPattern.GetSize(); + + if (m_brightleft) + pContext->m_nCurrentPos += slen; + else + pContext->m_nCurrentPos -= slen; + + return 0; +} + +// +// CConditionElx +// +template class CConditionElxT : public ElxInterface +{ +public: + int Match(CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CConditionElxT(); + +public: + // backref condition + index_t m_nnumber; + CBufferT m_szNamed; + + // elx condition + ElxInterface * m_pelxask; + + // selection + ElxInterface * m_pelxyes, *m_pelxno; +}; + +template CConditionElxT ::CConditionElxT() + : m_nnumber(-1) + , m_szNamed() + , m_pelxask(nullptr) + , m_pelxyes(nullptr) + , m_pelxno(nullptr) +{ +} + +template int CConditionElxT ::Match(CContext * pContext) const +{ + // status + index_t nbegin = pContext->m_nCurrentPos; + index_t nsize = pContext->m_stack.GetSize(); + index_t ncsize = pContext->m_capturestack.GetSize(); + + // condition result + int condition_yes = 0; + + // backref type + if (m_nnumber >= 0) + { + do + { + if (m_nnumber >= pContext->m_captureindex.GetSize()) break; + + index_t index = pContext->m_captureindex[m_nnumber]; + if (index < 0) break; + + // else valid + condition_yes = 1; + } while (0); + } + else + { + if (m_pelxask == 0) + condition_yes = 1; + else + condition_yes = m_pelxask->Match(pContext); + + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + } + + // elx result + int bsucc; + if (condition_yes) + bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext); + else + bsucc = m_pelxno == 0 ? 
1 : m_pelxno->Match(pContext); + + if (bsucc) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +template int CConditionElxT ::MatchNext(CContext * pContext) const +{ + // pop + index_t ncsize, condition_yes; + + pContext->m_stack.Pop(condition_yes); + pContext->m_stack.Pop(ncsize); + + // elx result + int bsucc; + if (condition_yes) + bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext); + else + bsucc = m_pelxno == 0 ? 0 : m_pelxno->MatchNext(pContext); + + if (bsucc) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +// +// MatchResult +// +template class MatchResultT +{ +public: + int IsMatched() const; + +public: + index_t GetStart() const; + index_t GetEnd() const; + +public: + index_t MaxGroupNumber() const; + index_t GetGroupStart(index_t nGroupNumber) const; + index_t GetGroupEnd(index_t nGroupNumber) const; + +public: + MatchResultT(const MatchResultT & from) { *this = from; } + MatchResultT(CContext * pContext = 0, index_t nMaxNumber = -1); + MatchResultT & operator = (const MatchResultT &); + inline operator int() const { return IsMatched(); } + +public: + CBufferT m_result; +}; + +typedef MatchResultT <0> MatchResult; + +// Stocked Elx IDs +enum STOCKELX_ID_DEFINES +{ + STOCKELX_EMPTY = 0, + + /////////////////////// + + STOCKELX_DOT_ALL, + STOCKELX_DOT_NOT_ALL, + + STOCKELX_WORD, + STOCKELX_WORD_NOT, + + STOCKELX_SPACE, + STOCKELX_SPACE_NOT, + + STOCKELX_DIGITAL, + STOCKELX_DIGITAL_NOT, + + ////////////////////// + + STOCKELX_DOT_ALL_RIGHTLEFT, + STOCKELX_DOT_NOT_ALL_RIGHTLEFT, + + STOCKELX_WORD_RIGHTLEFT, + STOCKELX_WORD_RIGHTLEFT_NOT, + + STOCKELX_SPACE_RIGHTLEFT, + STOCKELX_SPACE_RIGHTLEFT_NOT, + + STOCKELX_DIGITAL_RIGHTLEFT, + STOCKELX_DIGITAL_RIGHTLEFT_NOT, + + ///////////////////// + + STOCKELX_COUNT +}; + +// 
REGEX_FLAGS +#ifndef _REGEX_FLAGS_DEFINED +enum REGEX_FLAGS +{ + NO_FLAG = 0, + SINGLELINE = 0x01, + MULTILINE = 0x02, + GLOBAL = 0x04, + IGNORECASE = 0x08, + RIGHTTOLEFT = 0x10, + EXTENDED = 0x20 +}; +#define _REGEX_FLAGS_DEFINED +#endif + +// +// Builder T +// +template class CBuilderT +{ +public: + typedef CDelegateElxT CDelegateElx; + typedef CBracketElxT CBracketElx; + typedef CBackrefElxT CBackrefElx; + typedef CConditionElxT CConditionElx; + + // Methods +public: + ElxInterface * Build(const CBufferRefT & pattern, int flags); + index_t GetNamedNumber(const CBufferRefT & named) const; + void Clear(); + +public: + CBuilderT(); + virtual ~CBuilderT(); + + // Public Attributes +public: + ElxInterface * m_pTopElx; + int m_nFlags; + index_t m_nMaxNumber; + index_t m_nNextNamed; + index_t m_nGroupCount; + int m_nNextBalancing; + + CBufferT m_objlist; + CBufferT m_grouplist; + CBufferT m_recursivelist; + CBufferT m_namedlist; + CBufferT m_namedbackreflist; + CBufferT m_namedconditionlist; + CBufferT m_purebalancinglist; + + // CHART_INFO +protected: + struct CHART_INFO + { + public: + CHART ch; + int type; + int pos; + int len; + + public: + CHART_INFO(CHART c, int t, int p = 0, int l = 0) { ch = c; type = t; pos = p; len = l; } + inline int operator == (const CHART_INFO & ci) const { return ch == ci.ch && type == ci.type; } + inline int operator != (const CHART_INFO & ci) const { return !operator == (ci); } + }; + +protected: + static unsigned int Hex2Int(const CHART * pcsz, int length, int & used); + static int ReadDec(char * & str, unsigned int & dec); + void MoveNext(); + int GetNext2(); + + ElxInterface * BuildAlternative(int vaflags); + ElxInterface * BuildList(int & flags); + ElxInterface * BuildRepeat(int & flags); + ElxInterface * BuildSimple(int & flags); + ElxInterface * BuildCharset(int & flags); + ElxInterface * BuildRecursive(int & flags); + ElxInterface * BuildBoundary(int & flags); + ElxInterface * BuildBackref(int & flags); + + ElxInterface * 
GetStockElx(int nStockId); + ElxInterface * Keep(ElxInterface * pElx); + + // Private Attributes +protected: + CBufferRefT m_pattern; + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + + // Backup current pos + struct Snapshot + { + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + Snapshot() : prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0) + , m_nNextPos(0), m_nCharsetDepth(0), m_bQuoted(0), m_quote_fun() + {} + }; + void Backup(Snapshot * pdata) { memcpy(pdata, &prev, sizeof(Snapshot)); } + void Restore(Snapshot * pdata) { memcpy(&prev, pdata, sizeof(Snapshot)); } + + ElxInterface * m_pStockElxs[STOCKELX_COUNT]; +}; + +// +// Implementation +// +template CBuilderT ::CBuilderT() + : m_nFlags(0) + , m_nNextBalancing(0) +// protected + , m_pattern(0, 0) + , prev(0, 0) + , curr(0, 0) + , next(0, 0) + , nex2(0, 0) + , m_nNextPos(0) + , m_nCharsetDepth(0) + , m_bQuoted(0) + , m_quote_fun() +{ + Clear(); +} + +template CBuilderT :: ~CBuilderT() +{ + Clear(); +} + +template index_t CBuilderT ::GetNamedNumber(const CBufferRefT & named) const +{ + for (int i = 0; i < m_namedlist.GetSize(); i++) + { + if (!((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_szNamed.CompareNoCase(named)) + return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber; + } + + return -3; +} + +template ElxInterface * CBuilderT ::Build(const CBufferRefT & pattern, int flags) +{ + // init + m_pattern = pattern; + m_nNextPos = 0; + m_nCharsetDepth = 0; + m_nMaxNumber = 0; + m_nNextNamed = 0; + m_nNextBalancing = 0; + m_nFlags = flags; + m_bQuoted = 0; + m_quote_fun = 0; + + m_grouplist.Restore(0); + m_recursivelist.Restore(0); + m_namedlist.Restore(0); + m_namedbackreflist.Restore(0); + m_namedconditionlist.Restore(0); + m_purebalancinglist.Restore(0); + + int i; + for (i = 0; i < 3; i++) MoveNext(); + + // build + m_pTopElx = BuildAlternative(flags); + + 
// group 0 + m_grouplist.Prepare(0); + m_grouplist[0] = m_pTopElx; + + // append named to unnamed + m_nGroupCount = m_grouplist.GetSize(); + + m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize()); + + for (i = 0; i < m_namedlist.GetSize(); i++) + { + CBracketElx * pleft = (CBracketElx *)m_namedlist[i]->m_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2]; + + // append + m_grouplist[m_nGroupCount++] = m_namedlist[i]; + + if (pleft->m_nnumber > 0) + continue; + + // same name + index_t find_same_name = GetNamedNumber(pleft->m_szNamed); + if (find_same_name >= 0) + { + pleft->m_nnumber = find_same_name; + pright->m_nnumber = find_same_name; + } + else + { + m_nMaxNumber++; + + pleft->m_nnumber = m_nMaxNumber; + pright->m_nnumber = m_nMaxNumber; + } + } + + for (i = 0; i < m_namedlist.GetSize(); i++) + { + CBracketElx * pleft = (CBracketElx *)m_namedlist[i]->m_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2]; + + // balancing + if (pleft->m_szBalancing.GetSize() > 0) + { + index_t balancing_to = GetNamedNumber(pleft->m_szBalancing); + if (balancing_to >= 0) + { + pleft->m_balancing = balancing_to; + pright->m_balancing = balancing_to; + } + else + { + //TODO: ERROR + } + } + } + + for (i = 1; i < m_nGroupCount; i++) + { + CBracketElx * pleft = (CBracketElx *)((CListElx*)m_grouplist[i])->m_elxlist[0]; + + if (pleft->m_nnumber > m_nMaxNumber) + m_nMaxNumber = pleft->m_nnumber; + } + + // pure balancing group + index_t nMaxNumber = m_nMaxNumber; + for (i = 0; i < m_purebalancinglist.GetSize(); i++) + { + CBracketElx * pleft = (CBracketElx *)m_purebalancinglist[i]->m_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_purebalancinglist[i]->m_elxlist[2]; + + nMaxNumber++; + + pleft->m_nnumber = nMaxNumber; + pright->m_nnumber = nMaxNumber; + + // balancing + if (pleft->m_szBalancing.GetSize() > 0) + { + index_t balancing_to = GetNamedNumber(pleft->m_szBalancing); + if (balancing_to >= 0) + { + 
pleft->m_balancing = balancing_to; + pright->m_balancing = balancing_to; + } + else + { + //TODO: ERROR + } + } + } + + // connect recursive + for (i = 0; i < m_recursivelist.GetSize(); i++) + { + if (m_recursivelist[i]->m_ndata == -3) + m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed); + + if (m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata <= m_nMaxNumber) + { + if (m_recursivelist[i]->m_ndata == 0) + m_recursivelist[i]->m_pelx = m_pTopElx; + else for (int j = 1; j < m_grouplist.GetSize(); j++) + { + if (m_recursivelist[i]->m_ndata == ((CBracketElx *)((CListElx*)m_grouplist[j])->m_elxlist[0])->m_nnumber) + { + m_recursivelist[i]->m_pelx = m_grouplist[j]; + break; + } + } + } + } + + // named backref + for (i = 0; i < m_namedbackreflist.GetSize(); i++) + { + m_namedbackreflist[i]->m_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed); + } + + // named condition + for (i = 0; i < m_namedconditionlist.GetSize(); i++) + { + index_t nn = GetNamedNumber(m_namedconditionlist[i]->m_szNamed); + if (nn >= 0) + { + m_namedconditionlist[i]->m_nnumber = nn; + m_namedconditionlist[i]->m_pelxask = 0; + } + } + + return m_pTopElx; +} + +template void CBuilderT ::Clear() +{ + for (int i = 0; i < m_objlist.GetSize(); i++) + { + delete m_objlist[i]; + } + + m_objlist.Restore(0); + m_pTopElx = 0; + m_nMaxNumber = 0; + + memset(m_pStockElxs, 0, sizeof(m_pStockElxs)); +} + +// +// hex to int +// +template unsigned int CBuilderT ::Hex2Int(const CHART * pcsz, int length, int & used) +{ + unsigned int result = 0; + int & i = used; + + for (i = 0; i < length; i++) + { + if (pcsz[i] >= RCHART('0') && pcsz[i] <= RCHART('9')) + result = (result << 4) + (pcsz[i] - RCHART('0')); + else if (pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A'))); + else if (pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a'))); + else + break; + } + + 
return result; +} + +template inline ElxInterface * CBuilderT ::Keep(ElxInterface * pelx) +{ + m_objlist.Push(pelx); + return pelx; +} + +template void CBuilderT ::MoveNext() +{ + // forwards + prev = curr; + curr = next; + next = nex2; + + // get nex2 + while (!GetNext2()) {}; +} + +template int CBuilderT ::GetNext2() +{ + // check length + if (m_nNextPos >= m_pattern.GetSize()) + { + nex2 = CHART_INFO(0, 1, m_nNextPos, 0); + return 1; + } + + int delta = 1; + CHART ch = m_pattern[m_nNextPos]; + + // if quoted + if (m_bQuoted) + { + if (ch == RCHART('\\')) + { + if (m_pattern[m_nNextPos + 1] == RCHART('E')) + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + } + + if (m_quote_fun != 0) + nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + + m_nNextPos += delta; + + return 1; + } + + // common + switch (ch) + { + case RCHART('\\'): + { + CHART ch1 = m_pattern[m_nNextPos + 1]; + + // backref + if (ch1 >= RCHART('0') && ch1 <= RCHART('9')) + { + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + } + + // escape + delta = 2; + + switch (ch1) + { + case RCHART('A'): + case RCHART('Z'): + case RCHART('z'): + case RCHART('w'): + case RCHART('W'): + case RCHART('s'): + case RCHART('S'): + case RCHART('B'): + case RCHART('d'): + case RCHART('D'): + case RCHART('k'): + case RCHART('g'): + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + + case RCHART('b'): + if (m_nCharsetDepth > 0) + nex2 = CHART_INFO('\b', 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + + /* + case RCHART('<'): + case RCHART('>'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + */ + + case RCHART('x'): + if (m_pattern[m_nNextPos + 2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red); + + delta += red; + + if 
(red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + + break; + } + + case RCHART('u'): + if (m_pattern[m_nNextPos + 2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 4, red); + + delta += red; + + if (red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + } + else + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red); + + delta += red; + + while (m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}')) + delta++; + + delta++; // skip '}' + + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + } + break; + + case RCHART('a'): nex2 = CHART_INFO(RCHART('\a'), 0, m_nNextPos, delta); break; + case RCHART('f'): nex2 = CHART_INFO(RCHART('\f'), 0, m_nNextPos, delta); break; + case RCHART('n'): nex2 = CHART_INFO(RCHART('\n'), 0, m_nNextPos, delta); break; + case RCHART('r'): nex2 = CHART_INFO(RCHART('\r'), 0, m_nNextPos, delta); break; + case RCHART('t'): nex2 = CHART_INFO(RCHART('\t'), 0, m_nNextPos, delta); break; + case RCHART('v'): nex2 = CHART_INFO(RCHART('\v'), 0, m_nNextPos, delta); break; + case RCHART('e'): nex2 = CHART_INFO(RCHART(27), 0, m_nNextPos, delta); break; + + case RCHART('G'): // skip '\G' + if (m_nCharsetDepth > 0) + { + m_nNextPos += 2; + return 0; + } + else + { + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + } + + case RCHART('L'): + if (!m_quote_fun) m_quote_fun = ::tolower; + + case RCHART('U'): + if (!m_quote_fun) m_quote_fun = ::toupper; + + case RCHART('Q'): + { + m_bQuoted = 1; + m_nNextPos += 2; + return 0; + } + + case RCHART('E'): + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + + case 0: + if (m_nNextPos + 1 >= m_pattern.GetSize()) + { + delta = 1; + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + else + nex2 = 
CHART_INFO(ch1, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + break; + } + } + break; + + case RCHART('*'): + case RCHART('+'): + case RCHART('?'): + case RCHART('.'): + case RCHART('{'): + case RCHART('}'): + case RCHART(')'): + case RCHART('|'): + case RCHART('$'): + if (m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + + case RCHART('-'): + if (m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('('): + { + CHART ch1 = m_pattern[m_nNextPos + 1]; + CHART ch2 = m_pattern[m_nNextPos + 2]; + + // skip remark + if (ch1 == RCHART('?') && ch2 == RCHART('#')) + { + m_nNextPos += 2; + while (m_nNextPos < m_pattern.GetSize()) + { + if (m_pattern[m_nNextPos] == RCHART(')')) + break; + + m_nNextPos++; + } + + if (m_pattern[m_nNextPos] == RCHART(')')) + { + m_nNextPos++; + + // get next nex2 + return 0; + } + } + else + { + if (m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + } + break; + + case RCHART('#'): + if (m_nFlags & EXTENDED) + { + // skip remark + m_nNextPos++; + + while (m_nNextPos < m_pattern.GetSize()) + { + if (m_pattern[m_nNextPos] == RCHART('\n') || m_pattern[m_nNextPos] == RCHART('\r')) + break; + + m_nNextPos++; + } + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(' '): + case RCHART('\f'): + case RCHART('\n'): + case RCHART('\r'): + case RCHART('\t'): + case RCHART('\v'): + if (m_nFlags & EXTENDED) + { + m_nNextPos++; + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART('['): + if (m_nCharsetDepth == 0 || m_pattern.At(m_nNextPos + 1, 0) == RCHART(':')) + { + m_nCharsetDepth++; + nex2 = 
CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(']'): + if (m_nCharsetDepth > 0) + { + m_nCharsetDepth--; + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(':'): + if (next == CHART_INFO(RCHART('['), 1)) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('^'): + if (m_nCharsetDepth == 0 || next == CHART_INFO(RCHART('['), 1) || (curr == CHART_INFO(RCHART('['), 1) && next == CHART_INFO(RCHART(':'), 1))) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case 0: + if (m_nNextPos >= m_pattern.GetSize()) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); // end of string + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + } + + m_nNextPos += delta; + + return 1; +} + +template ElxInterface * CBuilderT ::GetStockElx(int nStockId) +{ + ElxInterface ** pStockElxs = m_pStockElxs; + + // check + if (nStockId < 0 || nStockId >= STOCKELX_COUNT) + return GetStockElx(0); + + // create if no + if (pStockElxs[nStockId] == 0) + { + switch (nStockId) + { + case STOCKELX_EMPTY: + pStockElxs[nStockId] = Keep(new CEmptyElx()); + break; + + case STOCKELX_WORD: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars.Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('A')); 
pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars.Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL: + pStockElxs[nStockId] = Keep(new CRangeElxT (0, 0)); + break; + + case STOCKELX_DOT_NOT_ALL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars.Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_chars.Push(RCHART(' ')); + pRange->m_chars.Push(RCHART('\t')); + pRange->m_chars.Push(RCHART('\r')); + pRange->m_chars.Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars.Push(RCHART(' ')); + pRange->m_chars.Push(RCHART('\t')); + pRange->m_chars.Push(RCHART('\r')); + pRange->m_chars.Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars.Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } 
+ break; + + case STOCKELX_WORD_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars.Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL_RIGHTLEFT: + pStockElxs[nStockId] = Keep(new CRangeElxT (1, 0)); + break; + + case STOCKELX_DOT_NOT_ALL_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars.Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_chars.Push(RCHART(' ')); + pRange->m_chars.Push(RCHART('\t')); + pRange->m_chars.Push(RCHART('\r')); + pRange->m_chars.Push(RCHART('\n')); + pRange->m_chars.Push(RCHART('\f')); + pRange->m_chars.Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars.Push(RCHART(' ')); + pRange->m_chars.Push(RCHART('\t')); + pRange->m_chars.Push(RCHART('\r')); + pRange->m_chars.Push(RCHART('\n')); + pRange->m_chars.Push(RCHART('\f')); + pRange->m_chars.Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + } + } + + // return + return 
pStockElxs[nStockId]; +} + +template ElxInterface * CBuilderT ::BuildAlternative(int vaflags) +{ + if (curr == CHART_INFO(0, 1)) + return GetStockElx(STOCKELX_EMPTY); + + // flag instance + int flags = vaflags; + + // first part + ElxInterface * pAlternativeOne = BuildList(flags); + + // check alternative + if (curr == CHART_INFO(RCHART('|'), 1)) + { + CAlternativeElx * pAlternative = (CAlternativeElx *)Keep(new CAlternativeElx()); + pAlternative->m_elxlist.Push(pAlternativeOne); + + // loop + while (curr == CHART_INFO(RCHART('|'), 1)) + { + // skip '|' itself + MoveNext(); + + pAlternativeOne = BuildList(flags); + pAlternative->m_elxlist.Push(pAlternativeOne); + } + + return pAlternative; + } + + return pAlternativeOne; +} + +template ElxInterface * CBuilderT ::BuildList(int & flags) +{ + if (curr == CHART_INFO(0, 1) || curr == CHART_INFO(RCHART('|'), 1) || curr == CHART_INFO(RCHART(')'), 1)) + return GetStockElx(STOCKELX_EMPTY); + + // first + ElxInterface * pListOne = BuildRepeat(flags); + + if (curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + pList->m_elxlist.Push(pListOne); + + while (curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + pListOne = BuildRepeat(flags); + + // add + pList->m_elxlist.Push(pListOne); + } + + return pList; + } + + return pListOne; +} + +template ElxInterface * CBuilderT ::BuildRepeat(int & flags) +{ + // simple + ElxInterface * pSimple = BuildSimple(flags); + + if (curr.type == 0) return pSimple; + + // is quantifier or not + int bIsQuantifier = 1; + + // quantifier range + unsigned int nMin = 0, nMax = 0; + + switch (curr.ch) + { + case RCHART('{'): + { + CBufferT re; + + // skip '{' + MoveNext(); + + // copy + while (curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('}'), 1)) + { + re.Append(((curr.ch & (CHART)0xff) == curr.ch) ? 
(char)curr.ch : 0, 1); + MoveNext(); + } + + // skip '}' + MoveNext(); + + // read + int red; + char * str = re.GetBuffer(); + + if (!ReadDec(str, nMin)) + red = 0; + else if (*str != ',') + red = 1; + else + { + str++; + + if (!ReadDec(str, nMax)) + red = 2; + else + red = 3; + } + + // check + if (red <= 1) nMax = nMin; + if (red == 2) nMax = INT_MAX; + if (nMax < nMin) nMax = nMin; + } + break; + + case RCHART('?'): + nMin = 0; + nMax = 1; + + // skip '?' + MoveNext(); + break; + + case RCHART('*'): + nMin = 0; + nMax = INT_MAX; + + // skip '*' + MoveNext(); + break; + + case RCHART('+'): + nMin = 1; + nMax = INT_MAX; + + // skip '+' + MoveNext(); + break; + + default: + bIsQuantifier = 0; + break; + } + + // do quantify + if (bIsQuantifier) + { + // 0 times + if (nMax == 0) + return GetStockElx(STOCKELX_EMPTY); + + // fixed times + if (nMin == nMax) + { + if (curr == CHART_INFO(RCHART('?'), 1) || curr == CHART_INFO(RCHART('+'), 1)) + MoveNext(); + + return Keep(new CRepeatElx(pSimple, nMin)); + } + + // range times + if (curr == CHART_INFO(RCHART('?'), 1)) + { + MoveNext(); + return Keep(new CReluctantElx(pSimple, nMin, nMax)); + } + else if (curr == CHART_INFO(RCHART('+'), 1)) + { + MoveNext(); + return Keep(new CPossessiveElx(pSimple, nMin, nMax)); + } + else + { + return Keep(new CGreedyElx(pSimple, nMin, nMax)); + } + } + + return pSimple; +} + +template ElxInterface * CBuilderT ::BuildSimple(int & flags) +{ + CBufferT fixed; + + while (curr != CHART_INFO(0, 1)) + { + if (curr.type == 0) + { + if (next == CHART_INFO(RCHART('{'), 1) || next == CHART_INFO(RCHART('?'), 1) || next == CHART_INFO(RCHART('*'), 1) || next == CHART_INFO(RCHART('+'), 1)) + { + if (fixed.GetSize() == 0) + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + + break; + } + else + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + else if (curr.type == 1) + { + CHART vch = curr.ch; + + // end of simple + if (vch == RCHART(')') || vch == RCHART('|')) + break; + + // has fixed already + 
if (fixed.GetSize() > 0) + break; + + // left parentheses + if (vch == RCHART('(')) + { + return BuildRecursive(flags); + } + + // char set + if (vch == RCHART('[') || vch == RCHART('.') || vch == RCHART('w') || vch == RCHART('W') || + vch == RCHART('s') || vch == RCHART('S') || vch == RCHART('d') || vch == RCHART('D') + ) + { + return BuildCharset(flags); + } + + // boundary + if (vch == RCHART('^') || vch == RCHART('$') || vch == RCHART('A') || vch == RCHART('Z') || vch == RCHART('z') || + vch == RCHART('b') || vch == RCHART('B') || vch == RCHART('G') // vch == RCHART('<') || vch == RCHART('>') + ) + { + return BuildBoundary(flags); + } + + // backref + if (vch == RCHART('\\') || vch == RCHART('k') || vch == RCHART('g')) + { + return BuildBackref(flags); + } + + // treat vchar as char + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + + if (fixed.GetSize() > 0) + return Keep(new CStringElxT (fixed.GetBuffer(), fixed.GetSize(), flags & RIGHTTOLEFT, flags & IGNORECASE)); + else + return GetStockElx(STOCKELX_EMPTY); +} + +#define deelx_max(a, b) (((a) > (b)) ? (a) : (b)) +#define deelx_min(a, b) (((a) < (b)) ? (a) : (b)) + +template ElxInterface * CBuilderT ::BuildCharset(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch (ch) + { + case RCHART('.'): + return GetStockElx( + (flags & RIGHTTOLEFT) ? + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL_RIGHTLEFT : STOCKELX_DOT_NOT_ALL_RIGHTLEFT) : + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL : STOCKELX_DOT_NOT_ALL) + ); + + case RCHART('w'): + return GetStockElx((flags & RIGHTTOLEFT) ? STOCKELX_WORD_RIGHTLEFT : STOCKELX_WORD); + + case RCHART('W'): + return GetStockElx((flags & RIGHTTOLEFT) ? STOCKELX_WORD_RIGHTLEFT_NOT : STOCKELX_WORD_NOT); + + case RCHART('s'): + return GetStockElx((flags & RIGHTTOLEFT) ? STOCKELX_SPACE_RIGHTLEFT : STOCKELX_SPACE); + + case RCHART('S'): + return GetStockElx((flags & RIGHTTOLEFT) ? 
STOCKELX_SPACE_RIGHTLEFT_NOT : STOCKELX_SPACE_NOT); + + case RCHART('d'): + return GetStockElx((flags & RIGHTTOLEFT) ? STOCKELX_DIGITAL_RIGHTLEFT : STOCKELX_DIGITAL); + + case RCHART('D'): + return GetStockElx((flags & RIGHTTOLEFT) ? STOCKELX_DIGITAL_RIGHTLEFT_NOT : STOCKELX_DIGITAL_NOT); + + case RCHART('['): + { + CRangeElxT * pRange; + + // create + if (curr == CHART_INFO(RCHART(':'), 1)) + { + // Backup before posix + Snapshot shot; + Backup(&shot); + + CBufferT posix; + + do + { + posix.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } while (curr.ch != RCHART(0) && curr != CHART_INFO(RCHART(']'), 1)); + + MoveNext(); // skip ']' + + // posix + CPosixElxT * pposix = (CPosixElxT *) Keep(new CPosixElxT (posix.GetBuffer(), flags & RIGHTTOLEFT)); + if (pposix->m_posixfun != 0) + { + return pposix; + } + + // restore if not posix + Restore(&shot); + } + + if (curr == CHART_INFO(RCHART('^'), 1)) + { + MoveNext(); // skip '^' + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 0)); + } + else + { + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 1)); + } + + // parse + while (curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART(']'), 1)) + { + ch = curr.ch; + + if (curr.type == 1 && ( + ch == RCHART('.') || ch == RCHART('w') || ch == RCHART('W') || ch == RCHART('s') || ch == RCHART('S') || ch == RCHART('d') || ch == RCHART('D') || + (ch == RCHART('[') && next == CHART_INFO(RCHART(':'), 1)) + )) + { + pRange->m_embeds.Push(BuildCharset(flags)); + } + else if (next == CHART_INFO(RCHART('-'), 1) && nex2.type == 0) + { + pRange->m_ranges.Push(ch); pRange->m_ranges.Push(nex2.ch); + + // next + MoveNext(); + MoveNext(); + MoveNext(); + } + else + { + pRange->m_chars.Push(ch); + + // next + MoveNext(); + } + } + + // skip ']' + MoveNext(); + + if (flags & IGNORECASE) + { + CBufferT & ranges = pRange->m_ranges; + index_t i, oldcount = ranges.GetSize() / 2; + + for (i = 0; i < oldcount; i++) + { + CHART 
newmin, newmax; + + if (ranges[i * 2] <= RCHART('Z') && ranges[i * 2 + 1] >= RCHART('A')) + { + newmin = tolower(deelx_max(RCHART('A'), ranges[i * 2])); + newmax = tolower(deelx_min(RCHART('Z'), ranges[i * 2 + 1])); + + if (newmin < ranges[i * 2] || newmax > ranges[i * 2 + 1]) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + + if (ranges[i * 2] <= RCHART('z') && ranges[i * 2 + 1] >= RCHART('a')) + { + newmin = toupper(deelx_max(RCHART('a'), ranges[i * 2])); + newmax = toupper(deelx_min(RCHART('z'), ranges[i * 2 + 1])); + + if (newmin < ranges[i * 2] || newmax > ranges[i * 2 + 1]) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + } + + CBufferT & chars = pRange->m_chars; + oldcount = chars.GetSize(); + for (i = 0; i < oldcount; i++) + { + if (isupper(chars[i]) && !pRange->IsContainChar(tolower(chars[i]))) + chars.Push(tolower(chars[i])); + + if (islower(chars[i]) && !pRange->IsContainChar(toupper(chars[i]))) + chars.Push(toupper(chars[i])); + } + } + + return pRange; + } + } + + return GetStockElx(STOCKELX_EMPTY); +} + +template ElxInterface * CBuilderT ::BuildRecursive(int & flags) +{ + // skip '(' + MoveNext(); + + if (curr == CHART_INFO(RCHART('?'), 1)) + { + ElxInterface * pElx = 0; + + // skip '?' + MoveNext(); + + int bNegative = 0; + CHART named_end = RCHART('>'); + + switch (curr.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + { + MoveNext(); // skip '!' or '=' + pElx = Keep(new CAssertElx(BuildAlternative(flags & ~RIGHTTOLEFT), !bNegative)); + } + break; + + case RCHART('<'): + switch (next.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + MoveNext(); // skip '<' + MoveNext(); // skip '!' 
or '=' + { + pElx = Keep(new CAssertElx(BuildAlternative(flags | RIGHTTOLEFT), !bNegative)); + } + break; + + default: // named group + break; + } + // break if assertion // else named + if (pElx != 0) break; + + case RCHART('P'): + if (curr.ch == RCHART('P')) MoveNext(); // skip 'P' + + case RCHART('\''): + if (curr.ch == RCHART('<')) named_end = RCHART('>'); + else if (curr.ch == RCHART('\'')) named_end = RCHART('\''); + MoveNext(); // skip '<' or '\'' + { + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + CBracketElx * pleft = (CBracketElx *)Keep(new CBracketElx(-1, (flags & RIGHTTOLEFT) ? 1 : 0)); + CBracketElx * pright = (CBracketElx *)Keep(new CBracketElx(-1, (flags & RIGHTTOLEFT) ? 0 : 1)); + + // save name + CBufferT & name = pleft->m_szNamed, &balancing_name = pleft->m_szBalancing, *pname = &name; + CBufferT num, balancing_num, *pnum = # + + while (curr.ch != RCHART(0) && curr.ch != named_end) + { + if (curr.ch == RCHART('-')) + { + pname = &balancing_name; + pnum = &balancing_num; + MoveNext(); + continue; + } + + pname->Append(curr.ch, 1); + pnum->Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if (ReadDec(str, number) ? (*str == '\0') : 0) + { + pleft->m_nnumber = number; + pright->m_nnumber = number; + + name.Release(); + } + + str = balancing_num.GetBuffer(); + if (ReadDec(str, number) ? 
(*str == '\0') : 0) + { + pleft->m_balancing = number; + pright->m_balancing = number; + + balancing_name.Release(); + } + + // left, center, right + pList->m_elxlist.Push(pleft); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(pright); + + // named number + if (pleft->m_nnumber >= 0 || name.GetSize() > 0) + { + index_t nThisBackref = m_nNextNamed++; + m_namedlist.Prepare(nThisBackref); + m_namedlist[nThisBackref] = pList; + } + else if (pleft->m_balancing >= 0 || balancing_name.GetSize() > 0) + { + int nThisBalancing = m_nNextBalancing++; + m_purebalancinglist.Prepare(nThisBalancing, 0); + m_purebalancinglist[nThisBalancing] = pList; + } + else + { + // TODO ERROR + } + + pElx = pList; + } + break; + + case RCHART('>'): + { + MoveNext(); // skip '>' + pElx = Keep(new CIndependentElx(BuildAlternative(flags))); + } + break; + + case RCHART('R'): + MoveNext(); // skip 'R' + while (curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if (curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\''); + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(-3)); + + MoveNext(); // skip '<' or '\\' + + // save name + CBufferT & name = pDelegate->m_szNamed; + CBufferT num; + + while (curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if (ReadDec(str, number) ? (*str == '\0') : 0) + { + pDelegate->m_ndata = number; + name.Release(); + } + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + CBufferT rto; + while (curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? 
(char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + break; + + case RCHART('('): + { + CConditionElx * pConditionElx = (CConditionElx *)Keep(new CConditionElx()); + + // condition + ElxInterface * & pCondition = pConditionElx->m_pelxask; + + if (next == CHART_INFO(RCHART('?'), 1)) + { + pCondition = BuildRecursive(flags); + } + else // named, assert or number + { + MoveNext(); // skip '(' + int pos0 = curr.pos; + + // save elx condition + pCondition = Keep(new CAssertElx(BuildAlternative(flags), 1)); + + // save name + pConditionElx->m_szNamed.Append(m_pattern.GetBuffer() + pos0, curr.pos - pos0, 1); + + // save number + CBufferT numstr; + while (pos0 < curr.pos) + { + CHART ch = m_pattern[pos0]; + numstr.Append(((ch & (CHART)0xff) == ch) ? (char)ch : 0, 1); + pos0++; + } + + unsigned int number; + char * str = numstr.GetBuffer(); + + // valid group number + if (ReadDec(str, number) ? (*str == '\0') : 0) + { + pConditionElx->m_nnumber = number; + pCondition = 0; + } + else // maybe elx, maybe named + { + pConditionElx->m_nnumber = -1; + m_namedconditionlist.Push(pConditionElx); + } + + MoveNext(); // skip ')' + } + + // alternative + { + int newflags = flags; + + pConditionElx->m_pelxyes = BuildList(newflags); + } + + if (curr.ch == RCHART('|')) + { + MoveNext(); // skip '|' + + pConditionElx->m_pelxno = BuildAlternative(flags); + } + else + { + pConditionElx->m_pelxno = 0; + } + + pElx = pConditionElx; + } + break; + + default: + while (curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if (curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) // recursive (?1) => (?R1) + { + CBufferT rto; + while (curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? 
(char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + // flag + int newflags = flags; + while (curr != CHART_INFO(0, 1) && curr.ch != RCHART(':') && curr.ch != RCHART(')') && curr != CHART_INFO(RCHART('('), 1)) + { + int tochange = 0; + + switch (curr.ch) + { + case RCHART('i'): + case RCHART('I'): + tochange = IGNORECASE; + break; + + case RCHART('s'): + case RCHART('S'): + tochange = SINGLELINE; + break; + + case RCHART('m'): + case RCHART('M'): + tochange = MULTILINE; + break; + + case RCHART('g'): + case RCHART('G'): + tochange = GLOBAL; + break; + + case RCHART('-'): + bNegative = 1; + break; + } + + if (bNegative) + newflags &= ~tochange; + else + newflags |= tochange; + + // move to next char + MoveNext(); + } + + if (curr.ch == RCHART(':') || curr == CHART_INFO(RCHART('('), 1)) + { + // skip ':' + if (curr.ch == RCHART(':')) MoveNext(); + + pElx = BuildAlternative(newflags); + } + else + { + // change parent flags + flags = newflags; + + pElx = GetStockElx(STOCKELX_EMPTY); + } + } + break; + } + + MoveNext(); // skip ')' + + return pElx; + } + else + { + // group and number + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + index_t nThisBackref = ++m_nMaxNumber; + + // left, center, right + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, (flags & RIGHTTOLEFT) ? 1 : 0))); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, (flags & RIGHTTOLEFT) ? 
0 : 1))); + + // for recursive + m_grouplist.Prepare(nThisBackref); + m_grouplist[nThisBackref] = pList; + + // right + MoveNext(); // skip ')' + + return pList; + } +} + +template ElxInterface * CBuilderT ::BuildBoundary(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch (ch) + { + case RCHART('^'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_BEGIN : BOUNDARY_FILE_BEGIN)); + + case RCHART('$'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_END : BOUNDARY_FILE_END)); + + case RCHART('b'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE)); + + case RCHART('B'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE, 0)); + + case RCHART('A'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_BEGIN)); + + case RCHART('Z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END_N)); + + case RCHART('z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END)); + + case RCHART('G'): + if (flags & GLOBAL) + return Keep(new CGlobalElx()); + else + return GetStockElx(STOCKELX_EMPTY); + + default: + return GetStockElx(STOCKELX_EMPTY); + } +} + +template ElxInterface * CBuilderT ::BuildBackref(int & flags) +{ + // skip '\\' or '\k' or '\g' + MoveNext(); + + if (curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + CHART named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\''); + CBackrefElxT * pbackref = (CBackrefElxT *)Keep(new CBackrefElxT (-1, flags & RIGHTTOLEFT, flags & IGNORECASE)); + + MoveNext(); // skip '<' or '\'' + + // save name + CBufferT & name = pbackref->m_szNamed; + CBufferT num; + + while (curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if (ReadDec(str, number) ? 
(*str == '\0') : 0) + { + pbackref->m_nnumber = number; + name.Release(); + } + else + { + m_namedbackreflist.Push(pbackref); + } + + return pbackref; + } + else + { + unsigned int nbackref = 0; + + for (int i = 0; i < 3; i++) + { + if (curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) + nbackref = nbackref * 10 + (curr.ch - RCHART('0')); + else + break; + + MoveNext(); + } + + return Keep(new CBackrefElxT (nbackref, flags & RIGHTTOLEFT, flags & IGNORECASE)); + } +} + +template int CBuilderT ::ReadDec(char * & str, unsigned int & dec) +{ + int s = 0; + while (str[s] != 0 && isspace(str[s])) s++; + + if (str[s] < '0' || str[s] > '9') return 0; + + dec = 0; + unsigned int i; + + for (i = s; i < sizeof(CHART) * 3 + s; i++) + { + if (str[i] >= '0' && str[i] <= '9') + dec = dec * 10 + (str[i] - '0'); + else + break; + } + + while (str[i] != 0 && isspace(str[i])) i++; + str += i; + + return 1; +} + +// +// Regexp +// +template class CRegexpT +{ +public: + CRegexpT(const CHART * pattern = 0, int flags = 0); + CRegexpT(const CHART * pattern, index_t length, int flags); + void Compile(const CHART * pattern, int flags = 0); + void Compile(const CHART * pattern, index_t length, int flags); + +public: + MatchResult MatchExact(const CHART * tstring, CContext * pContext = 0) const; + MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = 0) const; + MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = 0) const; + MatchResult Match(const CHART * tstring, int length, int start, CContext * pContext = 0) const; + MatchResult Match(CContext * pContext) const; + CContext * PrepareMatch(const CHART * tstring, index_t start = -1, CContext * pContext = 0) const; + CContext * PrepareMatch(const CHART * tstring, index_t length, index_t start, CContext * pContext = 0) const; + CHART * Replace(const CHART * tstring, const CHART * replaceto, index_t start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + CHART 
* Replace(const CHART * tstring, index_t string_length, const CHART * replaceto, index_t to_length, index_t & result_length, index_t start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + int GetNamedGroupNumber(const CHART * group_name) const; + +public: + static void ReleaseString(CHART * tstring); + static void ReleaseContext(CContext * pContext); + +public: + CBuilderT m_builder; +}; + +// +// Implementation +// +template CRegexpT ::CRegexpT(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template CRegexpT ::CRegexpT(const CHART * pattern, index_t length, int flags) +{ + Compile(pattern, length, flags); +} + +template inline void CRegexpT ::Compile(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template void CRegexpT ::Compile(const CHART * pattern, index_t length, int flags) +{ + m_builder.Clear(); + if (pattern != 0) m_builder.Build(CBufferRefT(pattern, length), flags); +} + +template inline MatchResult CRegexpT ::MatchExact(const CHART * tstring, CContext * pContext) const +{ + return MatchExact(tstring, CBufferRefT(tstring).GetSize(), pContext); +} + +template MatchResult CRegexpT ::MatchExact(const CHART * tstring, int length, CContext * pContext) const +{ + if (m_builder.m_pTopElx == 0) + return 0; + + // info + int endpos = 0; + + CContext context; + if (pContext == 0) pContext = &context; + + pContext->m_stack.Restore(0); + pContext->m_capturestack.Restore(0); + pContext->m_captureindex.Restore(0); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if (m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + endpos = 0; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + endpos = length; + } + + 
pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; + pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(-1); + + // match + if (!m_builder.m_pTopElx->Match(pContext)) + return 0; + else + { + while (pContext->m_nCurrentPos != endpos) + { + if (!m_builder.m_pTopElx->MatchNext(pContext)) + return 0; + else + { + if (pContext->m_nLastBeginPos == pContext->m_nBeginPos && pContext->m_nBeginPos == pContext->m_nCurrentPos) + return 0; + else + pContext->m_nLastBeginPos = pContext->m_nCurrentPos; + } + } + + // end pos + pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + return MatchResult(pContext, m_builder.m_nMaxNumber); + } +} + +template MatchResult CRegexpT ::Match(const CHART * tstring, int start, CContext * pContext) const +{ + return Match(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template MatchResult CRegexpT ::Match(const CHART * tstring, int length, int start, CContext * pContext) const +{ + if (m_builder.m_pTopElx == 0) + return 0; + + CContext context; + if (pContext == 0) pContext = &context; + + PrepareMatch(tstring, length, start, pContext); + + return Match(pContext); +} + +template MatchResult CRegexpT ::Match(CContext * pContext) const +{ + if (m_builder.m_pTopElx == 0) + return 0; + + index_t endpos; + int delta; + + if (m_builder.m_nFlags & RIGHTTOLEFT) + { + endpos = -1; + delta = -1; + } + else + { + endpos = pContext->m_pMatchStringLength + 1; + delta = 1; + } + + while (pContext->m_nCurrentPos != endpos) + { + pContext->m_captureindex.Restore(0); + pContext->m_stack.Restore(0); + pContext->m_capturestack.Restore(0); + + pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; + pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + 
pContext->m_capturestack.Push(-1); + + if (m_builder.m_pTopElx->Match(pContext)) + { + pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + // zero width + if (pContext->m_capturestack[1] == pContext->m_nCurrentPos) + { + pContext->m_nCurrentPos += delta; + } + + // save pos + pContext->m_nLastBeginPos = pContext->m_nBeginPos; + pContext->m_nBeginPos = pContext->m_nCurrentPos; + + // return + return MatchResult(pContext, m_builder.m_nMaxNumber); + } + else + { + pContext->m_nCurrentPos += delta; + } + } + + return 0; +} + +template inline CContext * CRegexpT ::PrepareMatch(const CHART * tstring, index_t start, CContext * pContext) const +{ + return PrepareMatch(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template CContext * CRegexpT ::PrepareMatch(const CHART * tstring, index_t length, index_t start, CContext * pContext) const +{ + if (m_builder.m_pTopElx == 0) + return 0; + + if (pContext == 0) pContext = new CContext(); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if (start < 0) + { + if (m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + } + } + else + { + if (start > length) start = length + ((m_builder.m_nFlags & RIGHTTOLEFT) ? 
0 : 1); + + pContext->m_nBeginPos = start; + pContext->m_nCurrentPos = start; + } + + return pContext; +} + +template inline int CRegexpT ::GetNamedGroupNumber(const CHART * group_name) const +{ + return m_builder.GetNamedNumber(group_name); +} + +template CHART * CRegexpT ::Replace(const CHART * tstring, const CHART * replaceto, index_t start, int ntimes, MatchResult * result, CContext * pContext) const +{ + index_t result_length = 0; + return Replace(tstring, CBufferRefT(tstring).GetSize(), replaceto, CBufferRefT(replaceto).GetSize(), result_length, start, ntimes, result, pContext); +} + +template CHART * CRegexpT ::Replace(const CHART * tstring, index_t string_length, const CHART * replaceto, index_t to_length, index_t & result_length, index_t start, int ntimes, MatchResult * remote_result, CContext * oContext) const +{ + if (m_builder.m_pTopElx == 0) return 0; + + // --- compile replace to --- + + CBufferT compiledto; + + static const CHART rtoptn[] = { RCHART('\\'), RCHART('$'), RCHART('('), RCHART('?'), RCHART(':'), RCHART('['), RCHART('$'), RCHART('&'), RCHART('`'), RCHART('\''), RCHART('+'), RCHART('_'), RCHART('\\'), RCHART('d'), RCHART(']'), RCHART('|'), RCHART('\\'), RCHART('{'), RCHART('.'), RCHART('*'), RCHART('?'), RCHART('\\'), RCHART('}'), RCHART(')'), RCHART('\0') }; + static CRegexpT rtoreg(rtoptn); + + MatchResult local_result(0), *result = remote_result ? 
remote_result : &local_result; + + // prepare + CContext * pContext = rtoreg.PrepareMatch(replaceto, to_length, -1, oContext); + index_t lastIndex = 0; + index_t nmatch = 0; + + while (((*result) = rtoreg.Match(pContext)).IsMatched()) + { + index_t delta = result->GetStart() - lastIndex; + if (delta > 0) + { + delta = result->GetStart() - lastIndex; + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + + lastIndex = result->GetStart(); + delta = 2; + + switch (replaceto[lastIndex + 1]) + { + case RCHART('$'): + compiledto.Push(lastIndex); + compiledto.Push(1); + break; + + case RCHART('&'): + case RCHART('`'): + case RCHART('\''): + case RCHART('+'): + case RCHART('_'): + compiledto.Push(-1); + compiledto.Push((int)replaceto[lastIndex + 1]); + break; + + case RCHART('{'): + delta = result->GetEnd() - result->GetStart(); + nmatch = m_builder.GetNamedNumber(CBufferRefT (replaceto + (lastIndex + 2), delta - 3)); + + if (nmatch > 0 && nmatch <= m_builder.m_nMaxNumber) + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + else + { + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + break; + + default: + nmatch = 0; + for (delta = 1; delta <= 3; delta++) + { + CHART ch = replaceto[lastIndex + delta]; + + if (ch < RCHART('0') || ch > RCHART('9')) + break; + + nmatch = nmatch * 10 + (ch - RCHART('0')); + } + + if (nmatch > m_builder.m_nMaxNumber) + { + while (nmatch > m_builder.m_nMaxNumber) + { + nmatch /= 10; + delta--; + } + + if (nmatch == 0) + { + delta = 1; + } + } + + if (delta == 1) + { + compiledto.Push(lastIndex); + compiledto.Push(1); + } + else + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + break; + } + + lastIndex += delta; + } + + if (lastIndex < to_length) + { + compiledto.Push(lastIndex); + compiledto.Push(to_length - lastIndex); + } + + int rightleft = m_builder.m_nFlags & RIGHTTOLEFT; + + index_t cmplSize = compiledto.GetSize(); + index_t tb = rightleft ? cmplSize - 2 : 0; + index_t te = rightleft ? 
-2 : cmplSize; + index_t ts = rightleft ? -2 : 2; + + // --- compile complete --- + + index_t beginpos = rightleft ? string_length : 0; + index_t endpos = rightleft ? 0 : string_length; + + index_t toIndex0 = 0; + index_t toIndex1 = 0; + index_t i; + int ntime; + + CBufferT buffer; + + // prepare + pContext = PrepareMatch(tstring, string_length, start, pContext); + lastIndex = beginpos; + + // Match + for (ntime = 0; ntimes < 0 || ntime < ntimes; ntime++) + { + (*result) = Match(pContext); + + if (!result->IsMatched()) + break; + + // before + if (rightleft) + { + index_t distance = lastIndex - result->GetEnd(); + if (distance) + { + buffer.Push(tstring + result->GetEnd()); + buffer.Push((const CHART *)distance); + + toIndex1 -= distance; + } + lastIndex = result->GetStart(); + } + else + { + index_t distance = result->GetStart() - lastIndex; + if (distance) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)distance); + + toIndex1 += distance; + } + lastIndex = result->GetEnd(); + } + + toIndex0 = toIndex1; + + // middle + for (i = tb; i != te; i += ts) + { + index_t off = compiledto[i]; + index_t len = compiledto[i + 1]; + + const CHART * sub = replaceto + off; + + if (off == -1) + { + switch (RCHART(len)) + { + case RCHART('&'): + sub = tstring + result->GetStart(); + len = result->GetEnd() - result->GetStart(); + break; + + case RCHART('`'): + sub = tstring; + len = result->GetStart(); + break; + + case RCHART('\''): + sub = tstring + result->GetEnd(); + len = string_length - result->GetEnd(); + break; + + case RCHART('+'): + for (nmatch = result->MaxGroupNumber(); nmatch >= 0; nmatch--) + { + if (result->GetGroupStart(nmatch) >= 0) break; + } + sub = tstring + result->GetGroupStart(nmatch); + len = result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch); + break; + + case RCHART('_'): + sub = tstring; + len = string_length; + break; + } + } + else if (off == -2) + { + //TODO:@@@ check to use nmatch instead of len here ??? 
+ int l = static_cast(len); + sub = tstring + result->GetGroupStart(l); + len = result->GetGroupEnd(l) - result->GetGroupStart(l); + } + + buffer.Push(sub); + buffer.Push((const CHART *)len); + + toIndex1 += rightleft ? (-len) : len; + } + } + + // after + if (rightleft) + { + if (endpos < lastIndex) + { + buffer.Push(tstring + endpos); + buffer.Push((const CHART *)(lastIndex - endpos)); + } + } + else + { + if (lastIndex < endpos) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)(endpos - lastIndex)); + } + } + + if (oContext == 0) ReleaseContext(pContext); + + // join string + result_length = 0; + for (i = 0; i < buffer.GetSize(); i += 2) + { + result_length += (index_t)buffer[i + 1]; + } + + CBufferT result_string; + result_string.Prepare(result_length); + result_string.Restore(0); + + if (rightleft) + { + for (i = buffer.GetSize() - 2; i >= 0; i -= 2) + { + result_string.Append(buffer[i], (index_t)buffer[i + 1]); + } + } + else + { + for (i = 0; i < buffer.GetSize(); i += 2) + { + result_string.Append(buffer[i], (index_t)buffer[i + 1]); + } + } + + result_string.Append(0); + + result->m_result.Append(result_length, 3); + result->m_result.Append(ntime); + + if (rightleft) + { + result->m_result.Append(result_length - toIndex1); + result->m_result.Append(result_length - toIndex0); + } + else + { + result->m_result.Append(toIndex0); + result->m_result.Append(toIndex1); + } + + return result_string.Detach(); +} + +template inline void CRegexpT ::ReleaseString(CHART * tstring) +{ + if (tstring != 0) free(tstring); +} + +template inline void CRegexpT ::ReleaseContext(CContext * pContext) +{ + if (pContext != 0) delete pContext; +} + +// +// All implementations +// +template CAlternativeElxT ::CAlternativeElxT() +{} + +template int CAlternativeElxT ::Match(CContext * pContext) const +{ + if (m_elxlist.GetSize() == 0) + return 1; + + // try all + for (int n = 0; n < m_elxlist.GetSize(); n++) + { + if (m_elxlist[n]->Match(pContext)) + { + 
pContext->m_stack.Push(n); + return 1; + } + } + + return 0; +} + +template int CAlternativeElxT ::MatchNext(CContext * pContext) const +{ + if (m_elxlist.GetSize() == 0) + return 0; + + index_t n = 0; + + // recall prev + pContext->m_stack.Pop(n); + + // prev + if (m_elxlist[n]->MatchNext(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + else + { + // try rest + for (n++; n < m_elxlist.GetSize(); n++) + { + if (m_elxlist[n]->Match(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + } + + return 0; + } +} + +// assertx.cpp: implementation of the CAssertElx class. +// +template CAssertElxT ::CAssertElxT(ElxInterface * pelx, int byes) +{ + m_pelx = pelx; + m_byes = byes; +} + +template int CAssertElxT ::Match(CContext * pContext) const +{ + index_t nbegin = pContext->m_nCurrentPos; + index_t nsize = pContext->m_stack.GetSize(); + index_t ncsize = pContext->m_capturestack.GetSize(); + int bsucc; + + // match + if (m_byes) + bsucc = m_pelx->Match(pContext); + else + bsucc = !m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + + if (bsucc) + pContext->m_stack.Push(ncsize); + else + pContext->m_capturestack.Restore(ncsize); + + return bsucc; +} + +template int CAssertElxT ::MatchNext(CContext * pContext) const +{ + index_t ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_capturestack.Restore(ncsize); + + return 0; +} + +// emptyelx.cpp: implementation of the CEmptyElx class. +// +template CEmptyElxT ::CEmptyElxT() +{} + +template int CEmptyElxT ::Match(CContext *) const +{ + return 1; +} + +template int CEmptyElxT ::MatchNext(CContext *) const +{ + return 0; +} + +// globalx.cpp: implementation of the CGlobalElx class. 
+// +template CGlobalElxT ::CGlobalElxT() +{} + +template int CGlobalElxT ::Match(CContext * pContext) const +{ + return pContext->m_nCurrentPos == pContext->m_nBeginPos; +} + +template int CGlobalElxT ::MatchNext(CContext *) const +{ + return 0; +} + +// greedelx.cpp: implementation of the CGreedyElx class. +// +template CGreedyElxT ::CGreedyElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CGreedyElxT ::Match(CContext * pContext) const +{ + if (!CRepeatElxT ::MatchFixed(pContext)) + return 0; + + while (!MatchVart(pContext)) + { + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + } + + return 1; +} + +template int CGreedyElxT ::MatchNext(CContext * pContext) const +{ + if (MatchNextVart(pContext)) + return 1; + + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + + while (!MatchVart(pContext)) + { + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + } + + return 1; +} + +template int CGreedyElxT ::MatchVart(CContext * pContext) const +{ + int n = 0; + index_t nbegin00 = pContext->m_nCurrentPos; + index_t nsize = pContext->m_stack.GetSize(); + index_t ncsize = pContext->m_capturestack.GetSize(); + + while (n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n++; + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(pContext->m_nCurrentPos); + pContext->m_stack.Push(1); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +template int CGreedyElxT ::MatchNextVart(CContext * pContext) const +{ + index_t n, nbegin00, nsize, ncsize; + CSortedBufferT nbegin99; + pContext->m_stack.Pop(n); + pContext->m_stack.Pop(nbegin00); + pContext->m_stack.Pop(nbegin99); + pContext->m_stack.Pop(nsize); + pContext->m_stack.Pop(ncsize); + + if (n == 0) return 0; + + index_t n0 = n; + + if (!CRepeatElxT::m_pelx->MatchNext(pContext)) + { + n--; + } + + // not to re-match + else if (pContext->m_nCurrentPos == 
nbegin00) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin00; + + return 0; + } + + // fix 2012-10-26, thanks to chenlx01@sohu.com + else + { + CContextShot shot(pContext); + + while (n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n++; + } + + if (nbegin99.Find(pContext->m_nCurrentPos) >= 0) + { + shot.Restore(pContext); + n = n0; + } + else + { + nbegin99.Add(pContext->m_nCurrentPos); + } + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(nbegin99); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +// indepelx.cpp: implementation of the CIndependentElx class. +// +template CIndependentElxT ::CIndependentElxT(ElxInterface * pelx) +{ + m_pelx = pelx; +} + +template int CIndependentElxT ::Match(CContext * pContext) const +{ + index_t nbegin = pContext->m_nCurrentPos; + index_t nsize = pContext->m_stack.GetSize(); + index_t ncsize = pContext->m_capturestack.GetSize(); + + // match + int bsucc = m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + + if (bsucc) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CIndependentElxT ::MatchNext(CContext * pContext) const +{ + index_t nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + return 0; +} + +// listelx.cpp: implementation of the CListElx class. +// +template CListElxT ::CListElxT(int brightleft) +{ + m_brightleft = brightleft; +} + +template int CListElxT ::Match(CContext * pContext) const +{ + if (m_elxlist.GetSize() == 0) + return 1; + + // prepare + index_t bol = m_brightleft ? m_elxlist.GetSize() : -1; + index_t stp = m_brightleft ? -1 : 1; + index_t eol = m_brightleft ? 
-1 : m_elxlist.GetSize(); + + // from first + index_t n = bol + stp; + + // match all + while (n != eol) + { + if (m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while (n != bol && !m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if (n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +template int CListElxT ::MatchNext(CContext * pContext) const +{ + if (m_elxlist.GetSize() == 0) + return 0; + + // prepare + index_t bol = m_brightleft ? m_elxlist.GetSize() : -1; + index_t stp = m_brightleft ? -1 : 1; + index_t eol = m_brightleft ? -1 : m_elxlist.GetSize(); + + // from last + index_t n = eol - stp; + + while (n != bol && !m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if (n != bol) + n += stp; + else + return 0; + + // match rest + while (n != eol) + { + if (m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while (n != bol && !m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if (n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +// mresult.cpp: implementation of the MatchResult class. +// +template MatchResultT ::MatchResultT(CContext * pContext, index_t nMaxNumber) +{ + if (pContext != 0) + { + m_result.Prepare(nMaxNumber * 2 + 3, -1); + + // matched + m_result[0] = 1; + m_result[1] = nMaxNumber; + + for (int n = 0; n <= nMaxNumber; n++) + { + index_t index = pContext->m_captureindex[n]; + //if( index < 0 ) continue; + if (!CBracketElxT::CheckCaptureIndex(index, pContext, n)) continue; + // check enclosed + index_t pos1 = pContext->m_capturestack[index + 1]; + index_t pos2 = pContext->m_capturestack[index + 2]; + + // info + m_result[n * 2 + 2] = pos1 < pos2 ? pos1 : pos2; + m_result[n * 2 + 3] = pos1 < pos2 ? 
pos2 : pos1; + } + } +} + +template inline int MatchResultT ::IsMatched() const +{ + return static_cast(m_result.At(0, 0)); +} + +template inline index_t MatchResultT ::MaxGroupNumber() const +{ + return m_result.At(1, 0); +} + +template inline index_t MatchResultT ::GetStart() const +{ + return m_result.At(2, -1); +} + +template inline index_t MatchResultT ::GetEnd() const +{ + return m_result.At(3, -1); +} + +template inline index_t MatchResultT ::GetGroupStart(index_t nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2, -1); +} + +template inline index_t MatchResultT ::GetGroupEnd(index_t nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2 + 1, -1); +} + +template MatchResultT & MatchResultT :: operator = (const MatchResultT & result) +{ + m_result.Restore(0); + if (result.m_result.GetSize() > 0) m_result.Append(result.m_result.GetBuffer(), result.m_result.GetSize()); + + return *this; +} + +// posselx.cpp: implementation of the CPossessiveElx class. +// +template CPossessiveElxT ::CPossessiveElxT(ElxInterface * pelx, int nmin, int nmax) : CGreedyElxT (pelx, nmin, nmax) +{} + +template int CPossessiveElxT ::Match(CContext * pContext) const +{ + index_t nbegin = pContext->m_nCurrentPos; + index_t nsize = pContext->m_stack.GetSize(); + index_t ncsize = pContext->m_capturestack.GetSize(); + int bsucc = 1; + + // match + if (!CRepeatElxT ::MatchFixed(pContext)) + { + bsucc = 0; + } + else + { + while (!CGreedyElxT ::MatchVart(pContext)) + { + if (!CRepeatElxT ::MatchNextFixed(pContext)) + { + bsucc = 0; + break; + } + } + } + + // status + pContext->m_stack.Restore(nsize); + + if (bsucc) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CPossessiveElxT ::MatchNext(CContext * pContext) const +{ + index_t nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + 
return 0; +} + +// reluctx.cpp: implementation of the CReluctantElx class. +// +template CReluctantElxT ::CReluctantElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CReluctantElxT ::Match(CContext * pContext) const +{ + if (!CRepeatElxT ::MatchFixed(pContext)) + return 0; + + while (!MatchVart(pContext)) + { + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + } + + return 1; +} + +template int CReluctantElxT ::MatchNext(CContext * pContext) const +{ + if (MatchNextVart(pContext)) + return 1; + + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + + while (!MatchVart(pContext)) + { + if (!CRepeatElxT ::MatchNextFixed(pContext)) + return 0; + } + + return 1; +} + +template int CReluctantElxT ::MatchVart(CContext * pContext) +{ + pContext->m_stack.Push(0); + + return 1; +} + +template int CReluctantElxT ::MatchNextVart(CContext * pContext) const +{ + index_t n = 0, nbegin = pContext->m_nCurrentPos; + + pContext->m_stack.Pop(n); + + if (n < m_nvart && CRepeatElxT ::m_pelx->Match(pContext)) + { + while (pContext->m_nCurrentPos == nbegin) + { + if (!CRepeatElxT ::m_pelx->MatchNext(pContext)) break; + } + + if (pContext->m_nCurrentPos != nbegin) + { + n++; + + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + while (n > 0) + { + pContext->m_stack.Pop(nbegin); + + while (CRepeatElxT ::m_pelx->MatchNext(pContext)) + { + if (pContext->m_nCurrentPos != nbegin) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + n--; + } + + return 0; +} + +// repeatx.cpp: implementation of the CRepeatElx class. 
+// +template CRepeatElxT ::CRepeatElxT(ElxInterface * pelx, int ntimes) +{ + m_pelx = pelx; + m_nfixed = ntimes; +} + +template int CRepeatElxT ::Match(CContext * pContext) const +{ + return MatchFixed(pContext); +} + +template int CRepeatElxT ::MatchNext(CContext * pContext) const +{ + return MatchNextFixed(pContext); +} + +template int CRepeatElxT ::MatchFixed(CContext * pContext) const +{ + if (m_nfixed == 0) + return 1; + + int n = 0; + + while (n < m_nfixed) + { + if (m_pelx->Match(pContext)) + { + n++; + } + else + { + n--; + + while (n >= 0 && !m_pelx->MatchNext(pContext)) + n--; + + if (n >= 0) + n++; + else + return 0; + } + } + + return 1; +} + +template int CRepeatElxT ::MatchNextFixed(CContext * pContext) const +{ + if (m_nfixed == 0) + return 0; + + // from last + int n = m_nfixed - 1; + + while (n >= 0 && !m_pelx->MatchNext(pContext)) + n--; + + if (n >= 0) + n++; + else + return 0; + + // match rest + while (n < m_nfixed) + { + if (m_pelx->Match(pContext)) + { + n++; + } + else + { + n--; + + while (n >= 0 && !m_pelx->MatchNext(pContext)) + n--; + + if (n >= 0) + n++; + else + return 0; + } + } + + return 1; +} + +// Regexp +typedef CRegexpT CRegexpA; +typedef CRegexpT CRegexpW; + +#if defined(_UNICODE) || defined(UNICODE) +typedef CRegexpW CRegexp; +#else +typedef CRegexpA CRegexp; +#endif + +} + +#endif//__DEELX_REGEXP64__H__ diff --git a/scintilla/deelx/doc/Deelx - Regular Expression Engine - Features.url b/scintilla/deelx/doc/Deelx - Regular Expression Engine - Features.url new file mode 100644 index 000000000..d4f73160e --- /dev/null +++ b/scintilla/deelx/doc/Deelx - Regular Expression Engine - Features.url @@ -0,0 +1,2 @@ +[InternetShortcut] +URL=http://www.regexlab.com/en/deelx/introidx.htm diff --git a/scintilla/deelx/doc/Deelx Regular Expression Syntax Reference.txt b/scintilla/deelx/doc/Deelx Regular Expression Syntax Reference.txt new file mode 100644 index 000000000..003c5821f --- /dev/null +++ b/scintilla/deelx/doc/Deelx Regular 
Expression Syntax Reference.txt @@ -0,0 +1,426 @@ +Regular Expression Syntax Reference http://www.regexlab.com/en/regref.htm + + [All rights reserved: http://www.regexlab.com/en/regref.htm] + [Author: sswater shi (sswater@gmail.com)] + + + Introduction + + Regular expression is to express a characteristic in a string, and then to match another string + with the characteristic. For example, pattern "ab+" means "one 'a' and at least one 'b' ", so "ab", + "abb", "abbbbbbb" match the pattern. + + Regular expression is used to: (1) test a string whether it matches a pattern, such as an email + address. (2) to find a substring which matches certain pattern, from a whole text. (3) to do + complex replacement in a text. + + It is very simple to study regular expression syntax, and the few abstract concepts can be + understood easily too. Many articles do not introduce its concepts from simple ones to + abstract ones step by step, so some persons may feel it is difficult to study. On the other hand, + each regular expression engine's document will describe its special function, but this part of + special function is not what we should study first. + + + 1. Regular Expression Basic Syntax + + 1.1 Common Characters + + Letters, numbers, the underscore, and punctuations with no special definition are "common + characters". When regular expression matches a string, a common character can match the + same character. + + - Example1: When pattern "c" matches string "abcde", match result: success; substring + matched: "c"; position: starts at 2, ends at 3. + + - Example2: When pattern "bcd" matches string "abcde", match result: success; substring + matched: "bcd"; position: starts at 1, ends at 4. + + + 1.2 Simple escaped characters + + Nonprinting characters which we know: + + Expression Matches + \r, \n Carriage return, newline character + \t Tabs + \\ Matches "\" itself + + Some punctuations are specially defined in regular expression. 
To match these characters in + string, add "\" in pattern. For example: ^, $ has special definition, so we need to use "\^" and + "\$" to match them. + + + Expression Matches + \^ Matches "^" itself + \$ Matches "$" itself + + \. Matches dot(.) itself + + These escaped characters have the same effect as "common characters": to match a certain + character. + + + - Example1: When pattern "\$d" matches string "abc$de", match result: success; substring + matched: "$d"; position: starts at 3, ends at 5. + + + 1.3 Expression matches anyone of many characters + + Some expressions can match anyone of many characters. For example: "\d" can match any + number character. Each of these expressions can match only one character at one time, though + they can match any character of a certain group of characters. + + Expression Matches + + \d Any digit character, any one of 0~9 + \w Any alpha, numeric, underline, any one of A~Z,a~z,0~9,_ + \s Any one of space, tab, newline, return, or newpage character + + . '.' matches any character except the newline character(\n) + + - Example1: When pattern "\d\d" matches "abc123", match result: success; substring + matched: "12"; position: starts at 3, ends at 5. + + - Example2: When pattern "a.\d" matches "aaa100", match result: success; substring + matched: "aa1"; position: starts at 1, ends at 4. + + + 1.4 Custom expression matches anyone of many characters + + Expression uses square brackets [ ] to contain a series of characters, it can match anyone of + them. Uses [^ ] to contain a series of characters, it can match anyone character except + characters contained. + + Expression Matches + + [ab5@] Matches "a" or "b" or "5" or "@" + [^abc] Matches any character except "a","b","c" + + [f-k] Any character among "f"~"k" + [^A-F0-3] Any character except "A"~"F","0"~"3" + + - Example1: When pattern "[bcd][bcd]" matches "abc123" , match result: success; substring + matched: "bc"; position: starts at 1, ends at 3. 
+ + - Example2: When pattern "[^abc]" matches "abc123", match result: success; substring + matched: "1"; position: starts at 3, ends at 4. + + + 1.5 Special expression to quantify matching + + All expressions introduced before can match a character only one time. If an expression is + followed by a quantifier, it can match more than one time. + + + For example: we can use the pattern "[bcd]{2}" instead of "[bcd][bcd]". + + Expression Function + {n} Match exactly n times, example: "\w{2}" equals "\w\w"; + "a{5}" equals "aaaaa" + + {m,n} At least m but no more than n times: "ba{1,3}" matches "ba","baa","baaa" + + {m,} Match at least m times: "\w\d{2,}" matches "a12","_456","M12344"... + ? Match 1 or 0 times, equivalent to {0,1}: "a[cd]?" matches "a","ac","ad". + + + Match 1 or more times, equivalent to {1,}: "a+b" matches "ab","aab","aaab"... + * Match 0 or more times, equivalent to {0,}: "\^*b" matches "b","^^^b"... + + + - Example1: When pattern "\d+\.?\d*" matches "It costs $12.5", match result: success; + substring matched: "12.5"; position: starts at 10, ends at 14. + + + - Example2: When pattern "go{2,8}gle" matches "Ads by goooooogle", match result: + success; substring matched: "goooooogle"; position: starts at 7, ends at 17. + + + + 1.6 Some special punctuations with abstract function + + Some punctuations in pattern have special function: + + Expression Function + ^ Match the beginning of the string + $ Match the end of the string + + \b Match a word boundary + + More examples to help you to understand. + + - Example1: When pattern "^aaa" matches "xxx aaa xxx", match result: failed. Because "^" + must match the beginning of the string. It could match successfully on condition that "aaa" is + at the beginning of the string, such as "aaa xxx xxx". + + - Example2: When pattern "aaa$" matches "xxx aaa xxx", match result: failed. Because "$" + must match the end of the string. 
It could match successfully on condition that "aaa" is at the + end of the string, such as "xxx xxx aaa". + + - Example3: When pattern ".\b." matches "@@@abc", match result: success; substring + matched: "@a"; position: starts at 2, ends at 4. + Further explanation: "\b" is similar to "^" and "$", matches no character itself, but it requires a + '\w' character on one side and a non-'\w' character on the other side. + + + - Example4: When pattern "\bend\b" matches "weekend,endfor,end", match result: + success; substring matched: "end"; position: starts at 15, ends at 18. + + Some special punctuation can affect other sub-patterns: + + + Expression Function + | Alternation, matches either left side or right side + ( ) (1). Let sub-patterns in it to be a whole part when it is quantified. + (2). Match result of sub-patterns in it can be retrieved individually + + + - Example5: When pattern "Tom|Jack" matches string "I'm Tom, he is Jack", match result: + success; substring matched: "Tom"; position: starts at 4, ends at 7. When match next, match + result: success; substring matched: "Jack"; position: starts at 15, ends at 19. + + + - Example6: When pattern "(go\s*)+" matches "Let's go go go!", match result: success; + substring matched: "go go go"; position: starts at 6, ends at 14. + + - Example7: When pattern "¥(\d+\.?\d*)" matches "$10.9,¥20.5", match result: success; + substring matched: "¥20.5"; position: starts at 6, ends at 11. Match result of sub-patterns + in "( )" is: "20.5". + + + 2. Regular expression advanced syntax + + 2.1 Reluctant or greedy quantifiers + + There are several methods to quantify a subpattern, such as: "{m,n}", "{m,}", "?", "*", "+". By + default, a quantified subpattern is "greedy", that is, it will match as many times as possible + (given a particular starting location) while still allowing the rest of the pattern to match. 
For + example, to match "dxxxdxxxd": + + Expression Match result + + (d)(\w+) "\w+" matches all characters "xxxdxxxd" behind of "d" + (d)(\w+)(d) "\w+" matches all characters "xxxdxxx" between the first "d" and the last + "d". In order to let the whole pattern match successfully, "\w+" has to give up the + last "d", although it can match the last "d" too. + + Thus it can be seen that: when "\w+" matches, it will match as many characters as possible. + In the second example, it does not match the last "d", but this is in order to let the whole + pattern match successfully. Pattern with "*" or "{m,n}" will also match as many times as + possible, pattern with "?" will match if possible. This type of matching is called "greedy + matching". + + + Reluctant Matching: + + Following the quantifier with a "?" lets the pattern match the minimum number of + times possible. This type of matching is called reluctant matching. In order to let the whole + pattern match successfully, the reluctant pattern may match a few more times if it is required. + For example, to match "dxxxdxxxd": + + Expression Match result + + (d)(\w+?) "\w+?" matches as few times as possible, so "\w+?" matches only one "x" + (d)(\w+?)(d) In order to let the whole pattern match successfully, "\w+?" has to match + "xxx". So, match result is: "\w+?" matches "xxx" + + + More examples: + + - Example1: When pattern "<td>(.*)</td>" matches "<td><p>aa</p></td> + <td><p>bb</p></td>", match result: success; substring matched: the whole + "<td><p>aa</p></td> <td><p>bb</p></td>", "</td>" in the pattern matches the last + "</td>" in the string. + + - Example2: For comparison, when pattern "<td>(.*?)</td>" matches the string in + example1, it matches "<td><p>aa</p></td>". When match next, the next "<td><p>bb</p></td> 
+ " can be matched. + + + 2.2 Referring to matched substring \1, \2... + + During the process of matching, the match results of subpattern between parentheses "( )" + are recorded for later use. When retrieving match results, those match results of subpattern can + be retrieved individually, and this has been demonstrated many times in former examples. In + practice, parentheses "( )" must be used to get what we want indeed after match, such as + "<td>(.*?)</td>". + + + In fact, those match results of subpattern between parentheses can be used not only after + matching, but also during matching. The latter part of subpattern, can refer the match result of + former subpattern. Usage: "\" plus a number to refer to the corresponding substring. "\1" refers + to 1st pair of parentheses' match result, "\2" refers to 2nd pair of parentheses' match result. + + Examples: + + - Example1: When pattern "('|")(.*?)(\1)" matches " 'Hello', "World" ", match result: success; + substring matched: " 'Hello' "; when match next, substring matched: " "World" ". + + - Example2: When pattern "(\w)\1{4,}" matches "aa bbbb abcdefg ccccc 111121111 + 999999999", match result: success; substring matched: "ccccc"; when match next, substring + matched "999999999". This pattern requires a character of "\w" to repeat at least 5 times. + Pay attention to comparison with "\w{5,}". + + - Example3: When pattern "<(\w+)\s*(\w+(=('|").*?\4)?\s*)*>.*?</\1>" matches "<td id='td1' style="bgcolor:white"></td>", match result: success. If the names in "<td>" and "</td>" are + not the same, the match will fail. + + + 2.2b DEELX Regular Expression Replace Syntax + + $1 ~ $999 - Stands for what a certain group captured. If the number is larger than the max group number, + DEELX will use fewer digits, till the number is smaller than or equal to the max group number. + For example: + If the max group number is 20, "$999" means "$9" and common string "99", while "$15" means the 15th group. 
+ If you need "$1" and common string "5", you can use $0015 , DEELX recognizes at most 3 digits as a number. + + ${name} - Stands for what a named group captured. + $$ - Stands for a single dollar sign ($). + $& - Stands for what the overall expression captured. + $` - The substring before the beginning of what the overall expression captured in the original text. + $' - The substring behind the end of what the overall expression captured in the original text. + $+ - Stands for what a group captured, which group has the max group number among those groups + that have captured. For example: when "aaa(b+)|ccc(b+)" matches "aaabbb" , + $+ stands for $1, even though $2 has the max group number. + $_ - Stands for the whole original text. + + + 2.3 Lookahead assertion; Lookbehind assertion + In former chapters, I have introduced several punctuations with special function: + "^","$","\b". They all do not match any characters, but they all require certain conditions on + their position. Now, this chapter will introduce more methods to add conditions on the gap + between characters. + + Lookahead assertion: "(?=xxxxx)", "(?!xxxxx)" + + Format: "(?=xxxxx)", the condition which it adds on the gap is that: string on the right side of + the gap must be able to match the subpattern "xxxxx" between the parentheses. It is just a + condition, not a match operation, so there is no match result. + + - Example1: When pattern "Windows (?=NT|XP)" matches "Windows 98, Windows NT, + Windows 2000", it can match only "Windows " of "Windows NT", the other "Windows " could + not be matched. + + - Example2: When pattern "(\w)((?=\1\1\1)(\1))+" matches "aaa ffffff 999999999", it can + match first 4 "f"s among the 6 "f"s, it can match first 7 "9"s among 9 "9"s. + + Format: "(?!xxxxx)", string on the right side of the gap must not be able to match the + subpattern "xxxxx". 
+ + - Example3: When pattern "((?!\bstop\b).)+" matches "fdjka ljfdl stop fjdsla fdj", it will + match from the beginning of string to the position of "stop". If there is no "stop" in the string, + the pattern will match the whole string. + + + - Example4: When pattern "do(?!\w)" matches "done, do, dog", it can only match "do". + Here, "(?!\w)" has the same effect as "\b". + + Lookbehind assertion: "(?<=xxxxx)", "(? +#include +#include +#include +#include + +extern "C" { + typedef int (*POSIX_FUNC)(int); + int isblank(int c); +} + +// +// Data Reference +// +template class CBufferRefT +{ +public: + CBufferRefT(const ELT * pcsz, int length); + CBufferRefT(const ELT * pcsz); + +public: + int nCompare (const ELT * pcsz) const; + int nCompareNoCase(const ELT * pcsz) const; + int Compare (const ELT * pcsz) const; + int CompareNoCase(const ELT * pcsz) const; + int Compare (const CBufferRefT &) const; + int CompareNoCase(const CBufferRefT &) const; + + ELT At (int nIndex, ELT def = 0) const; + ELT operator [] (int nIndex) const; + + const ELT * GetBuffer() const; + int GetSize() const; + +public: + virtual ~CBufferRefT(); + +// Content +protected: + ELT * m_pBuffer; + int m_nSize; +}; + +// +// Implemenation +// +template CBufferRefT :: CBufferRefT(const ELT * pcsz, int length) +{ + m_pBuffer = (ELT *)pcsz; + m_nSize = length; +} + +template CBufferRefT :: CBufferRefT(const ELT * pcsz) +{ + m_pBuffer = (ELT *)pcsz; + m_nSize = 0; + + if(pcsz != 0) while(m_pBuffer[m_nSize] != 0) m_nSize ++; +} + +template int CBufferRefT :: nCompare(const ELT * pcsz) const +{ + for(int i=0; i int CBufferRefT :: nCompareNoCase(const ELT * pcsz) const +{ + for(int i=0; i inline int CBufferRefT :: Compare(const ELT * pcsz) const +{ + return nCompare(pcsz) ? 1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT :: CompareNoCase(const ELT * pcsz) const +{ + return nCompareNoCase(pcsz) ? 
1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT :: Compare(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompare(cref.GetBuffer()) : 1; +} + +template inline int CBufferRefT :: CompareNoCase(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompareNoCase(cref.GetBuffer()) : 1; +} + +template inline ELT CBufferRefT :: At(int nIndex, ELT def) const +{ + return nIndex >= m_nSize ? def : m_pBuffer[nIndex]; +} + +template inline ELT CBufferRefT :: operator [] (int nIndex) const +{ + return nIndex >= m_nSize ? 0 : m_pBuffer[nIndex]; +} + +template const ELT * CBufferRefT :: GetBuffer() const +{ + static const ELT _def[] = {0}; return m_pBuffer ? m_pBuffer : _def; +} + +template inline int CBufferRefT :: GetSize() const +{ + return m_nSize; +} + +template CBufferRefT :: ~CBufferRefT() +{ +} + +// +// Data Buffer +// +template class CBufferT : public CBufferRefT +{ +public: + CBufferT(const ELT * pcsz, int length); + CBufferT(const ELT * pcsz); + CBufferT(); + +public: + ELT & operator [] (int nIndex); + const ELT & operator [] (int nIndex) const; + void Append(const ELT * pcsz, int length, int eol = 0); + void Append(ELT el, int eol = 0); + +public: + void Push(ELT el); + void Push(const CBufferRefT & buf); + int Pop (ELT & el); + int Pop (CBufferT & buf); + int Peek(ELT & el) const; + +public: + const ELT * GetBuffer() const; + ELT * GetBuffer(); + ELT * Detach(); + void Release(); + void Prepare(int index, int fill = 0); + void Restore(int size); + + ELT * PrepareInsert(int nPos, int nSize) + { + int nOldSize = CBufferRefT::m_nSize; + Restore(nPos > CBufferRefT::m_nSize ? 
nPos : CBufferRefT::m_nSize + nSize); + + if( nPos < nOldSize ) + { + ELT * from = CBufferRefT::m_pBuffer + nPos, * to = CBufferRefT::m_pBuffer + nPos + nSize; + memmove(to, from, sizeof(ELT) * (nOldSize - nPos)); + } + + return CBufferRefT::m_pBuffer + nPos; + } + + void Insert(int nIndex, const ELT & rT) + { + Insert(nIndex, &rT, 1); + } + + void Insert(int nIndex, const ELT * pT, int nSize) + { + memcpy(PrepareInsert(nIndex, nSize), pT, sizeof(ELT) * nSize); + } + + void Remove(int nIndex) + { + Remove(nIndex, 1); + } + + void Remove(int nIndex, int nSize) + { + if( nIndex < CBufferRefT :: m_nSize ) + { + if( nIndex + nSize >= CBufferRefT :: m_nSize ) + { + Restore(nIndex); + } + else + { + memmove(CBufferRefT :: m_pBuffer + nIndex, CBufferRefT :: m_pBuffer + nIndex + nSize, sizeof(ELT) * (CBufferRefT :: m_nSize - nIndex - nSize)); + Restore(CBufferRefT :: m_nSize - nSize); + } + } + } + + void SetMaxLength(int nSize) + { + if( nSize > m_nMaxLength ) + { + if( m_nMaxLength < 8 ) + m_nMaxLength = 8; + + if( nSize > m_nMaxLength ) + m_nMaxLength *= 2; + + if( nSize > m_nMaxLength ) + { + m_nMaxLength = nSize + 11; + m_nMaxLength -= m_nMaxLength & 0x07; + } + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT :: m_pBuffer, sizeof(ELT) * m_nMaxLength); + } + } + +public: + virtual ~CBufferT(); + +// Content +protected: + int m_nMaxLength; +}; + +// +// Implemenation +// +template CBufferT :: CBufferT(const ELT * pcsz, int length) : CBufferRefT (0, length) +{ + m_nMaxLength = CBufferRefT :: m_nSize + 1; + + CBufferRefT :: m_pBuffer = (ELT *) malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT :: m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template CBufferT :: CBufferT(const ELT * pcsz) : CBufferRefT (pcsz) +{ + m_nMaxLength = CBufferRefT :: m_nSize + 1; + + CBufferRefT :: m_pBuffer = (ELT *) malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) * 
CBufferRefT :: m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template CBufferT :: CBufferT() : CBufferRefT (0, 0) +{ + m_nMaxLength = 0; + CBufferRefT::m_pBuffer = 0; +} + +template inline ELT & CBufferT :: operator [] (int nIndex) +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template inline const ELT & CBufferT :: operator [] (int nIndex) const +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template void CBufferT :: Append(const ELT * pcsz, int length, int eol) +{ + int nNewLength = m_nMaxLength; + + // Check length + if(nNewLength < 8) + nNewLength = 8; + + if(CBufferRefT :: m_nSize + length + eol > nNewLength) + nNewLength *= 2; + + if(CBufferRefT :: m_nSize + length + eol > nNewLength) + { + nNewLength = CBufferRefT :: m_nSize + length + eol + 11; + nNewLength -= nNewLength % 8; + } + + // Realloc + if(nNewLength > m_nMaxLength) + { + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + memcpy(CBufferRefT::m_pBuffer + CBufferRefT :: m_nSize, pcsz, sizeof(ELT) * length); + CBufferRefT :: m_nSize += length; + + if(eol > 0) CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template inline void CBufferT :: Append(ELT el, int eol) +{ + Append(&el, 1, eol); +} + +template void CBufferT :: Push(ELT el) +{ + // Realloc + if(CBufferRefT :: m_nSize >= m_nMaxLength) + { + int nNewLength = m_nMaxLength * 2; + if( nNewLength < 8 ) nNewLength = 8; + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize++] = el; +} + +template void CBufferT :: Push(const CBufferRefT & buf) +{ + for(int i=0; i inline int CBufferT :: Pop(ELT & el) +{ + if(CBufferRefT :: m_nSize > 0) + { + el = CBufferRefT::m_pBuffer[--CBufferRefT :: m_nSize]; + return 1; + } + else + { + return 0; + } +} + +template int CBufferT :: Pop (CBufferT & 
buf) +{ + int size, res = 1; + res = res && Pop(*(ELT*)&size); + buf.Restore(size); + + for(int i=size-1; i>=0; i--) + { + res = res && Pop(buf[i]); + } + + return res; +} + +template inline int CBufferT :: Peek(ELT & el) const +{ + if(CBufferRefT :: m_nSize > 0) + { + el = CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize - 1]; + return 1; + } + else + { + return 0; + } +} + +template const ELT * CBufferT :: GetBuffer() const +{ + static const ELT _def[] = {0}; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : _def; +} + +template ELT * CBufferT :: GetBuffer() +{ + static const ELT _def[] = {0}; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : (ELT *)_def; +} + +template ELT * CBufferT :: Detach() +{ + ELT * pBuffer = CBufferRefT::m_pBuffer; + + CBufferRefT :: m_pBuffer = 0; + CBufferRefT :: m_nSize = m_nMaxLength = 0; + + return pBuffer; +} + +template void CBufferT :: Release() +{ + ELT * pBuffer = Detach(); + + if(pBuffer != 0) free(pBuffer); +} + +template void CBufferT :: Prepare(int index, int fill) +{ + int nNewSize = index + 1; + + // Realloc + if(nNewSize > m_nMaxLength) + { + int nNewLength = m_nMaxLength; + + if( nNewLength < 8 ) + nNewLength = 8; + + if( nNewSize > nNewLength ) + nNewLength *= 2; + + if( nNewSize > nNewLength ) + { + nNewLength = nNewSize + 11; + nNewLength -= nNewLength % 8; + } + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // size + if( CBufferRefT :: m_nSize < nNewSize ) + { + memset(CBufferRefT::m_pBuffer + CBufferRefT :: m_nSize, fill, sizeof(ELT) * (nNewSize - CBufferRefT :: m_nSize)); + CBufferRefT :: m_nSize = nNewSize; + } +} + +template inline void CBufferT :: Restore(int size) +{ + SetMaxLength(size); + CBufferRefT :: m_nSize = size; +} + +template CBufferT :: ~CBufferT() +{ + if(CBufferRefT::m_pBuffer != 0) free(CBufferRefT::m_pBuffer); +} + +template class CSortedBufferT : public CBufferT +{ +public: + CSortedBufferT(int 
reverse = 0); + CSortedBufferT(int(*)(const void *, const void *)); + +public: + void Add(const T & rT); + void Add(const T * pT, int nSize); + int Remove(const T & rT); + void RemoveAll(); + + void SortFreeze() { m_bSortFreezed = 1; } + void SortUnFreeze(); + +public: + int Find(const T & rT, int(* compare)(const void *, const void *) = 0) { return FindAs(*(T*)&rT, compare); } + int FindAs(const T & rT, int(*)(const void *, const void *) = 0); + int GetSize() const { return CBufferRefT::m_nSize; } + T & operator [] (int nIndex) { return CBufferT :: operator [] (nIndex); } + +protected: + int (* m_fncompare)(const void *, const void *); + static int compareT(const void *, const void *); + static int compareReverseT(const void *, const void *); + + int m_bSortFreezed; +}; + +template CSortedBufferT :: CSortedBufferT(int reverse) +{ + m_fncompare = reverse ? compareReverseT : compareT; + m_bSortFreezed = 0; +} + +template CSortedBufferT :: CSortedBufferT(int (* compare)(const void *, const void *)) +{ + m_fncompare = compare; + m_bSortFreezed = 0; +} + +template void CSortedBufferT :: Add(const T & rT) +{ + if(m_bSortFreezed != 0) + { + Append(rT); + return; + } + + int a = 0, b = CBufferRefT::m_nSize - 1, c = CBufferRefT::m_nSize / 2; + + while(a <= b) + { + int r = m_fncompare(&rT, &CBufferRefT::m_pBuffer[c]); + + if ( r < 0 ) b = c - 1; + else if( r > 0 ) a = c + 1; + else break; + + c = (a + b + 1) / 2; + } + + Insert(c, rT); +} + +template void CSortedBufferT :: Add(const T * pT, int nSize) +{ + Append(pT, nSize); + + if(m_bSortFreezed == 0) + { + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT :: FindAs(const T & rT, int(* compare)(const void *, const void *)) +{ + const T * pT = (const T *)bsearch(&rT, CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), compare == 0 ? 
m_fncompare : compare); + + if( pT != NULL ) + return pT - CBufferRefT::m_pBuffer; + else + return -1; +} + +template int CSortedBufferT :: Remove(const T & rT) +{ + int pos = Find(rT); + if( pos >= 0 ) CBufferT :: Remove(pos); + return pos; +} + +template inline void CSortedBufferT :: RemoveAll() +{ + CBufferT::Restore(0); +} + +template void CSortedBufferT :: SortUnFreeze() +{ + if(m_bSortFreezed != 0) + { + m_bSortFreezed = 0; + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT :: compareT(const void * elem1, const void * elem2) +{ + if( *(const T *)elem1 == *(const T *)elem2 ) + return 0; + else if( *(const T *)elem1 < *(const T *)elem2 ) + return -1; + else + return 1; +} + +template int CSortedBufferT :: compareReverseT(const void * elem1, const void * elem2) +{ + if( *(const T *)elem1 == *(const T *)elem2 ) + return 0; + else if( *(const T *)elem1 > *(const T *)elem2 ) + return -1; + else + return 1; +} + +// +// Context +// +class CContext +{ +public: + CBufferT m_stack; + CBufferT m_capturestack, m_captureindex; + +public: + int m_nCurrentPos; + int m_nBeginPos; + int m_nLastBeginPos; + int m_nParenZindex; + int m_nCursiveLimit; + + void * m_pMatchString; + int m_pMatchStringLength; +}; + +class CContextShot +{ +public: + CContextShot(CContext * pContext) + { + m_nCurrentPos = pContext->m_nCurrentPos; + nsize = pContext->m_stack.GetSize(); + ncsize = pContext->m_capturestack.GetSize(); + } + + void Restore(CContext * pContext) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = m_nCurrentPos; + } + +public: + int m_nCurrentPos; + int nsize ; + int ncsize; +}; + +// +// Interface +// +class ElxInterface +{ +public: + virtual int Match (CContext * pContext) const = 0; + virtual int MatchNext(CContext * pContext) const = 0; + +public: + virtual ~ElxInterface() {}; +}; + +// +// Alternative +// +template class CAlternativeElxT : public 
ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAlternativeElxT(); + +public: + CBufferT m_elxlist; +}; + +typedef CAlternativeElxT <0> CAlternativeElx; + +// +// Assert +// +template class CAssertElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAssertElxT(ElxInterface * pelx, int byes = 1); + +public: + ElxInterface * m_pelx; + int m_byes; +}; + +typedef CAssertElxT <0> CAssertElx; + +// +// Back reference elx +// +template class CBackrefElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBackrefElxT(int nnumber, int brightleft, int bignorecase); + +public: + int m_nnumber; + int m_brightleft; + int m_bignorecase; + + CBufferT m_szNamed; +}; + +// +// Implementation +// +template CBackrefElxT :: CBackrefElxT(int nnumber, int brightleft, int bignorecase) +{ + m_nnumber = nnumber; + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CBackrefElxT :: Match(CContext * pContext) const +{ + // check number, for named + if( m_nnumber < 0 || m_nnumber >= pContext->m_captureindex.GetSize() ) return 0; + + int index = pContext->m_captureindex[m_nnumber]; + if( index < 0 ) return 0; + + // check enclosed + int pos1 = pContext->m_capturestack[index + 1]; + int pos2 = pContext->m_capturestack[index + 2]; + + if( pos2 < 0 ) pos2 = pContext->m_nCurrentPos; + + // info + int lpos = pos1 < pos2 ? pos1 : pos2; + int rpos = pos1 < pos2 ? pos2 : pos1; + int slen = rpos - lpos; + + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + + // compare + int bsucc; + CBufferRefT refstr(pcsz + lpos, slen); + + if( m_brightleft ) + { + if(npos < slen) + return 0; + + if(m_bignorecase) + bsucc = ! 
refstr.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = ! refstr.nCompare (pcsz + (npos - slen)); + + if( bsucc ) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos -= slen; + } + } + else + { + if(npos + slen > tlen) + return 0; + + if(m_bignorecase) + bsucc = ! refstr.nCompareNoCase(pcsz + npos); + else + bsucc = ! refstr.nCompare (pcsz + npos); + + if( bsucc ) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos += slen; + } + } + + return bsucc; +} + +template int CBackrefElxT :: MatchNext(CContext * pContext) const +{ + int npos = 0; + + pContext->m_stack.Pop(npos); + pContext->m_nCurrentPos = npos; + + return 0; +} + +// RCHART +#ifndef RCHART + #define RCHART(ch) ((CHART)ch) +#endif + +// BOUNDARY_TYPE +enum BOUNDARY_TYPE +{ + BOUNDARY_FILE_BEGIN, // begin of whole text + BOUNDARY_FILE_END , // end of whole text + BOUNDARY_FILE_END_N, // end of whole text, or before newline at the end + BOUNDARY_LINE_BEGIN, // begin of line + BOUNDARY_LINE_END , // end of line + BOUNDARY_WORD_BEGIN, // begin of word + BOUNDARY_WORD_END , // end of word + BOUNDARY_WORD_EDGE +}; + +// +// Boundary Elx +// +template class CBoundaryElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBoundaryElxT(int ntype, int byes = 1); + +protected: + static int IsWordChar(CHART ch); + +public: + int m_ntype; + int m_byes; +}; + +// +// Implementation +// +template CBoundaryElxT :: CBoundaryElxT(int ntype, int byes) +{ + m_ntype = ntype; + m_byes = byes; +} + +template int CBoundaryElxT :: Match(CContext * pContext) const +{ + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + + CHART chL = npos > 0 ? pcsz[npos - 1] : 0; + CHART chR = npos < tlen ? 
pcsz[npos ] : 0; + + int bsucc = 0; + + switch(m_ntype) + { + case BOUNDARY_FILE_BEGIN: + bsucc = (npos <= 0); + break; + + case BOUNDARY_FILE_END: + bsucc = (npos >= tlen); + break; + + case BOUNDARY_FILE_END_N: + bsucc = (npos >= tlen) || (pcsz[tlen-1] == RCHART('\n') && (npos == tlen-1 || (pcsz[tlen-2] == RCHART('\r') && npos == tlen-2))); + break; + + case BOUNDARY_LINE_BEGIN: + bsucc = (npos <= 0 ) || (chL == RCHART('\n')) || ((chL == RCHART('\r')) && (chR != RCHART('\n'))); + break; + + case BOUNDARY_LINE_END: + bsucc = (npos >= tlen) || (chR == RCHART('\r')) || ((chR == RCHART('\n')) && (chL != RCHART('\r'))); + break; + + case BOUNDARY_WORD_BEGIN: + bsucc = ! IsWordChar(chL) && IsWordChar(chR); + break; + + case BOUNDARY_WORD_END: + bsucc = IsWordChar(chL) && ! IsWordChar(chR); + break; + + case BOUNDARY_WORD_EDGE: + bsucc = IsWordChar(chL) ? ! IsWordChar(chR) : IsWordChar(chR); + break; + } + + return m_byes ? bsucc : ! bsucc; +} + +template int CBoundaryElxT :: MatchNext(CContext *) const +{ + return 0; +} + +template inline int CBoundaryElxT :: IsWordChar(CHART ch) +{ + return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_')); +} + +// +// Bracket +// +template class CBracketElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBracketElxT(int nnumber, int bright); + static int CheckCaptureIndex(int & index, CContext * pContext, int number); + +public: + int m_nnumber; + int m_bright; + + CBufferT m_szNamed; +}; + +template CBracketElxT :: CBracketElxT(int nnumber, int bright) +{ + m_nnumber = nnumber; + m_bright = bright; +} + +template inline int CBracketElxT :: CheckCaptureIndex(int & index, CContext * pContext, int number) +{ + if( index >= pContext->m_capturestack.GetSize() ) + index = pContext->m_capturestack.GetSize() - 4; + + while(index >= 0) + { + 
if(pContext->m_capturestack[index] == number) + { + return 1; + } + + index -= 4; + } + + + return 0; +} + +// +// capturestack[index+0] => Group number +// capturestack[index+1] => Capture start pos +// capturestack[index+2] => Capture end pos +// capturestack[index+3] => Capture enclose z-index, zindex<0 means inner group with same name +// +template int CBracketElxT :: Match(CContext * pContext) const +{ + // check, for named + if(m_nnumber < 0) return 0; + + if( ! m_bright ) + { + pContext->m_captureindex.Prepare(m_nnumber, -1); + int index = pContext->m_captureindex[m_nnumber]; + + // check + if(CheckCaptureIndex(index, pContext, m_nnumber) && pContext->m_capturestack[index+2] < 0) + { + pContext->m_capturestack[index+3] --; + return 1; + } + + // save + pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize(); + + pContext->m_capturestack.Push(m_nnumber); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push( 0); // z-index + } + else + { + // check + int index = pContext->m_captureindex[m_nnumber]; + + if(CheckCaptureIndex(index, pContext, m_nnumber)) + { + if(pContext->m_capturestack[index + 3] < 0) // check inner group with same name + { + pContext->m_capturestack[index + 3] ++; + return 1; + } + + // save + pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos; + pContext->m_capturestack[index + 3] = pContext->m_nParenZindex ++; + } + } + + return 1; +} + +template int CBracketElxT :: MatchNext(CContext * pContext) const +{ + int index = pContext->m_captureindex[m_nnumber]; + if( ! CheckCaptureIndex(index, pContext, m_nnumber) ) + { + return 0; + } + + if( ! 
m_bright ) + { + if(pContext->m_capturestack[index + 3] < 0) + { + pContext->m_capturestack[index + 3] ++; + return 0; + } + + pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4); + + // to find + CheckCaptureIndex(index, pContext, m_nnumber); + + // new index + pContext->m_captureindex[m_nnumber] = index; + } + else + { + if( pContext->m_capturestack[index + 2] >= 0 ) + { + pContext->m_capturestack[index + 2] = -1; + pContext->m_capturestack[index + 3] = 0; + } + else + { + pContext->m_capturestack[index + 3] --; + } + } + + return 0; +} + +// +// Deletage +// +template class CDelegateElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CDelegateElxT(int ndata = 0); + +public: + ElxInterface * m_pelx; + int m_ndata; // +0 : recursive to + // -3 : named recursive + + CBufferT m_szNamed; +}; + +template CDelegateElxT :: CDelegateElxT(int ndata) +{ + m_pelx = 0; + m_ndata = ndata; +} + +template int CDelegateElxT :: Match(CContext * pContext) const +{ + if(m_pelx != 0) + { + if(pContext->m_nCursiveLimit > 0) + { + pContext->m_nCursiveLimit --; + int result = m_pelx->Match(pContext); + pContext->m_nCursiveLimit ++; + return result; + } + else + return 0; + } + else + return 1; +} + +template int CDelegateElxT :: MatchNext(CContext * pContext) const +{ + if(m_pelx != 0) + return m_pelx->MatchNext(pContext); + else + return 0; +} + +// +// Empty +// +template class CEmptyElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CEmptyElxT(); +}; + +typedef CEmptyElxT <0> CEmptyElx; + +// +// Global +// +template class CGlobalElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGlobalElxT(); +}; + +typedef CGlobalElxT <0> CGlobalElx; + +// +// Repeat +// +template class CRepeatElxT : public ElxInterface +{ 
+public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRepeatElxT(ElxInterface * pelx, int ntimes); + +protected: + int MatchFixed (CContext * pContext) const; + int MatchNextFixed(CContext * pContext) const; + int MatchForward (CContext * pContext) const + { + CContextShot shot(pContext); + + if( ! m_pelx->Match(pContext) ) + return 0; + + if(pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + if( ! m_pelx->MatchNext(pContext) ) + return 0; + + if(pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + shot.Restore(pContext); + return 0; + } + +public: + ElxInterface * m_pelx; + int m_nfixed; +}; + +typedef CRepeatElxT <0> CRepeatElx; + +// +// Greedy +// +template class CGreedyElxT : public CRepeatElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + int MatchVart (CContext * pContext) const; + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CGreedyElxT <0> CGreedyElx; + +// +// Independent +// +template class CIndependentElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CIndependentElxT(ElxInterface * pelx); + +public: + ElxInterface * m_pelx; +}; + +typedef CIndependentElxT <0> CIndependentElx; + +// +// List +// +template class CListElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CListElxT(int brightleft); + +public: + CBufferT m_elxlist; + int m_brightleft; +}; + +typedef CListElxT <0> CListElx; + +// +// Posix Elx +// +template class CPosixElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPosixElxT(const char * posix, int brightleft); + +public: + 
POSIX_FUNC m_posixfun; + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CPosixElxT :: CPosixElxT(const char * posix, int brightleft) +{ + m_brightleft = brightleft; + + if(posix[1] == '^') + { + m_byes = 0; + posix += 2; + } + else + { + m_byes = 1; + posix += 1; + } + + if (!strncmp(posix, "alnum:", 6)) m_posixfun = ::isalnum ; + else if(!strncmp(posix, "alpha:", 6)) m_posixfun = ::isalpha ; + else if(!strncmp(posix, "ascii:", 6)) m_posixfun = ::isascii ; + else if(!strncmp(posix, "cntrl:", 6)) m_posixfun = ::iscntrl ; + else if(!strncmp(posix, "digit:", 6)) m_posixfun = ::isdigit ; + else if(!strncmp(posix, "graph:", 6)) m_posixfun = ::isgraph ; + else if(!strncmp(posix, "lower:", 6)) m_posixfun = ::islower ; + else if(!strncmp(posix, "print:", 6)) m_posixfun = ::isprint ; + else if(!strncmp(posix, "punct:", 6)) m_posixfun = ::ispunct ; + else if(!strncmp(posix, "space:", 6)) m_posixfun = ::isspace ; + else if(!strncmp(posix, "upper:", 6)) m_posixfun = ::isupper ; + else if(!strncmp(posix, "xdigit:",7)) m_posixfun = ::isxdigit; + else if(!strncmp(posix, "blank:", 6)) m_posixfun = isblank ; + else m_posixfun = 0 ; +} + +inline int isblank(int c) +{ + return c == 0x20 || c == '\t'; +} + +template int CPosixElxT :: Match(CContext * pContext) const +{ + if(m_posixfun == 0) return 0; + + int tlen = pContext->m_pMatchStringLength; + int npos = pContext->m_nCurrentPos; + + // check + int at = m_brightleft ? npos - 1 : npos; + if( at < 0 || at >= tlen ) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + + int bsucc = (*m_posixfun)(ch); + + if( ! m_byes ) + bsucc = ! bsucc; + + if( bsucc ) + pContext->m_nCurrentPos += m_brightleft ? -1 : 1; + + return bsucc; +} + +template int CPosixElxT :: MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? 
-1 : 1; + return 0; +} + +// +// Possessive +// +template class CPossessiveElxT : public CGreedyElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); +}; + +typedef CPossessiveElxT <0> CPossessiveElx; + +// +// Range Elx +// +template class CRangeElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRangeElxT(int brightleft, int byes); + +public: + int IsContainChar(CHART ch) const; + +public: + CBufferT m_ranges; + CBufferT m_chars; + CBufferT m_embeds; + +public: + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CRangeElxT :: CRangeElxT(int brightleft, int byes) +{ + m_brightleft = brightleft; + m_byes = byes; +} + +template int CRangeElxT :: Match(CContext * pContext) const +{ + int tlen = pContext->m_pMatchStringLength; + int npos = pContext->m_nCurrentPos; + + // check + int at = m_brightleft ? npos - 1 : npos; + if( at < 0 || at >= tlen ) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + int bsucc = 0, i; + + // compare + for(i=0; !bsucc && iMatch(pContext)) + { + pContext->m_nCurrentPos = npos; + bsucc = 1; + } + } + + if( ! m_byes ) + bsucc = ! bsucc; + + if( bsucc ) + pContext->m_nCurrentPos += m_brightleft ? -1 : 1; + + return bsucc; +} + +template int CRangeElxT :: IsContainChar(CHART ch) const +{ + int bsucc = 0, i; + + // compare + for(i=0; !bsucc && i int CRangeElxT :: MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? 
-1 : 1; + return 0; +} + +// +// Reluctant +// +template class CReluctantElxT : public CRepeatElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + int MatchVart (CContext * pContext) const; + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CReluctantElxT <0> CReluctantElx; + +// +// String Elx +// +template class CStringElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase); + +public: + CBufferT m_szPattern; + int m_brightleft; + int m_bignorecase; +}; + +// +// Implementation +// +template CStringElxT :: CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength) +{ + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CStringElxT :: Match(CContext * pContext) const +{ + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + int slen = m_szPattern.GetSize(); + + int bsucc; + + if(m_brightleft) + { + if(npos < slen) + return 0; + + if(m_bignorecase) + bsucc = ! m_szPattern.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = ! m_szPattern.nCompare (pcsz + (npos - slen)); + + if( bsucc ) + pContext->m_nCurrentPos -= slen; + } + else + { + if(npos + slen > tlen) + return 0; + + if(m_bignorecase) + bsucc = ! m_szPattern.nCompareNoCase(pcsz + npos); + else + bsucc = ! 
m_szPattern.nCompare (pcsz + npos); + + if( bsucc ) + pContext->m_nCurrentPos += slen; + } + + return bsucc; +} + +template int CStringElxT :: MatchNext(CContext * pContext) const +{ + int slen = m_szPattern.GetSize(); + + if(m_brightleft) + pContext->m_nCurrentPos += slen; + else + pContext->m_nCurrentPos -= slen; + + return 0; +} + +// +// CConditionElx +// +template class CConditionElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CConditionElxT(); + +public: + // backref condition + int m_nnumber; + CBufferT m_szNamed; + + // elx condition + ElxInterface * m_pelxask; + + // selection + ElxInterface * m_pelxyes, * m_pelxno; +}; + +template CConditionElxT :: CConditionElxT() +{ + m_nnumber = -1; +} + +template int CConditionElxT :: Match(CContext * pContext) const +{ + // status + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + // condition result + int condition_yes = 0; + + // backref type + if( m_nnumber >= 0 ) + { + do + { + if(m_nnumber >= pContext->m_captureindex.GetSize()) break; + + int index = pContext->m_captureindex[m_nnumber]; + if( index < 0) break; + + // else valid + condition_yes = 1; + } + while(0); + } + else + { + if( m_pelxask == 0 ) + condition_yes = 1; + else + condition_yes = m_pelxask->Match(pContext); + + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + } + + // elx result + int bsucc; + if( condition_yes ) + bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext); + else + bsucc = m_pelxno == 0 ? 
1 : m_pelxno ->Match(pContext); + + if( bsucc ) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +template int CConditionElxT :: MatchNext(CContext * pContext) const +{ + // pop + int ncsize, condition_yes; + + pContext->m_stack.Pop(condition_yes); + pContext->m_stack.Pop(ncsize); + + // elx result + int bsucc; + if( condition_yes ) + bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext); + else + bsucc = m_pelxno == 0 ? 0 : m_pelxno ->MatchNext(pContext); + + if( bsucc ) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +// +// MatchResult +// +template class MatchResultT +{ +public: + int IsMatched() const; + +public: + int GetStart() const; + int GetEnd () const; + +public: + int MaxGroupNumber() const; + int GetGroupStart(int nGroupNumber) const; + int GetGroupEnd (int nGroupNumber) const; + +public: + MatchResultT(const MatchResultT & from) { *this = from; } + MatchResultT(CContext * pContext = 0, int nMaxNumber = -1); + MatchResultT & operator = (const MatchResultT &); + inline operator int() const { return IsMatched(); } + +public: + CBufferT m_result; +}; + +typedef MatchResultT <0> MatchResult; + +// Stocked Elx IDs +enum STOCKELX_ID_DEFINES +{ + STOCKELX_EMPTY = 0, + + /////////////////////// + + STOCKELX_DOT_ALL, + STOCKELX_DOT_NOT_ALL, + + STOCKELX_WORD, + STOCKELX_WORD_NOT, + + STOCKELX_SPACE, + STOCKELX_SPACE_NOT, + + STOCKELX_DIGITAL, + STOCKELX_DIGITAL_NOT, + + ////////////////////// + + STOCKELX_DOT_ALL_RIGHTLEFT, + STOCKELX_DOT_NOT_ALL_RIGHTLEFT, + + STOCKELX_WORD_RIGHTLEFT, + STOCKELX_WORD_RIGHTLEFT_NOT, + + STOCKELX_SPACE_RIGHTLEFT, + STOCKELX_SPACE_RIGHTLEFT_NOT, + + STOCKELX_DIGITAL_RIGHTLEFT, + STOCKELX_DIGITAL_RIGHTLEFT_NOT, + + ///////////////////// + + STOCKELX_COUNT +}; + +// REGEX_FLAGS +#ifndef 
_REGEX_FLAGS_DEFINED + enum REGEX_FLAGS + { + NO_FLAG = 0, + SINGLELINE = 0x01, + MULTILINE = 0x02, + GLOBAL = 0x04, + IGNORECASE = 0x08, + RIGHTTOLEFT = 0x10, + EXTENDED = 0x20 + }; + #define _REGEX_FLAGS_DEFINED +#endif + +// +// Builder T +// +template class CBuilderT +{ +public: + typedef CDelegateElxT CDelegateElx; + typedef CBracketElxT CBracketElx; + typedef CBackrefElxT CBackrefElx; + typedef CConditionElxT CConditionElx; + +// Methods +public: + ElxInterface * Build(const CBufferRefT & pattern, int flags); + int GetNamedNumber(const CBufferRefT & named) const; + void Clear(); + +public: + CBuilderT(); + ~CBuilderT(); + +// Public Attributes +public: + ElxInterface * m_pTopElx; + int m_nFlags; + int m_nMaxNumber; + int m_nNextNamed; + int m_nGroupCount; + + CBufferT m_objlist; + CBufferT m_grouplist; + CBufferT m_recursivelist; + CBufferT m_namedlist; + CBufferT m_namedbackreflist; + CBufferT m_namedconditionlist; + +// CHART_INFO +protected: + struct CHART_INFO + { + public: + CHART ch; + int type; + int pos; + int len; + + public: + CHART_INFO(CHART c, int t, int p = 0, int l = 0) { ch = c; type = t; pos = p; len = l; } + inline int operator == (const CHART_INFO & ci) { return ch == ci.ch && type == ci.type; } + inline int operator != (const CHART_INFO & ci) { return ! 
operator == (ci); } + }; + +protected: + static unsigned int Hex2Int(const CHART * pcsz, int length, int & used); + static int ReadDec(char * & str, unsigned int & dec); + void MoveNext(); + int GetNext2(); + + ElxInterface * BuildAlternative(int vaflags); + ElxInterface * BuildList (int & flags); + ElxInterface * BuildRepeat (int & flags); + ElxInterface * BuildSimple (int & flags); + ElxInterface * BuildCharset (int & flags); + ElxInterface * BuildRecursive (int & flags); + ElxInterface * BuildBoundary (int & flags); + ElxInterface * BuildBackref (int & flags); + + ElxInterface * GetStockElx (int nStockId); + ElxInterface * Keep(ElxInterface * pElx); + +// Private Attributes +protected: + CBufferRefT m_pattern; + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + + // Backup current pos + struct Snapshot + { + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + Snapshot():prev(0,0),curr(0,0),next(0,0),nex2(0,0) {} + }; + void Backup (Snapshot * pdata) { memcpy(pdata, &prev, sizeof(Snapshot)); } + void Restore(Snapshot * pdata) { memcpy(&prev, pdata, sizeof(Snapshot)); } + + ElxInterface * m_pStockElxs[STOCKELX_COUNT]; +}; + +// +// Implementation +// +template CBuilderT :: CBuilderT() : m_pattern(0, 0), prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0) +{ + Clear(); +} + +template CBuilderT :: ~CBuilderT() +{ + Clear(); +} + +template int CBuilderT :: GetNamedNumber(const CBufferRefT & named) const +{ + for(int i=0; im_elxlist[0])->m_szNamed.CompareNoCase(named) ) + return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber; + } + + return -3; +} + +template ElxInterface * CBuilderT :: Build(const CBufferRefT & pattern, int flags) +{ + // init + m_pattern = pattern; + m_nNextPos = 0; + m_nCharsetDepth = 0; + m_nMaxNumber = 0; + m_nNextNamed = 0; + m_nFlags = flags; + m_bQuoted = 0; + m_quote_fun = 0; + + m_grouplist 
.Restore(0); + m_recursivelist .Restore(0); + m_namedlist .Restore(0); + m_namedbackreflist .Restore(0); + m_namedconditionlist.Restore(0); + + int i; + for(i=0; i<3; i++) MoveNext(); + + // build + m_pTopElx = BuildAlternative(flags); + + // group 0 + m_grouplist.Prepare(0); + m_grouplist[0] = m_pTopElx; + + // append named to unnamed + m_nGroupCount = m_grouplist.GetSize(); + + m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize()); + + for(i=0; im_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2]; + + // append + m_grouplist[m_nGroupCount ++] = m_namedlist[i]; + + if( pleft->m_nnumber > 0 ) + continue; + + // same name + int find_same_name = GetNamedNumber(pleft->m_szNamed); + if( find_same_name >= 0 ) + { + pleft ->m_nnumber = find_same_name; + pright->m_nnumber = find_same_name; + } + else + { + m_nMaxNumber ++; + + pleft ->m_nnumber = m_nMaxNumber; + pright->m_nnumber = m_nMaxNumber; + } + } + + for(i=1; im_elxlist[0]; + + if( pleft->m_nnumber > m_nMaxNumber ) + m_nMaxNumber = pleft->m_nnumber; + } + + // connect recursive + for(i=0; im_ndata == -3 ) + m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed); + + if( m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata <= m_nMaxNumber ) + { + if( m_recursivelist[i]->m_ndata == 0 ) + m_recursivelist[i]->m_pelx = m_pTopElx; + else for(int j=1; jm_ndata == ((CBracketElx *)((CListElx*)m_grouplist[j])->m_elxlist[0])->m_nnumber) + { + m_recursivelist[i]->m_pelx = m_grouplist[j]; + break; + } + } + } + } + + // named backref + for(i=0; im_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed); + } + + // named condition + for(i=0; im_szNamed); + if( nn >= 0 ) + { + m_namedconditionlist[i]->m_nnumber = nn; + m_namedconditionlist[i]->m_pelxask = 0; + } + } + + return m_pTopElx; +} + +template void CBuilderT :: Clear() +{ + for(int i=0; i unsigned int CBuilderT :: Hex2Int(const CHART * pcsz, int length, int & used) +{ + unsigned int result = 0; + int & 
i = used; + + for(i=0; i= RCHART('0') && pcsz[i] <= RCHART('9')) + result = (result << 4) + (pcsz[i] - RCHART('0')); + else if(pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A'))); + else if(pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a'))); + else + break; + } + + return result; +} + +template inline ElxInterface * CBuilderT :: Keep(ElxInterface * pelx) +{ + m_objlist.Push(pelx); + return pelx; +} + +template void CBuilderT :: MoveNext() +{ + // forwards + prev = curr; + curr = next; + next = nex2; + + // get nex2 + while( ! GetNext2() ) {}; +} + +template int CBuilderT :: GetNext2() +{ + // check length + if(m_nNextPos >= m_pattern.GetSize()) + { + nex2 = CHART_INFO(0, 1, m_nNextPos, 0); + return 1; + } + + int delta = 1; + CHART ch = m_pattern[m_nNextPos]; + + // if quoted + if(m_bQuoted) + { + if(ch == RCHART('\\')) + { + if(m_pattern[m_nNextPos + 1] == RCHART('E')) + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + } + + if(m_quote_fun != 0) + nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + + m_nNextPos += delta; + + return 1; + } + + // common + switch(ch) + { + case RCHART('\\'): + { + CHART ch1 = m_pattern[m_nNextPos+1]; + + // backref + if(ch1 >= RCHART('0') && ch1 <= RCHART('9')) + { + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + } + + // escape + delta = 2; + + switch(ch1) + { + case RCHART('A'): + case RCHART('Z'): + case RCHART('z'): + case RCHART('w'): + case RCHART('W'): + case RCHART('s'): + case RCHART('S'): + case RCHART('B'): + case RCHART('d'): + case RCHART('D'): + case RCHART('k'): + case RCHART('g'): + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + + case RCHART('b'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO('\b', 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + 
break; + + /* + case RCHART('<'): + case RCHART('>'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + */ + + case RCHART('x'): + if(m_pattern[m_nNextPos+2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red); + + delta += red; + + if(red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + + break; + } + + case RCHART('u'): + if(m_pattern[m_nNextPos+2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 4, red); + + delta += red; + + if(red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + } + else + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red); + + delta += red; + + while(m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}')) + delta ++; + + delta ++; // skip '}' + + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + } + break; + + case RCHART('a'): nex2 = CHART_INFO(RCHART('\a'), 0, m_nNextPos, delta); break; + case RCHART('f'): nex2 = CHART_INFO(RCHART('\f'), 0, m_nNextPos, delta); break; + case RCHART('n'): nex2 = CHART_INFO(RCHART('\n'), 0, m_nNextPos, delta); break; + case RCHART('r'): nex2 = CHART_INFO(RCHART('\r'), 0, m_nNextPos, delta); break; + case RCHART('t'): nex2 = CHART_INFO(RCHART('\t'), 0, m_nNextPos, delta); break; + case RCHART('v'): nex2 = CHART_INFO(RCHART('\v'), 0, m_nNextPos, delta); break; + case RCHART('e'): nex2 = CHART_INFO(RCHART( 27 ), 0, m_nNextPos, delta); break; + + case RCHART('G'): // skip '\G' + if(m_nCharsetDepth > 0) + { + m_nNextPos += 2; + return 0; + } + else + { + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + } + + case RCHART('L'): + if( ! 
m_quote_fun ) m_quote_fun = ::tolower; + + case RCHART('U'): + if( ! m_quote_fun ) m_quote_fun = ::toupper; + + case RCHART('Q'): + { + m_bQuoted = 1; + m_nNextPos += 2; + return 0; + } + + case RCHART('E'): + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + + case 0: + if(m_nNextPos+1 >= m_pattern.GetSize()) + { + delta = 1; + nex2 = CHART_INFO(ch , 0, m_nNextPos, delta); + } + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + break; + } + } + break; + + case RCHART('*'): + case RCHART('+'): + case RCHART('?'): + case RCHART('.'): + case RCHART('{'): + case RCHART('}'): + case RCHART(')'): + case RCHART('|'): + case RCHART('$'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + + case RCHART('-'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('('): + { + CHART ch1 = m_pattern[m_nNextPos+1]; + CHART ch2 = m_pattern[m_nNextPos+2]; + + // skip remark + if(ch1 == RCHART('?') && ch2 == RCHART('#')) + { + m_nNextPos += 2; + while(m_nNextPos < m_pattern.GetSize()) + { + if(m_pattern[m_nNextPos] == RCHART(')')) + break; + + m_nNextPos ++; + } + + if(m_pattern[m_nNextPos] == RCHART(')')) + { + m_nNextPos ++; + + // get next nex2 + return 0; + } + } + else + { + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + } + break; + + case RCHART('#'): + if(m_nFlags & EXTENDED) + { + // skip remark + m_nNextPos ++; + + while(m_nNextPos < m_pattern.GetSize()) + { + if(m_pattern[m_nNextPos] == RCHART('\n') || m_pattern[m_nNextPos] == RCHART('\r')) + break; + + m_nNextPos ++; + } + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case 
RCHART(' '): + case RCHART('\f'): + case RCHART('\n'): + case RCHART('\r'): + case RCHART('\t'): + case RCHART('\v'): + if(m_nFlags & EXTENDED) + { + m_nNextPos ++; + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART('['): + if( m_nCharsetDepth == 0 || m_pattern.At(m_nNextPos + 1, 0) == RCHART(':') ) + { + m_nCharsetDepth ++; + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(']'): + if(m_nCharsetDepth > 0) + { + m_nCharsetDepth --; + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(':'): + if(next == CHART_INFO(RCHART('['), 1)) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('^'): + if(m_nCharsetDepth == 0 || next == CHART_INFO(RCHART('['), 1) || (curr == CHART_INFO(RCHART('['), 1) && next == CHART_INFO(RCHART(':'), 1))) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case 0: + if(m_nNextPos >= m_pattern.GetSize()) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); // end of string + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + } + + m_nNextPos += delta; + + return 1; +} + +template ElxInterface * CBuilderT :: GetStockElx(int nStockId) +{ + ElxInterface ** pStockElxs = m_pStockElxs; + + // check + if(nStockId < 0 || nStockId >= STOCKELX_COUNT) + return GetStockElx(0); + + // create if no + if(pStockElxs[nStockId] == 0) + { + switch(nStockId) + { + case STOCKELX_EMPTY: + pStockElxs[nStockId] = Keep(new CEmptyElx()); + break; + + case STOCKELX_WORD: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('A')); 
pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL: + pStockElxs[nStockId] = Keep(new CRangeElxT (0, 0)); + break; + + case STOCKELX_DOT_NOT_ALL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = 
pRange; + } + break; + + case STOCKELX_WORD_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL_RIGHTLEFT: + pStockElxs[nStockId] = Keep(new CRangeElxT (1, 0)); + break; + + case STOCKELX_DOT_NOT_ALL_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + pRange->m_chars .Push(RCHART('\f')); + pRange->m_chars .Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + pRange->m_chars .Push(RCHART('\f')); + pRange->m_chars .Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT: + { + CRangeElxT * 
pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + } + } + + // return + return pStockElxs[nStockId]; +} + +template ElxInterface * CBuilderT :: BuildAlternative(int vaflags) +{ + if(curr == CHART_INFO(0, 1)) + return GetStockElx(STOCKELX_EMPTY); + + // flag instance + int flags = vaflags; + + // first part + ElxInterface * pAlternativeOne = BuildList(flags); + + // check alternative + if(curr == CHART_INFO(RCHART('|'), 1)) + { + CAlternativeElx * pAlternative = (CAlternativeElx *)Keep(new CAlternativeElx()); + pAlternative->m_elxlist.Push(pAlternativeOne); + + // loop + while(curr == CHART_INFO(RCHART('|'), 1)) + { + // skip '|' itself + MoveNext(); + + pAlternativeOne = BuildList(flags); + pAlternative->m_elxlist.Push(pAlternativeOne); + } + + return pAlternative; + } + + return pAlternativeOne; +} + +template ElxInterface * CBuilderT :: BuildList(int & flags) +{ + if(curr == CHART_INFO(0, 1) || curr == CHART_INFO(RCHART('|'), 1) || curr == CHART_INFO(RCHART(')'), 1)) + return GetStockElx(STOCKELX_EMPTY); + + // first + ElxInterface * pListOne = BuildRepeat(flags); + + if(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + pList->m_elxlist.Push(pListOne); + + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + pListOne = BuildRepeat(flags); + + // add + pList->m_elxlist.Push(pListOne); + } + + return pList; + } + + return pListOne; +} + +template ElxInterface * CBuilderT :: BuildRepeat(int & flags) +{ + // simple + 
ElxInterface * pSimple = BuildSimple(flags); + + if(curr.type == 0) return pSimple; + + // is quantifier or not + int bIsQuantifier = 1; + + // quantifier range + unsigned int nMin = 0, nMax = 0; + + switch(curr.ch) + { + case RCHART('{'): + { + CBufferT re; + + // skip '{' + MoveNext(); + + // copy + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('}'), 1)) + { + re.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + + // skip '}' + MoveNext(); + + // read + int red; + char * str = re.GetBuffer(); + + if( ! ReadDec(str, nMin) ) + red = 0; + else if( *str != ',' ) + red = 1; + else + { + str ++; + + if( ! ReadDec(str, nMax) ) + red = 2; + else + red = 3; + } + + // check + if(red <= 1 ) nMax = nMin; + if(red == 2 ) nMax = INT_MAX; + if(nMax < nMin) nMax = nMin; + } + break; + + case RCHART('?'): + nMin = 0; + nMax = 1; + + // skip '?' + MoveNext(); + break; + + case RCHART('*'): + nMin = 0; + nMax = INT_MAX; + + // skip '*' + MoveNext(); + break; + + case RCHART('+'): + nMin = 1; + nMax = INT_MAX; + + // skip '+' + MoveNext(); + break; + + default: + bIsQuantifier = 0; + break; + } + + // do quantify + if(bIsQuantifier) + { + // 0 times + if(nMax == 0) + return GetStockElx(STOCKELX_EMPTY); + + // fixed times + if(nMin == nMax) + { + if(curr == CHART_INFO(RCHART('?'), 1) || curr == CHART_INFO(RCHART('+'), 1)) + MoveNext(); + + return Keep(new CRepeatElx(pSimple, nMin)); + } + + // range times + if(curr == CHART_INFO(RCHART('?'), 1)) + { + MoveNext(); + return Keep(new CReluctantElx(pSimple, nMin, nMax)); + } + else if(curr == CHART_INFO(RCHART('+'), 1)) + { + MoveNext(); + return Keep(new CPossessiveElx(pSimple, nMin, nMax)); + } + else + { + return Keep(new CGreedyElx(pSimple, nMin, nMax)); + } + } + + return pSimple; +} + +template ElxInterface * CBuilderT :: BuildSimple(int & flags) +{ + CBufferT fixed; + + while(curr != CHART_INFO(0, 1)) + { + if(curr.type == 0) + { + if(next == CHART_INFO(RCHART('{'), 1) || next 
== CHART_INFO(RCHART('?'), 1) || next == CHART_INFO(RCHART('*'), 1) || next == CHART_INFO(RCHART('+'), 1)) + { + if(fixed.GetSize() == 0) + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + + break; + } + else + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + else if(curr.type == 1) + { + CHART vch = curr.ch; + + // end of simple + if(vch == RCHART(')') || vch == RCHART('|')) + break; + + // has fixed already + if(fixed.GetSize() > 0) + break; + + // left parentheses + if(vch == RCHART('(')) + { + return BuildRecursive(flags); + } + + // char set + if( vch == RCHART('[') || vch == RCHART('.') || vch == RCHART('w') || vch == RCHART('W') || + vch == RCHART('s') || vch == RCHART('S') || vch == RCHART('d') || vch == RCHART('D') + ) + { + return BuildCharset(flags); + } + + // boundary + if( vch == RCHART('^') || vch == RCHART('$') || vch == RCHART('A') || vch == RCHART('Z') || vch == RCHART('z') || + vch == RCHART('b') || vch == RCHART('B') || vch == RCHART('G') // vch == RCHART('<') || vch == RCHART('>') + ) + { + return BuildBoundary(flags); + } + + // backref + if(vch == RCHART('\\') || vch == RCHART('k') || vch == RCHART('g')) + { + return BuildBackref(flags); + } + + // treat vchar as char + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + + if(fixed.GetSize() > 0) + return Keep(new CStringElxT (fixed.GetBuffer(), fixed.GetSize(), flags & RIGHTTOLEFT, flags & IGNORECASE)); + else + return GetStockElx(STOCKELX_EMPTY); +} + +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +template ElxInterface * CBuilderT :: BuildCharset(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch(ch) + { + case RCHART('.'): + return GetStockElx( + flags & RIGHTTOLEFT ? + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL_RIGHTLEFT : STOCKELX_DOT_NOT_ALL_RIGHTLEFT) : + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL : STOCKELX_DOT_NOT_ALL) + ); + + case RCHART('w'): + return GetStockElx(flags & RIGHTTOLEFT ? 
STOCKELX_WORD_RIGHTLEFT : STOCKELX_WORD); + + case RCHART('W'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_WORD_RIGHTLEFT_NOT : STOCKELX_WORD_NOT); + + case RCHART('s'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT : STOCKELX_SPACE); + + case RCHART('S'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT_NOT : STOCKELX_SPACE_NOT); + + case RCHART('d'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT : STOCKELX_DIGITAL); + + case RCHART('D'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT_NOT : STOCKELX_DIGITAL_NOT); + + case RCHART('['): + { + CRangeElxT * pRange; + + // create + if(curr == CHART_INFO(RCHART(':'), 1)) + { + // Backup before posix + Snapshot shot; + Backup(&shot); + + CBufferT posix; + + do { + posix.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + while(curr.ch != RCHART(0) && curr != CHART_INFO(RCHART(']'), 1)); + + MoveNext(); // skip ']' + + // posix + CPosixElxT * pposix = (CPosixElxT *) Keep(new CPosixElxT (posix.GetBuffer(), flags & RIGHTTOLEFT)); + if(pposix->m_posixfun != 0) + { + return pposix; + } + + // restore if not posix + Restore(&shot); + } + + if(curr == CHART_INFO(RCHART('^'), 1)) + { + MoveNext(); // skip '^' + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 0)); + } + else + { + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 1)); + } + + // parse + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART(']'), 1)) + { + ch = curr.ch; + + if(curr.type == 1 && ( + ch == RCHART('.') || ch == RCHART('w') || ch == RCHART('W') || ch == RCHART('s') || ch == RCHART('S') || ch == RCHART('d') || ch == RCHART('D') || + (ch == RCHART('[') && next == CHART_INFO(RCHART(':'), 1)) + )) + { + pRange->m_embeds.Push(BuildCharset(flags)); + } + else if(next == CHART_INFO(RCHART('-'), 1) && nex2.type == 0) + { + pRange->m_ranges.Push(ch); pRange->m_ranges.Push(nex2.ch); + + // 
next + MoveNext(); + MoveNext(); + MoveNext(); + } + else + { + pRange->m_chars.Push(ch); + + // next + MoveNext(); + } + } + + // skip ']' + MoveNext(); + + if( flags & IGNORECASE ) + { + CBufferT & ranges = pRange->m_ranges; + int i, oldcount = ranges.GetSize() / 2; + + for(i=0; i= RCHART('A') ) + { + newmin = tolower( max(RCHART('A'), ranges[i*2 ]) ); + newmax = tolower( min(RCHART('Z'), ranges[i*2+1]) ); + + if( newmin < ranges[i*2] || newmax > ranges[i*2+1] ) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + + if( ranges[i*2] <= RCHART('z') && ranges[i*2+1] >= RCHART('a') ) + { + newmin = toupper( max(RCHART('a'), ranges[i*2 ]) ); + newmax = toupper( min(RCHART('z'), ranges[i*2+1]) ); + + if( newmin < ranges[i*2] || newmax > ranges[i*2+1] ) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + } + + CBufferT & chars = pRange->m_chars; + oldcount = chars.GetSize(); + for(i=0; iIsContainChar(tolower(chars[i])) ) + chars.Push(tolower(chars[i])); + + if( islower(chars[i]) && ! pRange->IsContainChar(toupper(chars[i])) ) + chars.Push(toupper(chars[i])); + } + } + + return pRange; + } + } + + return GetStockElx(STOCKELX_EMPTY); +} + +template ElxInterface * CBuilderT :: BuildRecursive(int & flags) +{ + // skip '(' + MoveNext(); + + if(curr == CHART_INFO(RCHART('?'), 1)) + { + ElxInterface * pElx = 0; + + // skip '?' + MoveNext(); + + int bNegative = 0; + CHART named_end = RCHART('>'); + + switch(curr.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + { + MoveNext(); // skip '!' or '=' + pElx = Keep(new CAssertElx(BuildAlternative(flags & ~RIGHTTOLEFT), !bNegative)); + } + break; + + case RCHART('<'): + switch(next.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + MoveNext(); // skip '<' + MoveNext(); // skip '!' 
or '=' + { + pElx = Keep(new CAssertElx(BuildAlternative(flags | RIGHTTOLEFT), !bNegative)); + } + break; + + default: // named group + break; + } + // break if assertion // else named + if(pElx != 0) break; + + case RCHART('P'): + if(curr.ch == RCHART('P')) MoveNext(); // skip 'P' + + case RCHART('\''): + if (curr.ch == RCHART('<' )) named_end = RCHART('>' ); + else if(curr.ch == RCHART('\'')) named_end = RCHART('\''); + MoveNext(); // skip '<' or '\'' + { + // named number + int nThisBackref = m_nNextNamed ++; + + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + CBracketElx * pleft = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 1 : 0)); + CBracketElx * pright = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 0 : 1)); + + // save name + CBufferT & name = pleft->m_szNamed; + CBufferT num; + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pleft ->m_nnumber = number; + pright->m_nnumber = number; + + name.Release(); + } + + // left, center, right + pList->m_elxlist.Push(pleft); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(pright); + + // for recursive + m_namedlist.Prepare(nThisBackref); + m_namedlist[nThisBackref] = pList; + + pElx = pList; + } + break; + + case RCHART('>'): + { + MoveNext(); // skip '>' + pElx = Keep(new CIndependentElx(BuildAlternative(flags))); + } + break; + + case RCHART('R'): + MoveNext(); // skip 'R' + while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if(curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + named_end = curr.ch == RCHART('<') ? 
RCHART('>') : RCHART('\''); + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(-3)); + + MoveNext(); // skip '<' or '\\' + + // save name + CBufferT & name = pDelegate->m_szNamed; + CBufferT num; + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pDelegate->m_ndata = number; + name.Release(); + } + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + CBufferT rto; + while(curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + break; + + case RCHART('('): + { + CConditionElx * pConditionElx = (CConditionElx *)Keep(new CConditionElx()); + + // condition + ElxInterface * & pCondition = pConditionElx->m_pelxask; + + if(next == CHART_INFO(RCHART('?'), 1)) + { + pCondition = BuildRecursive(flags); + } + else // named, assert or number + { + MoveNext(); // skip '(' + int pos0 = curr.pos; + + // save elx condition + pCondition = Keep(new CAssertElx(BuildAlternative(flags), 1)); + + // save name + pConditionElx->m_szNamed.Append(m_pattern.GetBuffer() + pos0, curr.pos - pos0, 1); + + // save number + CBufferT numstr; + while(pos0 < curr.pos) + { + CHART ch = m_pattern[pos0]; + numstr.Append(((ch & (CHART)0xff) == ch) ? (char)ch : 0, 1); + pos0 ++; + } + + unsigned int number; + char * str = numstr.GetBuffer(); + + // valid group number + if( ReadDec(str, number) ? 
( *str == '\0') : 0 ) + { + pConditionElx->m_nnumber = number; + pCondition = 0; + } + else // maybe elx, maybe named + { + pConditionElx->m_nnumber = -1; + m_namedconditionlist.Push(pConditionElx); + } + + MoveNext(); // skip ')' + } + + // alternative + { + int newflags = flags; + + pConditionElx->m_pelxyes = BuildList(newflags); + } + + if(curr.ch == RCHART('|')) + { + MoveNext(); // skip '|' + + pConditionElx->m_pelxno = BuildAlternative(flags); + } + else + { + pConditionElx->m_pelxno = 0; + } + + pElx = pConditionElx; + } + break; + + default: + while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) // recursive (?1) => (?R1) + { + CBufferT rto; + while(curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + // flag + int newflags = flags; + while(curr != CHART_INFO(0, 1) && curr.ch != RCHART(':') && curr.ch != RCHART(')') && curr != CHART_INFO(RCHART('('), 1)) + { + int tochange = 0; + + switch(curr.ch) + { + case RCHART('i'): + case RCHART('I'): + tochange = IGNORECASE; + break; + + case RCHART('s'): + case RCHART('S'): + tochange = SINGLELINE; + break; + + case RCHART('m'): + case RCHART('M'): + tochange = MULTILINE; + break; + + case RCHART('g'): + case RCHART('G'): + tochange = GLOBAL; + break; + + case RCHART('-'): + bNegative = 1; + break; + } + + if(bNegative) + newflags &= ~tochange; + else + newflags |= tochange; + + // move to next char + MoveNext(); + } + + if(curr.ch == RCHART(':') || curr == CHART_INFO(RCHART('('), 1)) + { + // skip ':' + if(curr.ch == RCHART(':')) MoveNext(); + + pElx = BuildAlternative(newflags); + } + else + { + // change parent 
flags + flags = newflags; + + pElx = GetStockElx(STOCKELX_EMPTY); + } + } + break; + } + + MoveNext(); // skip ')' + + return pElx; + } + else + { + // group and number + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + int nThisBackref = ++ m_nMaxNumber; + + // left, center, right + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 1 : 0))); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 0 : 1))); + + // for recursive + m_grouplist.Prepare(nThisBackref); + m_grouplist[nThisBackref] = pList; + + // right + MoveNext(); // skip ')' + + return pList; + } +} + +template ElxInterface * CBuilderT :: BuildBoundary(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch(ch) + { + case RCHART('^'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_BEGIN : BOUNDARY_FILE_BEGIN)); + + case RCHART('$'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_END : BOUNDARY_FILE_END)); + + case RCHART('b'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE)); + + case RCHART('B'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE, 0)); + + case RCHART('A'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_BEGIN)); + + case RCHART('Z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END_N)); + + case RCHART('z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END)); + + case RCHART('G'): + if(flags & GLOBAL) + return Keep(new CGlobalElx()); + else + return GetStockElx(STOCKELX_EMPTY); + + default: + return GetStockElx(STOCKELX_EMPTY); + } +} + +template ElxInterface * CBuilderT :: BuildBackref(int & flags) +{ + // skip '\\' or '\k' or '\g' + MoveNext(); + + if(curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + CHART named_end = curr.ch == RCHART('<') ? 
RCHART('>') : RCHART('\''); + CBackrefElxT * pbackref = (CBackrefElxT *)Keep(new CBackrefElxT (-1, flags & RIGHTTOLEFT, flags & IGNORECASE)); + + MoveNext(); // skip '<' or '\'' + + // save name + CBufferT & name = pbackref->m_szNamed; + CBufferT num; + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pbackref->m_nnumber = number; + name.Release(); + } + else + { + m_namedbackreflist.Push(pbackref); + } + + return pbackref; + } + else + { + unsigned int nbackref = 0; + + for(int i=0; i<3; i++) + { + if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) + nbackref = nbackref * 10 + (curr.ch - RCHART('0')); + else + break; + + MoveNext(); + } + + return Keep(new CBackrefElxT (nbackref, flags & RIGHTTOLEFT, flags & IGNORECASE)); + } +} + +template int CBuilderT :: ReadDec(char * & str, unsigned int & dec) +{ + int s = 0; + while(str[s] != 0 && isspace(str[s])) s++; + + if(str[s] < '0' || str[s] > '9') return 0; + + dec = 0; + unsigned int i; + + for(i = s; i= '0' && str[i] <= '9') + dec = dec * 10 + (str[i] - '0'); + else + break; + } + + while(str[i] != 0 && isspace(str[i])) i++; + str += i; + + return 1; +} + +// +// Regexp +// +template class CRegexpT +{ +public: + CRegexpT(const CHART * pattern = 0, int flags = 0); + CRegexpT(const CHART * pattern, int length, int flags); + void Compile(const CHART * pattern, int flags = 0); + void Compile(const CHART * pattern, int length, int flags); + +public: + MatchResult MatchExact(const CHART * tstring, CContext * pContext = 0) const; + MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = 0) const; + MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = 0) const; + MatchResult Match(const 
CHART * tstring, int length, int start, CContext * pContext = 0) const; + MatchResult Match(CContext * pContext) const; + CContext * PrepareMatch(const CHART * tstring, int start = -1, CContext * pContext = 0) const; + CContext * PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext = 0) const; + CHART * Replace(const CHART * tstring, const CHART * replaceto, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + CHART * Replace(const CHART * tstring, int string_length, const CHART * replaceto, int to_length, int & result_length, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + int GetNamedGroupNumber(const CHART * group_name) const; + +public: + static void ReleaseString (CHART * tstring ); + static void ReleaseContext(CContext * pContext); + +public: + CBuilderT m_builder; +}; + +// +// Implementation +// +template CRegexpT :: CRegexpT(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template CRegexpT :: CRegexpT(const CHART * pattern, int length, int flags) +{ + Compile(pattern, length, flags); +} + +template inline void CRegexpT :: Compile(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template void CRegexpT :: Compile(const CHART * pattern, int length, int flags) +{ + m_builder.Clear(); + if(pattern != 0) m_builder.Build(CBufferRefT(pattern, length), flags); +} + +template inline MatchResult CRegexpT :: MatchExact(const CHART * tstring, CContext * pContext) const +{ + return MatchExact(tstring, CBufferRefT(tstring).GetSize(), pContext); +} + +template MatchResult CRegexpT :: MatchExact(const CHART * tstring, int length, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + // info + int endpos = 0; + + CContext context; + if(pContext == 0) pContext = &context; + + pContext->m_stack.Restore(0); + 
pContext->m_capturestack.Restore(0); + pContext->m_captureindex.Restore(0); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + endpos = 0; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + endpos = length; + } + + pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; + pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(-1); + + // match + if( ! m_builder.m_pTopElx->Match( pContext ) ) + return 0; + else + { + while( pContext->m_nCurrentPos != endpos ) + { + if( ! m_builder.m_pTopElx->MatchNext( pContext ) ) + return 0; + else + { + if( pContext->m_nLastBeginPos == pContext->m_nBeginPos && pContext->m_nBeginPos == pContext->m_nCurrentPos ) + return 0; + else + pContext->m_nLastBeginPos = pContext->m_nCurrentPos; + } + } + + // end pos + pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + return MatchResult( pContext, m_builder.m_nMaxNumber ); + } +} + +template MatchResult CRegexpT :: Match(const CHART * tstring, int start, CContext * pContext) const +{ + return Match(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template MatchResult CRegexpT :: Match(const CHART * tstring, int length, int start, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + CContext context; + if(pContext == 0) pContext = &context; + + PrepareMatch(tstring, length, start, pContext); + + return Match( pContext ); +} + +template MatchResult CRegexpT :: Match(CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + int endpos, delta; + + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + endpos = -1; + 
delta = -1; + } + else + { + endpos = pContext->m_pMatchStringLength + 1; + delta = 1; + } + + while(pContext->m_nCurrentPos != endpos) + { + pContext->m_captureindex.Restore(0); + pContext->m_stack .Restore(0); + pContext->m_capturestack.Restore(0); + + pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; + pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(-1); + + if( m_builder.m_pTopElx->Match( pContext ) ) + { + pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + // zero width + if( pContext->m_capturestack[1] == pContext->m_nCurrentPos ) + { + pContext->m_nCurrentPos += delta; + } + + // save pos + pContext->m_nLastBeginPos = pContext->m_nBeginPos; + pContext->m_nBeginPos = pContext->m_nCurrentPos; + + // return + return MatchResult( pContext, m_builder.m_nMaxNumber ); + } + else + { + pContext->m_nCurrentPos += delta; + } + } + + return 0; +} + +template inline CContext * CRegexpT :: PrepareMatch(const CHART * tstring, int start, CContext * pContext) const +{ + return PrepareMatch(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template CContext * CRegexpT :: PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + if(pContext == 0) pContext = new CContext(); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if(start < 0) + { + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + } + } + else + { + if(start > length) start = length + ((m_builder.m_nFlags & RIGHTTOLEFT)?0:1); + + pContext->m_nBeginPos = start; + 
pContext->m_nCurrentPos = start; + } + + return pContext; +} + +template inline int CRegexpT :: GetNamedGroupNumber(const CHART * group_name) const +{ + return m_builder.GetNamedNumber(group_name); +} + +template CHART * CRegexpT :: Replace(const CHART * tstring, const CHART * replaceto, int start, int ntimes, MatchResult * result, CContext * pContext) const +{ + int result_length = 0; + return Replace(tstring, CBufferRefT(tstring).GetSize(), replaceto, CBufferRefT(replaceto).GetSize(), result_length, start, ntimes, result, pContext); +} + +template CHART * CRegexpT :: Replace(const CHART * tstring, int string_length, const CHART * replaceto, int to_length, int & result_length, int start, int ntimes, MatchResult * remote_result, CContext * oContext) const +{ + if(m_builder.m_pTopElx == 0) return 0; + + // --- compile replace to --- + + CBufferT compiledto; + + static const CHART rtoptn[] = { RCHART('\\'), RCHART('$' ), RCHART('('), RCHART('?'), RCHART(':'), RCHART('[' ), RCHART('$' ), RCHART('&' ), RCHART('`' ), RCHART('\''), RCHART('+'), RCHART('_' ), RCHART('\\'), RCHART('d'), RCHART(']'), RCHART('|'), RCHART('\\'), RCHART('{'), RCHART('.'), RCHART('*'), RCHART('?'), RCHART('\\'), RCHART('}'), RCHART(')' ), RCHART('\0') }; + static CRegexpT rtoreg(rtoptn); + + MatchResult local_result(0), * result = remote_result ? 
remote_result : & local_result; + + // prepare + CContext * pContext = rtoreg.PrepareMatch(replaceto, to_length, -1, oContext); + int lastIndex = 0, nmatch = 0; + + while( ((*result) = rtoreg.Match(pContext)).IsMatched() ) + { + int delta = result->GetStart() - lastIndex; + if( delta > 0 ) + { + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + + lastIndex = result->GetStart(); + delta = 2; + + switch(replaceto[lastIndex + 1]) + { + case RCHART('$'): + compiledto.Push(lastIndex); + compiledto.Push(1); + break; + + case RCHART('&'): + case RCHART('`'): + case RCHART('\''): + case RCHART('+'): + case RCHART('_'): + compiledto.Push(-1); + compiledto.Push((int)replaceto[lastIndex + 1]); + break; + + case RCHART('{'): + delta = result->GetEnd() - result->GetStart(); + nmatch = m_builder.GetNamedNumber(CBufferRefT (replaceto + (lastIndex + 2), delta - 3)); + + if(nmatch > 0 && nmatch <= m_builder.m_nMaxNumber) + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + else + { + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + break; + + default: + nmatch = 0; + for(delta=1; delta<=3; delta++) + { + CHART ch = replaceto[lastIndex + delta]; + + if(ch < RCHART('0') || ch > RCHART('9')) + break; + + nmatch = nmatch * 10 + (ch - RCHART('0')); + } + + if(nmatch > m_builder.m_nMaxNumber) + { + while(nmatch > m_builder.m_nMaxNumber) + { + nmatch /= 10; + delta --; + } + + if(nmatch == 0) + { + delta = 1; + } + } + + if(delta == 1) + { + compiledto.Push(lastIndex); + compiledto.Push(1); + } + else + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + break; + } + + lastIndex += delta; + } + + if(lastIndex < to_length) + { + compiledto.Push(lastIndex); + compiledto.Push(to_length - lastIndex); + } + + int rightleft = m_builder.m_nFlags & RIGHTTOLEFT; + + int tb = rightleft ? compiledto.GetSize() - 2 : 0; + int te = rightleft ? -2 : compiledto.GetSize(); + int ts = rightleft ? -2 : 2; + + // --- compile complete --- + + int beginpos = rightleft ? 
string_length : 0; + int endpos = rightleft ? 0 : string_length; + + int toIndex0 = 0; + int toIndex1 = 0; + int i, ntime; + + CBufferT buffer; + + // prepare + pContext = PrepareMatch(tstring, string_length, start, pContext); + lastIndex = beginpos; + + // Match + for(ntime = 0; ntimes < 0 || ntime < ntimes; ntime ++) + { + (*result) = Match(pContext); + + if( ! result->IsMatched() ) + break; + + // before + if( rightleft ) + { + int distance = lastIndex - result->GetEnd(); + if( distance ) + { + buffer.Push(tstring + result->GetEnd()); + buffer.Push((const CHART *)distance); + + toIndex1 -= distance; + } + lastIndex = result->GetStart(); + } + else + { + int distance = result->GetStart() - lastIndex; + if( distance ) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)distance); + + toIndex1 += distance; + } + lastIndex = result->GetEnd(); + } + + toIndex0 = toIndex1; + + // middle + for(i=tb; i!=te; i+=ts) + { + int off = compiledto[i]; + int len = compiledto[i + 1]; + + const CHART * sub = replaceto + off; + + if( off == -1 ) + { + switch(RCHART(len)) + { + case RCHART('&'): + sub = tstring + result->GetStart(); + len = result->GetEnd() - result->GetStart(); + break; + + case RCHART('`'): + sub = tstring; + len = result->GetStart(); + break; + + case RCHART('\''): + sub = tstring + result->GetEnd(); + len = string_length - result->GetEnd(); + break; + + case RCHART('+'): + for(nmatch = result->MaxGroupNumber(); nmatch >= 0; nmatch --) + { + if(result->GetGroupStart(nmatch) >= 0) break; + } + sub = tstring + result->GetGroupStart(nmatch); + len = result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch); + break; + + case RCHART('_'): + sub = tstring; + len = string_length; + break; + } + } + else if( off == -2 ) + { + sub = tstring + result->GetGroupStart(len); + len = result->GetGroupEnd(len) - result->GetGroupStart(len); + } + + buffer.Push(sub); + buffer.Push((const CHART *)len); + + toIndex1 += rightleft ? 
(-len) : len; + } + } + + // after + if(rightleft) + { + if(endpos < lastIndex) + { + buffer.Push(tstring + endpos); + buffer.Push((const CHART *)(lastIndex - endpos)); + } + } + else + { + if(lastIndex < endpos) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)(endpos - lastIndex)); + } + } + + if(oContext == 0) ReleaseContext(pContext); + + // join string + result_length = 0; + for(i=0; i result_string; + result_string.Prepare(result_length); + result_string.Restore(0); + + if(rightleft) + { + for(i=buffer.GetSize()-2; i>=0; i-=2) + { + result_string.Append(buffer[i], (int)buffer[i+1]); + } + } + else + { + for(i=0; im_result.Append(result_length, 3); + result->m_result.Append(ntime); + + if(rightleft) + { + result->m_result.Append(result_length - toIndex1); + result->m_result.Append(result_length - toIndex0); + } + else + { + result->m_result.Append(toIndex0); + result->m_result.Append(toIndex1); + } + + return result_string.Detach(); +} + +template inline void CRegexpT :: ReleaseString(CHART * tstring) +{ + if(tstring != 0) free(tstring); +} + +template inline void CRegexpT :: ReleaseContext(CContext * pContext) +{ + if(pContext != 0) delete pContext; +} + +// +// All implementations +// +template CAlternativeElxT :: CAlternativeElxT() +{ +} + +template int CAlternativeElxT :: Match(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 1; + + // try all + for(int n = 0; n < m_elxlist.GetSize(); n++) + { + if(m_elxlist[n]->Match(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + } + + return 0; +} + +template int CAlternativeElxT :: MatchNext(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 0; + + int n = 0; + + // recall prev + pContext->m_stack.Pop(n); + + // prev + if(m_elxlist[n]->MatchNext(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + else + { + // try rest + for(n++; n < m_elxlist.GetSize(); n++) + { + if(m_elxlist[n]->Match(pContext)) + { + pContext->m_stack.Push(n); + 
return 1; + } + } + + return 0; + } +} + +// assertx.cpp: implementation of the CAssertElx class. +// +template CAssertElxT :: CAssertElxT(ElxInterface * pelx, int byes) +{ + m_pelx = pelx; + m_byes = byes; +} + +template int CAssertElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + int bsucc; + + // match + if( m_byes ) + bsucc = m_pelx->Match(pContext); + else + bsucc = ! m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + + if( bsucc ) + pContext->m_stack.Push(ncsize); + else + pContext->m_capturestack.Restore(ncsize); + + return bsucc; +} + +template int CAssertElxT :: MatchNext(CContext * pContext) const +{ + int ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_capturestack.Restore(ncsize); + + return 0; +} + +// emptyelx.cpp: implementation of the CEmptyElx class. +// +template CEmptyElxT :: CEmptyElxT() +{ +} + +template int CEmptyElxT :: Match(CContext *) const +{ + return 1; +} + +template int CEmptyElxT :: MatchNext(CContext *) const +{ + return 0; +} + +// globalx.cpp: implementation of the CGlobalElx class. +// +template CGlobalElxT ::CGlobalElxT() +{ +} + +template int CGlobalElxT :: Match(CContext * pContext) const +{ + return pContext->m_nCurrentPos == pContext->m_nBeginPos; +} + +template int CGlobalElxT :: MatchNext(CContext *) const +{ + return 0; +} + +// greedelx.cpp: implementation of the CGreedyElx class. +// +template CGreedyElxT :: CGreedyElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CGreedyElxT :: Match(CContext * pContext) const +{ + if( ! CRepeatElxT :: MatchFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! 
CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CGreedyElxT :: MatchNext(CContext * pContext) const +{ + if( MatchNextVart(pContext) ) + return 1; + + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CGreedyElxT :: MatchVart(CContext * pContext) const +{ + int n = 0; + int nbegin00 = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + while(n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n ++; + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(pContext->m_nCurrentPos); + pContext->m_stack.Push(1); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +template int CGreedyElxT :: MatchNextVart(CContext * pContext) const +{ + int n, nbegin00, nsize, ncsize; + CSortedBufferT nbegin99; + pContext->m_stack.Pop(n); + pContext->m_stack.Pop(nbegin00); + pContext->m_stack.Pop(nbegin99); + pContext->m_stack.Pop(nsize); + pContext->m_stack.Pop(ncsize); + + if(n == 0) return 0; + + int n0 = n; + + if( ! 
CRepeatElxT::m_pelx->MatchNext(pContext) ) + { + n --; + } + + // not to re-match + else if(pContext->m_nCurrentPos == nbegin00) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin00; + + return 0; + } + + // fix 2012-10-26, thanks to chenlx01@sohu.com + else + { + CContextShot shot(pContext); + + while(n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n ++; + } + + if(nbegin99.Find(pContext->m_nCurrentPos) >= 0) + { + shot.Restore(pContext); + n = n0; + } + else + { + nbegin99.Add(pContext->m_nCurrentPos); + } + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(nbegin99); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +// indepelx.cpp: implementation of the CIndependentElx class. +// +template CIndependentElxT :: CIndependentElxT(ElxInterface * pelx) +{ + m_pelx = pelx; +} + +template int CIndependentElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + // match + int bsucc = m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + + if( bsucc ) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CIndependentElxT :: MatchNext(CContext * pContext) const +{ + int nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + return 0; +} + +// listelx.cpp: implementation of the CListElx class. +// +template CListElxT :: CListElxT(int brightleft) +{ + m_brightleft = brightleft; +} + +template int CListElxT :: Match(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 1; + + // prepare + int bol = m_brightleft ? m_elxlist.GetSize() : -1; + int stp = m_brightleft ? 
-1 : 1; + int eol = m_brightleft ? -1 : m_elxlist.GetSize(); + + // from first + int n = bol + stp; + + // match all + while(n != eol) + { + if(m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +template int CListElxT :: MatchNext(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 0; + + // prepare + int bol = m_brightleft ? m_elxlist.GetSize() : -1; + int stp = m_brightleft ? -1 : 1; + int eol = m_brightleft ? -1 : m_elxlist.GetSize(); + + // from last + int n = eol - stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + + // match rest + while(n != eol) + { + if(m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +// mresult.cpp: implementation of the MatchResult class. +// +template MatchResultT :: MatchResultT(CContext * pContext, int nMaxNumber) +{ + if(pContext != 0) + { + m_result.Prepare(nMaxNumber * 2 + 3, -1); + + // matched + m_result[0] = 1; + m_result[1] = nMaxNumber; + + for(int n = 0; n <= nMaxNumber; n++) + { + int index = pContext->m_captureindex[n]; + //if( index < 0 ) continue; + if( ! CBracketElxT::CheckCaptureIndex(index, pContext, n) ) continue; + + // check enclosed + int pos1 = pContext->m_capturestack[index + 1]; + int pos2 = pContext->m_capturestack[index + 2]; + + // info + m_result[n*2 + 2] = pos1 < pos2 ? pos1 : pos2; + m_result[n*2 + 3] = pos1 < pos2 ? 
pos2 : pos1; + } + } +} + +template inline int MatchResultT :: IsMatched() const +{ + return m_result.At(0, 0); +} + +template inline int MatchResultT :: MaxGroupNumber() const +{ + return m_result.At(1, 0); +} + +template inline int MatchResultT :: GetStart() const +{ + return m_result.At(2, -1); +} + +template inline int MatchResultT :: GetEnd() const +{ + return m_result.At(3, -1); +} + +template inline int MatchResultT :: GetGroupStart(int nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2, -1); +} + +template inline int MatchResultT :: GetGroupEnd(int nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2 + 1, -1); +} + +template MatchResultT & MatchResultT :: operator = (const MatchResultT & result) +{ + m_result.Restore(0); + if(result.m_result.GetSize() > 0) m_result.Append(result.m_result.GetBuffer(), result.m_result.GetSize()); + + return *this; +} + +// posselx.cpp: implementation of the CPossessiveElx class. +// +template CPossessiveElxT :: CPossessiveElxT(ElxInterface * pelx, int nmin, int nmax) : CGreedyElxT (pelx, nmin, nmax) +{ +} + +template int CPossessiveElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + int bsucc = 1; + + // match + if( ! CRepeatElxT :: MatchFixed(pContext) ) + { + bsucc = 0; + } + else + { + while( ! CGreedyElxT :: MatchVart(pContext) ) + { + if( ! 
CRepeatElxT :: MatchNextFixed(pContext) ) + { + bsucc = 0; + break; + } + } + } + + // status + pContext->m_stack.Restore(nsize); + + if( bsucc ) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CPossessiveElxT :: MatchNext(CContext * pContext) const +{ + int nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + return 0; +} + +// reluctx.cpp: implementation of the CReluctantElx class. +// +template CReluctantElxT :: CReluctantElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CReluctantElxT :: Match(CContext * pContext) const +{ + if( ! CRepeatElxT :: MatchFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CReluctantElxT :: MatchNext(CContext * pContext) const +{ + if( MatchNextVart(pContext) ) + return 1; + + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CReluctantElxT :: MatchVart(CContext * pContext) const +{ + pContext->m_stack.Push(0); + + return 1; +} + +template int CReluctantElxT :: MatchNextVart(CContext * pContext) const +{ + int n = 0, nbegin = pContext->m_nCurrentPos; + + pContext->m_stack.Pop(n); + + if(n < m_nvart && CRepeatElxT :: m_pelx->Match(pContext)) + { + while(pContext->m_nCurrentPos == nbegin) + { + if( ! 
CRepeatElxT :: m_pelx->MatchNext(pContext) ) break; + } + + if(pContext->m_nCurrentPos != nbegin) + { + n ++; + + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + while(n > 0) + { + pContext->m_stack.Pop(nbegin); + + while( CRepeatElxT :: m_pelx->MatchNext(pContext) ) + { + if(pContext->m_nCurrentPos != nbegin) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + n --; + } + + return 0; +} + +// repeatx.cpp: implementation of the CRepeatElx class. +// +template CRepeatElxT :: CRepeatElxT(ElxInterface * pelx, int ntimes) +{ + m_pelx = pelx; + m_nfixed = ntimes; +} + +template int CRepeatElxT :: Match(CContext * pContext) const +{ + return MatchFixed(pContext); +} + +template int CRepeatElxT :: MatchNext(CContext * pContext) const +{ + return MatchNextFixed(pContext); +} + +template int CRepeatElxT :: MatchFixed(CContext * pContext) const +{ + if(m_nfixed == 0) + return 1; + + int n = 0; + + while(n < m_nfixed) + { + if(m_pelx->Match(pContext)) + { + n ++; + } + else + { + n --; + + while(n >= 0 && ! m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + } + } + + return 1; +} + +template int CRepeatElxT :: MatchNextFixed(CContext * pContext) const +{ + if(m_nfixed == 0) + return 0; + + // from last + int n = m_nfixed - 1; + + while(n >= 0 && ! m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + + // match rest + while(n < m_nfixed) + { + if(m_pelx->Match(pContext)) + { + n ++; + } + else + { + n --; + + while(n >= 0 && ! 
m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + } + } + + return 1; +} + +// Regexp +typedef CRegexpT CRegexpA; +typedef CRegexpT CRegexpW; + +#if defined(_UNICODE) || defined(UNICODE) + typedef CRegexpW CRegexp; +#else + typedef CRegexpA CRegexp; +#endif + +#endif//__DEELX_REGEXP__H__ diff --git a/scintilla/deelx/doc/orig_src/deelx13.h b/scintilla/deelx/doc/orig_src/deelx13.h new file mode 100644 index 000000000..8b1a6ed9e --- /dev/null +++ b/scintilla/deelx/doc/orig_src/deelx13.h @@ -0,0 +1,4804 @@ +// deelx.h +// +// DEELX Regular Expression Engine (v1.3) +// +// Copyright 2006 ~ 2013 (c) RegExLab.com +// All Rights Reserved. +// +// http://www.regexlab.com/deelx/ +// +// Author: Ê·ÊÙΰ (sswater shi) +// sswater@gmail.com +// +// $Revision$ +// + +#ifndef __DEELX_REGEXP__H__ +#define __DEELX_REGEXP__H__ + +#include +#include +#include +#include +#include + +extern "C" { + typedef int (*POSIX_FUNC)(int); + int _isblank(int c); +} + +// +// Data Reference +// +template class CBufferRefT +{ +public: + CBufferRefT(const ELT * pcsz, int length); + CBufferRefT(const ELT * pcsz); + +public: + int nCompare (const ELT * pcsz) const; + int nCompareNoCase(const ELT * pcsz) const; + int Compare (const ELT * pcsz) const; + int CompareNoCase(const ELT * pcsz) const; + int Compare (const CBufferRefT &) const; + int CompareNoCase(const CBufferRefT &) const; + + ELT At (int nIndex, ELT def = 0) const; + ELT operator [] (int nIndex) const; + + const ELT * GetBuffer() const; + int GetSize() const; + +public: + virtual ~CBufferRefT(); + +// Content +protected: + ELT * m_pBuffer; + int m_nSize; +}; + +// +// Implemenation +// +template CBufferRefT :: CBufferRefT(const ELT * pcsz, int length) +{ + m_pBuffer = (ELT *)pcsz; + m_nSize = length; +} + +template CBufferRefT :: CBufferRefT(const ELT * pcsz) +{ + m_pBuffer = (ELT *)pcsz; + m_nSize = 0; + + if(pcsz != 0) while(m_pBuffer[m_nSize] != 0) m_nSize ++; +} + +template int CBufferRefT :: nCompare(const 
ELT * pcsz) const +{ + for(int i=0; i int CBufferRefT :: nCompareNoCase(const ELT * pcsz) const +{ + for(int i=0; i inline int CBufferRefT :: Compare(const ELT * pcsz) const +{ + return nCompare(pcsz) ? 1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT :: CompareNoCase(const ELT * pcsz) const +{ + return nCompareNoCase(pcsz) ? 1 : (int)pcsz[m_nSize]; +} + +template inline int CBufferRefT :: Compare(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompare(cref.GetBuffer()) : 1; +} + +template inline int CBufferRefT :: CompareNoCase(const CBufferRefT & cref) const +{ + return m_nSize == cref.m_nSize ? nCompareNoCase(cref.GetBuffer()) : 1; +} + +template inline ELT CBufferRefT :: At(int nIndex, ELT def) const +{ + return nIndex >= m_nSize ? def : m_pBuffer[nIndex]; +} + +template inline ELT CBufferRefT :: operator [] (int nIndex) const +{ + return nIndex >= m_nSize ? 0 : m_pBuffer[nIndex]; +} + +template const ELT * CBufferRefT :: GetBuffer() const +{ + static const ELT _def[] = {0}; return m_pBuffer ? m_pBuffer : _def; +} + +template inline int CBufferRefT :: GetSize() const +{ + return m_nSize; +} + +template CBufferRefT :: ~CBufferRefT() +{ +} + +// +// Data Buffer +// +template class CBufferT : public CBufferRefT +{ +public: + CBufferT(const ELT * pcsz, int length); + CBufferT(const ELT * pcsz); + CBufferT(); + +public: + ELT & operator [] (int nIndex); + const ELT & operator [] (int nIndex) const; + void Append(const ELT * pcsz, int length, int eol = 0); + void Append(ELT el, int eol = 0); + +public: + void Push(ELT el); + void Push(const CBufferRefT & buf); + int Pop (ELT & el); + int Pop (CBufferT & buf); + int Peek(ELT & el) const; + +public: + const ELT * GetBuffer() const; + ELT * GetBuffer(); + ELT * Detach(); + void Release(); + void Prepare(int index, int fill = 0); + void Restore(int size); + + ELT * PrepareInsert(int nPos, int nSize) + { + int nOldSize = CBufferRefT::m_nSize; + Restore(nPos > CBufferRefT::m_nSize ? 
nPos : CBufferRefT::m_nSize + nSize); + + if( nPos < nOldSize ) + { + ELT * from = CBufferRefT::m_pBuffer + nPos, * to = CBufferRefT::m_pBuffer + nPos + nSize; + memmove(to, from, sizeof(ELT) * (nOldSize - nPos)); + } + + return CBufferRefT::m_pBuffer + nPos; + } + + void Insert(int nIndex, const ELT & rT) + { + Insert(nIndex, &rT, 1); + } + + void Insert(int nIndex, const ELT * pT, int nSize) + { + memcpy(PrepareInsert(nIndex, nSize), pT, sizeof(ELT) * nSize); + } + + void Remove(int nIndex) + { + Remove(nIndex, 1); + } + + void Remove(int nIndex, int nSize) + { + if( nIndex < CBufferRefT :: m_nSize ) + { + if( nIndex + nSize >= CBufferRefT :: m_nSize ) + { + Restore(nIndex); + } + else + { + memmove(CBufferRefT :: m_pBuffer + nIndex, CBufferRefT :: m_pBuffer + nIndex + nSize, sizeof(ELT) * (CBufferRefT :: m_nSize - nIndex - nSize)); + Restore(CBufferRefT :: m_nSize - nSize); + } + } + } + + void SetMaxLength(int nSize) + { + if( nSize > m_nMaxLength ) + { + if( m_nMaxLength < 8 ) + m_nMaxLength = 8; + + if( nSize > m_nMaxLength ) + m_nMaxLength *= 2; + + if( nSize > m_nMaxLength ) + { + m_nMaxLength = nSize + 11; + m_nMaxLength -= m_nMaxLength & 0x07; + } + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT :: m_pBuffer, sizeof(ELT) * m_nMaxLength); + } + } + +public: + virtual ~CBufferT(); + +// Content +protected: + int m_nMaxLength; +}; + +// +// Implemenation +// +template CBufferT :: CBufferT(const ELT * pcsz, int length) : CBufferRefT (0, length) +{ + m_nMaxLength = CBufferRefT :: m_nSize + 1; + + CBufferRefT :: m_pBuffer = (ELT *) malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT :: m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template CBufferT :: CBufferT(const ELT * pcsz) : CBufferRefT (pcsz) +{ + m_nMaxLength = CBufferRefT :: m_nSize + 1; + + CBufferRefT :: m_pBuffer = (ELT *) malloc(sizeof(ELT) * m_nMaxLength); + memcpy(CBufferRefT::m_pBuffer, pcsz, sizeof(ELT) * 
CBufferRefT :: m_nSize); + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template CBufferT :: CBufferT() : CBufferRefT (0, 0) +{ + m_nMaxLength = 0; + CBufferRefT::m_pBuffer = 0; +} + +template inline ELT & CBufferT :: operator [] (int nIndex) +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template inline const ELT & CBufferT :: operator [] (int nIndex) const +{ + return CBufferRefT::m_pBuffer[nIndex]; +} + +template void CBufferT :: Append(const ELT * pcsz, int length, int eol) +{ + int nNewLength = m_nMaxLength; + + // Check length + if(nNewLength < 8) + nNewLength = 8; + + if(CBufferRefT :: m_nSize + length + eol > nNewLength) + nNewLength *= 2; + + if(CBufferRefT :: m_nSize + length + eol > nNewLength) + { + nNewLength = CBufferRefT :: m_nSize + length + eol + 11; + nNewLength -= nNewLength % 8; + } + + // Realloc + if(nNewLength > m_nMaxLength) + { + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + memcpy(CBufferRefT::m_pBuffer + CBufferRefT :: m_nSize, pcsz, sizeof(ELT) * length); + CBufferRefT :: m_nSize += length; + + if(eol > 0) CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize] = 0; +} + +template inline void CBufferT :: Append(ELT el, int eol) +{ + Append(&el, 1, eol); +} + +template void CBufferT :: Push(ELT el) +{ + // Realloc + if(CBufferRefT :: m_nSize >= m_nMaxLength) + { + int nNewLength = m_nMaxLength * 2; + if( nNewLength < 8 ) nNewLength = 8; + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // Append + CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize++] = el; +} + +template void CBufferT :: Push(const CBufferRefT & buf) +{ + for(int i=0; i inline int CBufferT :: Pop(ELT & el) +{ + if(CBufferRefT :: m_nSize > 0) + { + el = CBufferRefT::m_pBuffer[--CBufferRefT :: m_nSize]; + return 1; + } + else + { + return 0; + } +} + +template int CBufferT :: Pop (CBufferT & 
buf) +{ + int size, res = 1; + res = res && Pop(*(ELT*)&size); + buf.Restore(size); + + for(int i=size-1; i>=0; i--) + { + res = res && Pop(buf[i]); + } + + return res; +} + +template inline int CBufferT :: Peek(ELT & el) const +{ + if(CBufferRefT :: m_nSize > 0) + { + el = CBufferRefT::m_pBuffer[CBufferRefT :: m_nSize - 1]; + return 1; + } + else + { + return 0; + } +} + +template const ELT * CBufferT :: GetBuffer() const +{ + static const ELT _def[] = {0}; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : _def; +} + +template ELT * CBufferT :: GetBuffer() +{ + static const ELT _def[] = {0}; return CBufferRefT::m_pBuffer ? CBufferRefT::m_pBuffer : (ELT *)_def; +} + +template ELT * CBufferT :: Detach() +{ + ELT * pBuffer = CBufferRefT::m_pBuffer; + + CBufferRefT :: m_pBuffer = 0; + CBufferRefT :: m_nSize = m_nMaxLength = 0; + + return pBuffer; +} + +template void CBufferT :: Release() +{ + ELT * pBuffer = Detach(); + + if(pBuffer != 0) free(pBuffer); +} + +template void CBufferT :: Prepare(int index, int fill) +{ + int nNewSize = index + 1; + + // Realloc + if(nNewSize > m_nMaxLength) + { + int nNewLength = m_nMaxLength; + + if( nNewLength < 8 ) + nNewLength = 8; + + if( nNewSize > nNewLength ) + nNewLength *= 2; + + if( nNewSize > nNewLength ) + { + nNewLength = nNewSize + 11; + nNewLength -= nNewLength % 8; + } + + CBufferRefT :: m_pBuffer = (ELT *) realloc(CBufferRefT::m_pBuffer, sizeof(ELT) * nNewLength); + m_nMaxLength = nNewLength; + } + + // size + if( CBufferRefT :: m_nSize < nNewSize ) + { + memset(CBufferRefT::m_pBuffer + CBufferRefT :: m_nSize, fill, sizeof(ELT) * (nNewSize - CBufferRefT :: m_nSize)); + CBufferRefT :: m_nSize = nNewSize; + } +} + +template inline void CBufferT :: Restore(int size) +{ + SetMaxLength(size); + CBufferRefT :: m_nSize = size; +} + +template CBufferT :: ~CBufferT() +{ + if(CBufferRefT::m_pBuffer != 0) free(CBufferRefT::m_pBuffer); +} + +template class CSortedBufferT : public CBufferT +{ +public: + CSortedBufferT(int 
reverse = 0); + CSortedBufferT(int(*)(const void *, const void *)); + +public: + void Add(const T & rT); + void Add(const T * pT, int nSize); + int Remove(const T & rT); + void RemoveAll(); + + void SortFreeze() { m_bSortFreezed = 1; } + void SortUnFreeze(); + +public: + int Find(const T & rT, int(* compare)(const void *, const void *) = 0) { return FindAs(*(T*)&rT, compare); } + int FindAs(const T & rT, int(*)(const void *, const void *) = 0); + int GetSize() const { return CBufferRefT::m_nSize; } + T & operator [] (int nIndex) { return CBufferT :: operator [] (nIndex); } + +protected: + int (* m_fncompare)(const void *, const void *); + static int compareT(const void *, const void *); + static int compareReverseT(const void *, const void *); + + int m_bSortFreezed; +}; + +template CSortedBufferT :: CSortedBufferT(int reverse) +{ + m_fncompare = reverse ? compareReverseT : compareT; + m_bSortFreezed = 0; +} + +template CSortedBufferT :: CSortedBufferT(int (* compare)(const void *, const void *)) +{ + m_fncompare = compare; + m_bSortFreezed = 0; +} + +template void CSortedBufferT :: Add(const T & rT) +{ + if(m_bSortFreezed != 0) + { + CBufferT :: Append(rT); + return; + } + + int a = 0, b = CBufferRefT::m_nSize - 1, c = CBufferRefT::m_nSize / 2; + + while(a <= b) + { + int r = m_fncompare(&rT, &CBufferRefT::m_pBuffer[c]); + + if ( r < 0 ) b = c - 1; + else if( r > 0 ) a = c + 1; + else break; + + c = (a + b + 1) / 2; + } + + CBufferT :: Insert(c, rT); +} + +template void CSortedBufferT :: Add(const T * pT, int nSize) +{ + CBufferT :: Append(pT, nSize); + + if(m_bSortFreezed == 0) + { + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT :: FindAs(const T & rT, int(* compare)(const void *, const void *)) +{ + const T * pT = (const T *)bsearch(&rT, CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), compare == 0 ? 
m_fncompare : compare); + + if( pT != NULL ) + return pT - CBufferRefT::m_pBuffer; + else + return -1; +} + +template int CSortedBufferT :: Remove(const T & rT) +{ + int pos = Find(rT); + if( pos >= 0 ) CBufferT :: Remove(pos); + return pos; +} + +template inline void CSortedBufferT :: RemoveAll() +{ + CBufferT::Restore(0); +} + +template void CSortedBufferT :: SortUnFreeze() +{ + if(m_bSortFreezed != 0) + { + m_bSortFreezed = 0; + qsort(CBufferRefT::m_pBuffer, CBufferRefT::m_nSize, sizeof(T), m_fncompare); + } +} + +template int CSortedBufferT :: compareT(const void * elem1, const void * elem2) +{ + if( *(const T *)elem1 == *(const T *)elem2 ) + return 0; + else if( *(const T *)elem1 < *(const T *)elem2 ) + return -1; + else + return 1; +} + +template int CSortedBufferT :: compareReverseT(const void * elem1, const void * elem2) +{ + if( *(const T *)elem1 == *(const T *)elem2 ) + return 0; + else if( *(const T *)elem1 > *(const T *)elem2 ) + return -1; + else + return 1; +} + +// +// Context +// +class CContext +{ +public: + CBufferT m_stack; + CBufferT m_capturestack, m_captureindex; + +public: + int m_nCurrentPos; + int m_nBeginPos; + int m_nLastBeginPos; + int m_nParenZindex; + int m_nCursiveLimit; + + void * m_pMatchString; + int m_pMatchStringLength; +}; + +class CContextShot +{ +public: + CContextShot(CContext * pContext) + { + m_nCurrentPos = pContext->m_nCurrentPos; + nsize = pContext->m_stack.GetSize(); + ncsize = pContext->m_capturestack.GetSize(); + } + + void Restore(CContext * pContext) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = m_nCurrentPos; + } + +public: + int m_nCurrentPos; + int nsize ; + int ncsize; +}; + +// +// Interface +// +class ElxInterface +{ +public: + virtual int Match (CContext * pContext) const = 0; + virtual int MatchNext(CContext * pContext) const = 0; + +public: + virtual ~ElxInterface() {}; +}; + +// +// Alternative +// +template class CAlternativeElxT : public 
ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAlternativeElxT(); + +public: + CBufferT m_elxlist; +}; + +typedef CAlternativeElxT <0> CAlternativeElx; + +// +// Assert +// +template class CAssertElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CAssertElxT(ElxInterface * pelx, int byes = 1); + +public: + ElxInterface * m_pelx; + int m_byes; +}; + +typedef CAssertElxT <0> CAssertElx; + +// +// Back reference elx +// +template class CBackrefElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBackrefElxT(int nnumber, int brightleft, int bignorecase); + +public: + int m_nnumber; + int m_brightleft; + int m_bignorecase; + + CBufferT m_szNamed; +}; + +// +// Implementation +// +template CBackrefElxT :: CBackrefElxT(int nnumber, int brightleft, int bignorecase) +{ + m_nnumber = nnumber; + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CBackrefElxT :: Match(CContext * pContext) const +{ + // check number, for named + if( m_nnumber < 0 || m_nnumber >= pContext->m_captureindex.GetSize() ) return 0; + + int index = pContext->m_captureindex[m_nnumber]; + if( index < 0 ) return 0; + + // check enclosed + int pos1 = pContext->m_capturestack[index + 1]; + int pos2 = pContext->m_capturestack[index + 2]; + + if( pos2 < 0 ) pos2 = pContext->m_nCurrentPos; + + // info + int lpos = pos1 < pos2 ? pos1 : pos2; + int rpos = pos1 < pos2 ? pos2 : pos1; + int slen = rpos - lpos; + + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + + // compare + int bsucc; + CBufferRefT refstr(pcsz + lpos, slen); + + if( m_brightleft ) + { + if(npos < slen) + return 0; + + if(m_bignorecase) + bsucc = ! 
refstr.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = ! refstr.nCompare (pcsz + (npos - slen)); + + if( bsucc ) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos -= slen; + } + } + else + { + if(npos + slen > tlen) + return 0; + + if(m_bignorecase) + bsucc = ! refstr.nCompareNoCase(pcsz + npos); + else + bsucc = ! refstr.nCompare (pcsz + npos); + + if( bsucc ) + { + pContext->m_stack.Push(npos); + pContext->m_nCurrentPos += slen; + } + } + + return bsucc; +} + +template int CBackrefElxT :: MatchNext(CContext * pContext) const +{ + int npos = 0; + + pContext->m_stack.Pop(npos); + pContext->m_nCurrentPos = npos; + + return 0; +} + +// RCHART +#ifndef RCHART + #define RCHART(ch) ((CHART)ch) +#endif + +// BOUNDARY_TYPE +enum BOUNDARY_TYPE +{ + BOUNDARY_FILE_BEGIN, // begin of whole text + BOUNDARY_FILE_END , // end of whole text + BOUNDARY_FILE_END_N, // end of whole text, or before newline at the end + BOUNDARY_LINE_BEGIN, // begin of line + BOUNDARY_LINE_END , // end of line + BOUNDARY_WORD_BEGIN, // begin of word + BOUNDARY_WORD_END , // end of word + BOUNDARY_WORD_EDGE +}; + +// +// Boundary Elx +// +template class CBoundaryElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBoundaryElxT(int ntype, int byes = 1); + +protected: + static int IsWordChar(CHART ch); + +public: + int m_ntype; + int m_byes; +}; + +// +// Implementation +// +template CBoundaryElxT :: CBoundaryElxT(int ntype, int byes) +{ + m_ntype = ntype; + m_byes = byes; +} + +template int CBoundaryElxT :: Match(CContext * pContext) const +{ + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + + CHART chL = npos > 0 ? pcsz[npos - 1] : 0; + CHART chR = npos < tlen ? 
pcsz[npos ] : 0; + + int bsucc = 0; + + switch(m_ntype) + { + case BOUNDARY_FILE_BEGIN: + bsucc = (npos <= 0); + break; + + case BOUNDARY_FILE_END: + bsucc = (npos >= tlen); + break; + + case BOUNDARY_FILE_END_N: + bsucc = (npos >= tlen) || (pcsz[tlen-1] == RCHART('\n') && (npos == tlen-1 || (pcsz[tlen-2] == RCHART('\r') && npos == tlen-2))); + break; + + case BOUNDARY_LINE_BEGIN: + bsucc = (npos <= 0 ) || (chL == RCHART('\n')) || ((chL == RCHART('\r')) && (chR != RCHART('\n'))); + break; + + case BOUNDARY_LINE_END: + bsucc = (npos >= tlen) || (chR == RCHART('\r')) || ((chR == RCHART('\n')) && (chL != RCHART('\r'))); + break; + + case BOUNDARY_WORD_BEGIN: + bsucc = ! IsWordChar(chL) && IsWordChar(chR); + break; + + case BOUNDARY_WORD_END: + bsucc = IsWordChar(chL) && ! IsWordChar(chR); + break; + + case BOUNDARY_WORD_EDGE: + bsucc = IsWordChar(chL) ? ! IsWordChar(chR) : IsWordChar(chR); + break; + } + + return m_byes ? bsucc : ! bsucc; +} + +template int CBoundaryElxT :: MatchNext(CContext *) const +{ + return 0; +} + +template inline int CBoundaryElxT :: IsWordChar(CHART ch) +{ + return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_')); +} + +// +// Bracket +// +template class CBracketElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CBracketElxT(int nnumber, int bright); + static int CheckCaptureIndex(int & index, CContext * pContext, int number); + +public: + int m_nnumber; + int m_bright; + int m_balancing; + + CBufferT m_szNamed; + CBufferT m_szBalancing; +}; + +template CBracketElxT :: CBracketElxT(int nnumber, int bright) +{ + m_nnumber = nnumber; + m_bright = bright; + m_balancing = -1; +} + +template inline int CBracketElxT :: CheckCaptureIndex(int & index, CContext * pContext, int number) +{ + if( index >= pContext->m_capturestack.GetSize() ) + index = 
pContext->m_capturestack.GetSize() - 4; + + while(index >= 0) + { + if(pContext->m_capturestack[index] == number) + { + return 1; + } + + index -= 4; + } + + + return 0; +} + +// +// capturestack[index+0] => Group number +// capturestack[index+1] => Capture start pos +// capturestack[index+2] => Capture end pos +// capturestack[index+3] => Capture enclose z-index, zindex<0 means inner group with same name +// +template int CBracketElxT :: Match(CContext * pContext) const +{ + // check, for named + if(m_nnumber < 0) return 0; + + if( ! m_bright ) + { + pContext->m_captureindex.Prepare(m_nnumber, -1); + int index = pContext->m_captureindex[m_nnumber]; + + // check + if(CheckCaptureIndex(index, pContext, m_nnumber) && pContext->m_capturestack[index+2] < 0) + { + pContext->m_capturestack[index+3] --; + return 1; + } + + // balancing left + if(m_balancing >= 0) + { + int balancing_index = pContext->m_captureindex[m_balancing]; + if( ! CheckCaptureIndex(balancing_index, pContext, m_balancing) || + pContext->m_capturestack[balancing_index+2] < 0 ) + { + return 0; + } + } + + // save + pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize(); + + pContext->m_capturestack.Push(m_nnumber); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push( 0); // z-index + } + else + { + // check + int index = pContext->m_captureindex[m_nnumber]; + + if(CheckCaptureIndex(index, pContext, m_nnumber)) + { + if(pContext->m_capturestack[index + 3] < 0) // check inner group with same name + { + pContext->m_capturestack[index + 3] ++; + return 1; + } + + // balancing right + int balancing_index = -1; + if(m_balancing >= 0) + { + balancing_index = pContext->m_captureindex[m_balancing]; + if( ! 
CheckCaptureIndex(balancing_index, pContext, m_balancing) ) + { + // TODO ERROR + return 0; + } + } + + // save + pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos; + pContext->m_capturestack[index + 3] = pContext->m_nParenZindex ++; + + // balancing right + if(m_balancing >= 0) + { + // backup index + pContext->m_stack.Push(balancing_index); + + if(balancing_index >= 0) + { + pContext->m_capturestack[index+2] = pContext->m_capturestack[index+1]; + pContext->m_capturestack[index+1] = pContext->m_capturestack[balancing_index+2]; + + // destopy capture + pContext->m_capturestack[balancing_index] = -1; + balancing_index -= 4; + CheckCaptureIndex(balancing_index, pContext, m_balancing); + pContext->m_captureindex[m_balancing] = balancing_index; + } + } + } + } + + return 1; +} + +template int CBracketElxT :: MatchNext(CContext * pContext) const +{ + int index = pContext->m_captureindex[m_nnumber]; + if( ! CheckCaptureIndex(index, pContext, m_nnumber) ) + { + return 0; + } + + if( ! 
m_bright ) + { + if(pContext->m_capturestack[index + 3] < 0) + { + pContext->m_capturestack[index + 3] ++; + return 0; + } + + pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4); + + // to find + CheckCaptureIndex(index, pContext, m_nnumber); + + // new index + pContext->m_captureindex[m_nnumber] = index; + } + else + { + if( pContext->m_capturestack[index + 2] >= 0 ) + { + // balancing right + if(m_balancing >= 0) + { + int balancing_index = -1; + pContext->m_stack.Pop(balancing_index); + + if(balancing_index >= 0) + { + pContext->m_capturestack[balancing_index] = m_balancing; + pContext->m_captureindex[m_balancing] = balancing_index; + } + } + + pContext->m_capturestack[index + 2] = -1; + pContext->m_capturestack[index + 3] = 0; + } + else + { + pContext->m_capturestack[index + 3] --; + } + } + + return 0; +} + +// +// Deletage +// +template class CDelegateElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CDelegateElxT(int ndata = 0); + +public: + ElxInterface * m_pelx; + int m_ndata; // +0 : recursive to + // -3 : named recursive + + CBufferT m_szNamed; +}; + +template CDelegateElxT :: CDelegateElxT(int ndata) +{ + m_pelx = 0; + m_ndata = ndata; +} + +template int CDelegateElxT :: Match(CContext * pContext) const +{ + if(m_pelx != 0) + { + if(pContext->m_nCursiveLimit > 0) + { + pContext->m_nCursiveLimit --; + int result = m_pelx->Match(pContext); + pContext->m_nCursiveLimit ++; + return result; + } + else + return 0; + } + else + return 1; +} + +template int CDelegateElxT :: MatchNext(CContext * pContext) const +{ + if(m_pelx != 0) + return m_pelx->MatchNext(pContext); + else + return 0; +} + +// +// Empty +// +template class CEmptyElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CEmptyElxT(); +}; + +typedef CEmptyElxT <0> CEmptyElx; + +// +// Global +// +template 
class CGlobalElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGlobalElxT(); +}; + +typedef CGlobalElxT <0> CGlobalElx; + +// +// Repeat +// +template class CRepeatElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRepeatElxT(ElxInterface * pelx, int ntimes); + +protected: + int MatchFixed (CContext * pContext) const; + int MatchNextFixed(CContext * pContext) const; + int MatchForward (CContext * pContext) const + { + CContextShot shot(pContext); + + if( ! m_pelx->Match(pContext) ) + return 0; + + if(pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + if( ! m_pelx->MatchNext(pContext) ) + return 0; + + if(pContext->m_nCurrentPos != shot.m_nCurrentPos) + return 1; + + shot.Restore(pContext); + return 0; + } + +public: + ElxInterface * m_pelx; + int m_nfixed; +}; + +typedef CRepeatElxT <0> CRepeatElx; + +// +// Greedy +// +template class CGreedyElxT : public CRepeatElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + int MatchVart (CContext * pContext) const; + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CGreedyElxT <0> CGreedyElx; + +// +// Independent +// +template class CIndependentElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CIndependentElxT(ElxInterface * pelx); + +public: + ElxInterface * m_pelx; +}; + +typedef CIndependentElxT <0> CIndependentElx; + +// +// List +// +template class CListElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CListElxT(int brightleft); + +public: + CBufferT m_elxlist; + int m_brightleft; 
+}; + +typedef CListElxT <0> CListElx; + +// +// Posix Elx +// +template class CPosixElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPosixElxT(const char * posix, int brightleft); + +public: + POSIX_FUNC m_posixfun; + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CPosixElxT :: CPosixElxT(const char * posix, int brightleft) +{ + m_brightleft = brightleft; + + if(posix[1] == '^') + { + m_byes = 0; + posix += 2; + } + else + { + m_byes = 1; + posix += 1; + } + + if (!strncmp(posix, "alnum:", 6)) m_posixfun = ::isalnum ; + else if(!strncmp(posix, "alpha:", 6)) m_posixfun = ::isalpha ; + else if(!strncmp(posix, "ascii:", 6)) m_posixfun = ::isascii ; + else if(!strncmp(posix, "cntrl:", 6)) m_posixfun = ::iscntrl ; + else if(!strncmp(posix, "digit:", 6)) m_posixfun = ::isdigit ; + else if(!strncmp(posix, "graph:", 6)) m_posixfun = ::isgraph ; + else if(!strncmp(posix, "lower:", 6)) m_posixfun = ::islower ; + else if(!strncmp(posix, "print:", 6)) m_posixfun = ::isprint ; + else if(!strncmp(posix, "punct:", 6)) m_posixfun = ::ispunct ; + else if(!strncmp(posix, "space:", 6)) m_posixfun = ::isspace ; + else if(!strncmp(posix, "upper:", 6)) m_posixfun = ::isupper ; + else if(!strncmp(posix, "xdigit:",7)) m_posixfun = ::isxdigit; + else if(!strncmp(posix, "blank:", 6)) m_posixfun = _isblank ; + else m_posixfun = 0 ; +} + +inline int _isblank(int c) +{ + return c == 0x20 || c == '\t'; +} + +template int CPosixElxT :: Match(CContext * pContext) const +{ + if(m_posixfun == 0) return 0; + + int tlen = pContext->m_pMatchStringLength; + int npos = pContext->m_nCurrentPos; + + // check + int at = m_brightleft ? npos - 1 : npos; + if( at < 0 || at >= tlen ) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + + int bsucc = (*m_posixfun)(ch); + + if( ! m_byes ) + bsucc = ! bsucc; + + if( bsucc ) + pContext->m_nCurrentPos += m_brightleft ? 
-1 : 1; + + return bsucc; +} + +template int CPosixElxT :: MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? -1 : 1; + return 0; +} + +// +// Possessive +// +template class CPossessiveElxT : public CGreedyElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); +}; + +typedef CPossessiveElxT <0> CPossessiveElx; + +// +// Range Elx +// +template class CRangeElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CRangeElxT(int brightleft, int byes); + +public: + int IsContainChar(CHART ch) const; + +public: + CBufferT m_ranges; + CBufferT m_chars; + CBufferT m_embeds; + +public: + int m_brightleft; + int m_byes; +}; + +// +// Implementation +// +template CRangeElxT :: CRangeElxT(int brightleft, int byes) +{ + m_brightleft = brightleft; + m_byes = byes; +} + +template int CRangeElxT :: Match(CContext * pContext) const +{ + int tlen = pContext->m_pMatchStringLength; + int npos = pContext->m_nCurrentPos; + + // check + int at = m_brightleft ? npos - 1 : npos; + if( at < 0 || at >= tlen ) + return 0; + + CHART ch = ((const CHART *)pContext->m_pMatchString)[at]; + int bsucc = 0, i; + + // compare + for(i=0; !bsucc && iMatch(pContext)) + { + pContext->m_nCurrentPos = npos; + bsucc = 1; + } + } + + if( ! m_byes ) + bsucc = ! bsucc; + + if( bsucc ) + pContext->m_nCurrentPos += m_brightleft ? -1 : 1; + + return bsucc; +} + +template int CRangeElxT :: IsContainChar(CHART ch) const +{ + int bsucc = 0, i; + + // compare + for(i=0; !bsucc && i int CRangeElxT :: MatchNext(CContext * pContext) const +{ + pContext->m_nCurrentPos -= m_brightleft ? 
-1 : 1; + return 0; +} + +// +// Reluctant +// +template class CReluctantElxT : public CRepeatElxT +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX); + +protected: + int MatchVart (CContext * pContext) const; + int MatchNextVart(CContext * pContext) const; + +public: + int m_nvart; +}; + +typedef CReluctantElxT <0> CReluctantElx; + +// +// String Elx +// +template class CStringElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase); + +public: + CBufferT m_szPattern; + int m_brightleft; + int m_bignorecase; +}; + +// +// Implementation +// +template CStringElxT :: CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength) +{ + m_brightleft = brightleft; + m_bignorecase = bignorecase; +} + +template int CStringElxT :: Match(CContext * pContext) const +{ + const CHART * pcsz = (const CHART *)pContext->m_pMatchString; + int npos = pContext->m_nCurrentPos; + int tlen = pContext->m_pMatchStringLength; + int slen = m_szPattern.GetSize(); + + int bsucc; + + if(m_brightleft) + { + if(npos < slen) + return 0; + + if(m_bignorecase) + bsucc = ! m_szPattern.nCompareNoCase(pcsz + (npos - slen)); + else + bsucc = ! m_szPattern.nCompare (pcsz + (npos - slen)); + + if( bsucc ) + pContext->m_nCurrentPos -= slen; + } + else + { + if(npos + slen > tlen) + return 0; + + if(m_bignorecase) + bsucc = ! m_szPattern.nCompareNoCase(pcsz + npos); + else + bsucc = ! 
m_szPattern.nCompare (pcsz + npos); + + if( bsucc ) + pContext->m_nCurrentPos += slen; + } + + return bsucc; +} + +template int CStringElxT :: MatchNext(CContext * pContext) const +{ + int slen = m_szPattern.GetSize(); + + if(m_brightleft) + pContext->m_nCurrentPos += slen; + else + pContext->m_nCurrentPos -= slen; + + return 0; +} + +// +// CConditionElx +// +template class CConditionElxT : public ElxInterface +{ +public: + int Match (CContext * pContext) const; + int MatchNext(CContext * pContext) const; + +public: + CConditionElxT(); + +public: + // backref condition + int m_nnumber; + CBufferT m_szNamed; + + // elx condition + ElxInterface * m_pelxask; + + // selection + ElxInterface * m_pelxyes, * m_pelxno; +}; + +template CConditionElxT :: CConditionElxT() +{ + m_nnumber = -1; +} + +template int CConditionElxT :: Match(CContext * pContext) const +{ + // status + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + // condition result + int condition_yes = 0; + + // backref type + if( m_nnumber >= 0 ) + { + do + { + if(m_nnumber >= pContext->m_captureindex.GetSize()) break; + + int index = pContext->m_captureindex[m_nnumber]; + if( index < 0) break; + + // else valid + condition_yes = 1; + } + while(0); + } + else + { + if( m_pelxask == 0 ) + condition_yes = 1; + else + condition_yes = m_pelxask->Match(pContext); + + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + } + + // elx result + int bsucc; + if( condition_yes ) + bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext); + else + bsucc = m_pelxno == 0 ? 
1 : m_pelxno ->Match(pContext); + + if( bsucc ) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +template int CConditionElxT :: MatchNext(CContext * pContext) const +{ + // pop + int ncsize, condition_yes; + + pContext->m_stack.Pop(condition_yes); + pContext->m_stack.Pop(ncsize); + + // elx result + int bsucc; + if( condition_yes ) + bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext); + else + bsucc = m_pelxno == 0 ? 0 : m_pelxno ->MatchNext(pContext); + + if( bsucc ) + { + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(condition_yes); + } + else + { + pContext->m_capturestack.Restore(ncsize); + } + + return bsucc; +} + +// +// MatchResult +// +template class MatchResultT +{ +public: + int IsMatched() const; + +public: + int GetStart() const; + int GetEnd () const; + +public: + int MaxGroupNumber() const; + int GetGroupStart(int nGroupNumber) const; + int GetGroupEnd (int nGroupNumber) const; + +public: + MatchResultT(const MatchResultT & from) { *this = from; } + MatchResultT(CContext * pContext = 0, int nMaxNumber = -1); + MatchResultT & operator = (const MatchResultT &); + inline operator int() const { return IsMatched(); } + +public: + CBufferT m_result; +}; + +typedef MatchResultT <0> MatchResult; + +// Stocked Elx IDs +enum STOCKELX_ID_DEFINES +{ + STOCKELX_EMPTY = 0, + + /////////////////////// + + STOCKELX_DOT_ALL, + STOCKELX_DOT_NOT_ALL, + + STOCKELX_WORD, + STOCKELX_WORD_NOT, + + STOCKELX_SPACE, + STOCKELX_SPACE_NOT, + + STOCKELX_DIGITAL, + STOCKELX_DIGITAL_NOT, + + ////////////////////// + + STOCKELX_DOT_ALL_RIGHTLEFT, + STOCKELX_DOT_NOT_ALL_RIGHTLEFT, + + STOCKELX_WORD_RIGHTLEFT, + STOCKELX_WORD_RIGHTLEFT_NOT, + + STOCKELX_SPACE_RIGHTLEFT, + STOCKELX_SPACE_RIGHTLEFT_NOT, + + STOCKELX_DIGITAL_RIGHTLEFT, + STOCKELX_DIGITAL_RIGHTLEFT_NOT, + + ///////////////////// + + STOCKELX_COUNT +}; + +// REGEX_FLAGS +#ifndef 
_REGEX_FLAGS_DEFINED + enum REGEX_FLAGS + { + NO_FLAG = 0, + SINGLELINE = 0x01, + MULTILINE = 0x02, + GLOBAL = 0x04, + IGNORECASE = 0x08, + RIGHTTOLEFT = 0x10, + EXTENDED = 0x20 + }; + #define _REGEX_FLAGS_DEFINED +#endif + +// +// Builder T +// +template class CBuilderT +{ +public: + typedef CDelegateElxT CDelegateElx; + typedef CBracketElxT CBracketElx; + typedef CBackrefElxT CBackrefElx; + typedef CConditionElxT CConditionElx; + +// Methods +public: + ElxInterface * Build(const CBufferRefT & pattern, int flags); + int GetNamedNumber(const CBufferRefT & named) const; + void Clear(); + +public: + CBuilderT(); + ~CBuilderT(); + +// Public Attributes +public: + ElxInterface * m_pTopElx; + int m_nFlags; + int m_nMaxNumber; + int m_nNextNamed; + int m_nGroupCount; + int m_nNextBalancing; + + CBufferT m_objlist; + CBufferT m_grouplist; + CBufferT m_recursivelist; + CBufferT m_namedlist; + CBufferT m_namedbackreflist; + CBufferT m_namedconditionlist; + CBufferT m_purebalancinglist; + +// CHART_INFO +protected: + struct CHART_INFO + { + public: + CHART ch; + int type; + int pos; + int len; + + public: + CHART_INFO(CHART c, int t, int p = 0, int l = 0) { ch = c; type = t; pos = p; len = l; } + inline int operator == (const CHART_INFO & ci) { return ch == ci.ch && type == ci.type; } + inline int operator != (const CHART_INFO & ci) { return ! 
operator == (ci); } + }; + +protected: + static unsigned int Hex2Int(const CHART * pcsz, int length, int & used); + static int ReadDec(char * & str, unsigned int & dec); + void MoveNext(); + int GetNext2(); + + ElxInterface * BuildAlternative(int vaflags); + ElxInterface * BuildList (int & flags); + ElxInterface * BuildRepeat (int & flags); + ElxInterface * BuildSimple (int & flags); + ElxInterface * BuildCharset (int & flags); + ElxInterface * BuildRecursive (int & flags); + ElxInterface * BuildBoundary (int & flags); + ElxInterface * BuildBackref (int & flags); + + ElxInterface * GetStockElx (int nStockId); + ElxInterface * Keep(ElxInterface * pElx); + +// Private Attributes +protected: + CBufferRefT m_pattern; + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + + // Backup current pos + struct Snapshot + { + CHART_INFO prev, curr, next, nex2; + int m_nNextPos; + int m_nCharsetDepth; + int m_bQuoted; + POSIX_FUNC m_quote_fun; + Snapshot():prev(0,0),curr(0,0),next(0,0),nex2(0,0) {} + }; + void Backup (Snapshot * pdata) { memcpy(pdata, &prev, sizeof(Snapshot)); } + void Restore(Snapshot * pdata) { memcpy(&prev, pdata, sizeof(Snapshot)); } + + ElxInterface * m_pStockElxs[STOCKELX_COUNT]; +}; + +// +// Implementation +// +template CBuilderT :: CBuilderT() : m_pattern(0, 0), prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0) +{ + Clear(); +} + +template CBuilderT :: ~CBuilderT() +{ + Clear(); +} + +template int CBuilderT :: GetNamedNumber(const CBufferRefT & named) const +{ + for(int i=0; im_elxlist[0])->m_szNamed.CompareNoCase(named) ) + return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber; + } + + return -3; +} + +template ElxInterface * CBuilderT :: Build(const CBufferRefT & pattern, int flags) +{ + // init + m_pattern = pattern; + m_nNextPos = 0; + m_nCharsetDepth = 0; + m_nMaxNumber = 0; + m_nNextNamed = 0; + m_nNextBalancing= 0; + m_nFlags = flags; + m_bQuoted = 0; + m_quote_fun = 0; + 
+ m_grouplist .Restore(0); + m_recursivelist .Restore(0); + m_namedlist .Restore(0); + m_namedbackreflist .Restore(0); + m_namedconditionlist.Restore(0); + m_purebalancinglist .Restore(0); + + int i; + for(i=0; i<3; i++) MoveNext(); + + // build + m_pTopElx = BuildAlternative(flags); + + // group 0 + m_grouplist.Prepare(0); + m_grouplist[0] = m_pTopElx; + + // append named to unnamed + m_nGroupCount = m_grouplist.GetSize(); + + m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize()); + + for(i=0; im_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2]; + + // append + m_grouplist[m_nGroupCount ++] = m_namedlist[i]; + + if( pleft->m_nnumber > 0 ) + continue; + + // same name + int find_same_name = GetNamedNumber(pleft->m_szNamed); + if( find_same_name >= 0 ) + { + pleft ->m_nnumber = find_same_name; + pright->m_nnumber = find_same_name; + } + else + { + m_nMaxNumber ++; + + pleft ->m_nnumber = m_nMaxNumber; + pright->m_nnumber = m_nMaxNumber; + } + } + + for(i=0; im_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2]; + + // balancing + if(pleft->m_szBalancing.GetSize() > 0) + { + int balancing_to = GetNamedNumber(pleft->m_szBalancing); + if(balancing_to >= 0) + { + pleft ->m_balancing = balancing_to; + pright->m_balancing = balancing_to; + } + else + { + // TODO ERROR + } + } + } + + for(i=1; im_elxlist[0]; + + if( pleft->m_nnumber > m_nMaxNumber ) + m_nMaxNumber = pleft->m_nnumber; + } + + // pure balancing group + int nMaxNumber = m_nMaxNumber; + for(i=0; im_elxlist[0]; + CBracketElx * pright = (CBracketElx *)m_purebalancinglist[i]->m_elxlist[2]; + + nMaxNumber ++; + + pleft ->m_nnumber = nMaxNumber; + pright->m_nnumber = nMaxNumber; + + // balancing + if(pleft->m_szBalancing.GetSize() > 0) + { + int balancing_to = GetNamedNumber(pleft->m_szBalancing); + if(balancing_to >= 0) + { + pleft ->m_balancing = balancing_to; + pright->m_balancing = balancing_to; + } + else + { + // TODO ERROR + } + } + } + + // 
connect recursive + for(i=0; im_ndata == -3 ) + m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed); + + if( m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata <= m_nMaxNumber ) + { + if( m_recursivelist[i]->m_ndata == 0 ) + m_recursivelist[i]->m_pelx = m_pTopElx; + else for(int j=1; jm_ndata == ((CBracketElx *)((CListElx*)m_grouplist[j])->m_elxlist[0])->m_nnumber) + { + m_recursivelist[i]->m_pelx = m_grouplist[j]; + break; + } + } + } + } + + // named backref + for(i=0; im_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed); + } + + // named condition + for(i=0; im_szNamed); + if( nn >= 0 ) + { + m_namedconditionlist[i]->m_nnumber = nn; + m_namedconditionlist[i]->m_pelxask = 0; + } + } + + return m_pTopElx; +} + +template void CBuilderT :: Clear() +{ + for(int i=0; i unsigned int CBuilderT :: Hex2Int(const CHART * pcsz, int length, int & used) +{ + unsigned int result = 0; + int & i = used; + + for(i=0; i= RCHART('0') && pcsz[i] <= RCHART('9')) + result = (result << 4) + (pcsz[i] - RCHART('0')); + else if(pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A'))); + else if(pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f')) + result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a'))); + else + break; + } + + return result; +} + +template inline ElxInterface * CBuilderT :: Keep(ElxInterface * pelx) +{ + m_objlist.Push(pelx); + return pelx; +} + +template void CBuilderT :: MoveNext() +{ + // forwards + prev = curr; + curr = next; + next = nex2; + + // get nex2 + while( ! 
GetNext2() ) {}; +} + +template int CBuilderT :: GetNext2() +{ + // check length + if(m_nNextPos >= m_pattern.GetSize()) + { + nex2 = CHART_INFO(0, 1, m_nNextPos, 0); + return 1; + } + + int delta = 1; + CHART ch = m_pattern[m_nNextPos]; + + // if quoted + if(m_bQuoted) + { + if(ch == RCHART('\\')) + { + if(m_pattern[m_nNextPos + 1] == RCHART('E')) + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + } + + if(m_quote_fun != 0) + nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + + m_nNextPos += delta; + + return 1; + } + + // common + switch(ch) + { + case RCHART('\\'): + { + CHART ch1 = m_pattern[m_nNextPos+1]; + + // backref + if(ch1 >= RCHART('0') && ch1 <= RCHART('9')) + { + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + } + + // escape + delta = 2; + + switch(ch1) + { + case RCHART('A'): + case RCHART('Z'): + case RCHART('z'): + case RCHART('w'): + case RCHART('W'): + case RCHART('s'): + case RCHART('S'): + case RCHART('B'): + case RCHART('d'): + case RCHART('D'): + case RCHART('k'): + case RCHART('g'): + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + + case RCHART('b'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO('\b', 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + + /* + case RCHART('<'): + case RCHART('>'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + */ + + case RCHART('x'): + if(m_pattern[m_nNextPos+2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red); + + delta += red; + + if(red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + + break; + } + + case RCHART('u'): + if(m_pattern[m_nNextPos+2] != '{') + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + 
m_nNextPos + 2, 4, red); + + delta += red; + + if(red > 0) + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + } + else + { + int red = 0; + unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red); + + delta += red; + + while(m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}')) + delta ++; + + delta ++; // skip '}' + + nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta); + } + break; + + case RCHART('a'): nex2 = CHART_INFO(RCHART('\a'), 0, m_nNextPos, delta); break; + case RCHART('f'): nex2 = CHART_INFO(RCHART('\f'), 0, m_nNextPos, delta); break; + case RCHART('n'): nex2 = CHART_INFO(RCHART('\n'), 0, m_nNextPos, delta); break; + case RCHART('r'): nex2 = CHART_INFO(RCHART('\r'), 0, m_nNextPos, delta); break; + case RCHART('t'): nex2 = CHART_INFO(RCHART('\t'), 0, m_nNextPos, delta); break; + case RCHART('v'): nex2 = CHART_INFO(RCHART('\v'), 0, m_nNextPos, delta); break; + case RCHART('e'): nex2 = CHART_INFO(RCHART( 27 ), 0, m_nNextPos, delta); break; + + case RCHART('G'): // skip '\G' + if(m_nCharsetDepth > 0) + { + m_nNextPos += 2; + return 0; + } + else + { + nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta); + break; + } + + case RCHART('L'): + if( ! m_quote_fun ) m_quote_fun = ::tolower; + + case RCHART('U'): + if( ! 
m_quote_fun ) m_quote_fun = ::toupper; + + case RCHART('Q'): + { + m_bQuoted = 1; + m_nNextPos += 2; + return 0; + } + + case RCHART('E'): + { + m_quote_fun = 0; + m_bQuoted = 0; + m_nNextPos += 2; + return 0; + } + + case 0: + if(m_nNextPos+1 >= m_pattern.GetSize()) + { + delta = 1; + nex2 = CHART_INFO(ch , 0, m_nNextPos, delta); + } + else + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); + break; + } + } + break; + + case RCHART('*'): + case RCHART('+'): + case RCHART('?'): + case RCHART('.'): + case RCHART('{'): + case RCHART('}'): + case RCHART(')'): + case RCHART('|'): + case RCHART('$'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + break; + + case RCHART('-'): + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('('): + { + CHART ch1 = m_pattern[m_nNextPos+1]; + CHART ch2 = m_pattern[m_nNextPos+2]; + + // skip remark + if(ch1 == RCHART('?') && ch2 == RCHART('#')) + { + m_nNextPos += 2; + while(m_nNextPos < m_pattern.GetSize()) + { + if(m_pattern[m_nNextPos] == RCHART(')')) + break; + + m_nNextPos ++; + } + + if(m_pattern[m_nNextPos] == RCHART(')')) + { + m_nNextPos ++; + + // get next nex2 + return 0; + } + } + else + { + if(m_nCharsetDepth > 0) + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + } + break; + + case RCHART('#'): + if(m_nFlags & EXTENDED) + { + // skip remark + m_nNextPos ++; + + while(m_nNextPos < m_pattern.GetSize()) + { + if(m_pattern[m_nNextPos] == RCHART('\n') || m_pattern[m_nNextPos] == RCHART('\r')) + break; + + m_nNextPos ++; + } + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(' '): + case RCHART('\f'): + case RCHART('\n'): + case 
RCHART('\r'): + case RCHART('\t'): + case RCHART('\v'): + if(m_nFlags & EXTENDED) + { + m_nNextPos ++; + + // get next nex2 + return 0; + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART('['): + if( m_nCharsetDepth == 0 || m_pattern.At(m_nNextPos + 1, 0) == RCHART(':') ) + { + m_nCharsetDepth ++; + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(']'): + if(m_nCharsetDepth > 0) + { + m_nCharsetDepth --; + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + } + else + { + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + } + break; + + case RCHART(':'): + if(next == CHART_INFO(RCHART('['), 1)) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case RCHART('^'): + if(m_nCharsetDepth == 0 || next == CHART_INFO(RCHART('['), 1) || (curr == CHART_INFO(RCHART('['), 1) && next == CHART_INFO(RCHART(':'), 1))) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + + case 0: + if(m_nNextPos >= m_pattern.GetSize()) + nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); // end of string + else + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); // common '\0' char + break; + + default: + nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); + break; + } + + m_nNextPos += delta; + + return 1; +} + +template ElxInterface * CBuilderT :: GetStockElx(int nStockId) +{ + ElxInterface ** pStockElxs = m_pStockElxs; + + // check + if(nStockId < 0 || nStockId >= STOCKELX_COUNT) + return GetStockElx(0); + + // create if no + if(pStockElxs[nStockId] == 0) + { + switch(nStockId) + { + case STOCKELX_EMPTY: + pStockElxs[nStockId] = Keep(new CEmptyElx()); + break; + + case STOCKELX_WORD: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); 
pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL: + pStockElxs[nStockId] = Keep(new CRangeElxT (0, 0)); + break; + + case STOCKELX_DOT_NOT_ALL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 1)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (0, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_RIGHTLEFT: + { + CRangeElxT * pRange 
= (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_WORD_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z')); + pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z')); + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + pRange->m_chars .Push(RCHART('_')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DOT_ALL_RIGHTLEFT: + pStockElxs[nStockId] = Keep(new CRangeElxT (1, 0)); + break; + + case STOCKELX_DOT_NOT_ALL_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars .Push(RCHART('\n')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + pRange->m_chars .Push(RCHART('\f')); + pRange->m_chars .Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_SPACE_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_chars .Push(RCHART(' ')); + pRange->m_chars .Push(RCHART('\t')); + pRange->m_chars .Push(RCHART('\r')); + pRange->m_chars .Push(RCHART('\n')); + pRange->m_chars .Push(RCHART('\f')); + pRange->m_chars .Push(RCHART('\v')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 1)); + + 
pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + + case STOCKELX_DIGITAL_RIGHTLEFT_NOT: + { + CRangeElxT * pRange = (CRangeElxT *)Keep(new CRangeElxT (1, 0)); + + pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9')); + + pStockElxs[nStockId] = pRange; + } + break; + } + } + + // return + return pStockElxs[nStockId]; +} + +template ElxInterface * CBuilderT :: BuildAlternative(int vaflags) +{ + if(curr == CHART_INFO(0, 1)) + return GetStockElx(STOCKELX_EMPTY); + + // flag instance + int flags = vaflags; + + // first part + ElxInterface * pAlternativeOne = BuildList(flags); + + // check alternative + if(curr == CHART_INFO(RCHART('|'), 1)) + { + CAlternativeElx * pAlternative = (CAlternativeElx *)Keep(new CAlternativeElx()); + pAlternative->m_elxlist.Push(pAlternativeOne); + + // loop + while(curr == CHART_INFO(RCHART('|'), 1)) + { + // skip '|' itself + MoveNext(); + + pAlternativeOne = BuildList(flags); + pAlternative->m_elxlist.Push(pAlternativeOne); + } + + return pAlternative; + } + + return pAlternativeOne; +} + +template ElxInterface * CBuilderT :: BuildList(int & flags) +{ + if(curr == CHART_INFO(0, 1) || curr == CHART_INFO(RCHART('|'), 1) || curr == CHART_INFO(RCHART(')'), 1)) + return GetStockElx(STOCKELX_EMPTY); + + // first + ElxInterface * pListOne = BuildRepeat(flags); + + if(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + pList->m_elxlist.Push(pListOne); + + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1)) + { + pListOne = BuildRepeat(flags); + + // add + pList->m_elxlist.Push(pListOne); + } + + return pList; + } + + return pListOne; +} + +template ElxInterface * CBuilderT :: BuildRepeat(int & flags) +{ + // simple + ElxInterface * pSimple = BuildSimple(flags); + + 
if(curr.type == 0) return pSimple; + + // is quantifier or not + int bIsQuantifier = 1; + + // quantifier range + unsigned int nMin = 0, nMax = 0; + + switch(curr.ch) + { + case RCHART('{'): + { + CBufferT re; + + // skip '{' + MoveNext(); + + // copy + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('}'), 1)) + { + re.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + + // skip '}' + MoveNext(); + + // read + int red; + char * str = re.GetBuffer(); + + if( ! ReadDec(str, nMin) ) + red = 0; + else if( *str != ',' ) + red = 1; + else + { + str ++; + + if( ! ReadDec(str, nMax) ) + red = 2; + else + red = 3; + } + + // check + if(red <= 1 ) nMax = nMin; + if(red == 2 ) nMax = INT_MAX; + if(nMax < nMin) nMax = nMin; + } + break; + + case RCHART('?'): + nMin = 0; + nMax = 1; + + // skip '?' + MoveNext(); + break; + + case RCHART('*'): + nMin = 0; + nMax = INT_MAX; + + // skip '*' + MoveNext(); + break; + + case RCHART('+'): + nMin = 1; + nMax = INT_MAX; + + // skip '+' + MoveNext(); + break; + + default: + bIsQuantifier = 0; + break; + } + + // do quantify + if(bIsQuantifier) + { + // 0 times + if(nMax == 0) + return GetStockElx(STOCKELX_EMPTY); + + // fixed times + if(nMin == nMax) + { + if(curr == CHART_INFO(RCHART('?'), 1) || curr == CHART_INFO(RCHART('+'), 1)) + MoveNext(); + + return Keep(new CRepeatElx(pSimple, nMin)); + } + + // range times + if(curr == CHART_INFO(RCHART('?'), 1)) + { + MoveNext(); + return Keep(new CReluctantElx(pSimple, nMin, nMax)); + } + else if(curr == CHART_INFO(RCHART('+'), 1)) + { + MoveNext(); + return Keep(new CPossessiveElx(pSimple, nMin, nMax)); + } + else + { + return Keep(new CGreedyElx(pSimple, nMin, nMax)); + } + } + + return pSimple; +} + +template ElxInterface * CBuilderT :: BuildSimple(int & flags) +{ + CBufferT fixed; + + while(curr != CHART_INFO(0, 1)) + { + if(curr.type == 0) + { + if(next == CHART_INFO(RCHART('{'), 1) || next == CHART_INFO(RCHART('?'), 1) || next == 
CHART_INFO(RCHART('*'), 1) || next == CHART_INFO(RCHART('+'), 1)) + { + if(fixed.GetSize() == 0) + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + + break; + } + else + { + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + else if(curr.type == 1) + { + CHART vch = curr.ch; + + // end of simple + if(vch == RCHART(')') || vch == RCHART('|')) + break; + + // has fixed already + if(fixed.GetSize() > 0) + break; + + // left parentheses + if(vch == RCHART('(')) + { + return BuildRecursive(flags); + } + + // char set + if( vch == RCHART('[') || vch == RCHART('.') || vch == RCHART('w') || vch == RCHART('W') || + vch == RCHART('s') || vch == RCHART('S') || vch == RCHART('d') || vch == RCHART('D') + ) + { + return BuildCharset(flags); + } + + // boundary + if( vch == RCHART('^') || vch == RCHART('$') || vch == RCHART('A') || vch == RCHART('Z') || vch == RCHART('z') || + vch == RCHART('b') || vch == RCHART('B') || vch == RCHART('G') // vch == RCHART('<') || vch == RCHART('>') + ) + { + return BuildBoundary(flags); + } + + // backref + if(vch == RCHART('\\') || vch == RCHART('k') || vch == RCHART('g')) + { + return BuildBackref(flags); + } + + // treat vchar as char + fixed.Append(curr.ch, 1); + MoveNext(); + } + } + + if(fixed.GetSize() > 0) + return Keep(new CStringElxT (fixed.GetBuffer(), fixed.GetSize(), flags & RIGHTTOLEFT, flags & IGNORECASE)); + else + return GetStockElx(STOCKELX_EMPTY); +} + +#define deelx_max(a, b) (((a) > (b)) ? (a) : (b)) +#define deelx_min(a, b) (((a) < (b)) ? (a) : (b)) + +template ElxInterface * CBuilderT :: BuildCharset(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch(ch) + { + case RCHART('.'): + return GetStockElx( + flags & RIGHTTOLEFT ? + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL_RIGHTLEFT : STOCKELX_DOT_NOT_ALL_RIGHTLEFT) : + ((flags & SINGLELINE) ? STOCKELX_DOT_ALL : STOCKELX_DOT_NOT_ALL) + ); + + case RCHART('w'): + return GetStockElx(flags & RIGHTTOLEFT ? 
STOCKELX_WORD_RIGHTLEFT : STOCKELX_WORD); + + case RCHART('W'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_WORD_RIGHTLEFT_NOT : STOCKELX_WORD_NOT); + + case RCHART('s'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT : STOCKELX_SPACE); + + case RCHART('S'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT_NOT : STOCKELX_SPACE_NOT); + + case RCHART('d'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT : STOCKELX_DIGITAL); + + case RCHART('D'): + return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT_NOT : STOCKELX_DIGITAL_NOT); + + case RCHART('['): + { + CRangeElxT * pRange; + + // create + if(curr == CHART_INFO(RCHART(':'), 1)) + { + // Backup before posix + Snapshot shot; + Backup(&shot); + + CBufferT posix; + + do { + posix.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + while(curr.ch != RCHART(0) && curr != CHART_INFO(RCHART(']'), 1)); + + MoveNext(); // skip ']' + + // posix + CPosixElxT * pposix = (CPosixElxT *) Keep(new CPosixElxT (posix.GetBuffer(), flags & RIGHTTOLEFT)); + if(pposix->m_posixfun != 0) + { + return pposix; + } + + // restore if not posix + Restore(&shot); + } + + if(curr == CHART_INFO(RCHART('^'), 1)) + { + MoveNext(); // skip '^' + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 0)); + } + else + { + pRange = (CRangeElxT *)Keep(new CRangeElxT (flags & RIGHTTOLEFT, 1)); + } + + // parse + while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART(']'), 1)) + { + ch = curr.ch; + + if(curr.type == 1 && ( + ch == RCHART('.') || ch == RCHART('w') || ch == RCHART('W') || ch == RCHART('s') || ch == RCHART('S') || ch == RCHART('d') || ch == RCHART('D') || + (ch == RCHART('[') && next == CHART_INFO(RCHART(':'), 1)) + )) + { + pRange->m_embeds.Push(BuildCharset(flags)); + } + else if(next == CHART_INFO(RCHART('-'), 1) && nex2.type == 0) + { + pRange->m_ranges.Push(ch); pRange->m_ranges.Push(nex2.ch); + + // 
next + MoveNext(); + MoveNext(); + MoveNext(); + } + else + { + pRange->m_chars.Push(ch); + + // next + MoveNext(); + } + } + + // skip ']' + MoveNext(); + + if( flags & IGNORECASE ) + { + CBufferT & ranges = pRange->m_ranges; + int i, oldcount = ranges.GetSize() / 2; + + for(i=0; i= RCHART('A') ) + { + newmin = tolower( deelx_max(RCHART('A'), ranges[i*2 ]) ); + newmax = tolower( deelx_min(RCHART('Z'), ranges[i*2+1]) ); + + if( newmin < ranges[i*2] || newmax > ranges[i*2+1] ) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + + if( ranges[i*2] <= RCHART('z') && ranges[i*2+1] >= RCHART('a') ) + { + newmin = toupper( deelx_max(RCHART('a'), ranges[i*2 ]) ); + newmax = toupper( deelx_min(RCHART('z'), ranges[i*2+1]) ); + + if( newmin < ranges[i*2] || newmax > ranges[i*2+1] ) + { + ranges.Push(newmin); + ranges.Push(newmax); + } + } + } + + CBufferT & chars = pRange->m_chars; + oldcount = chars.GetSize(); + for(i=0; iIsContainChar(tolower(chars[i])) ) + chars.Push(tolower(chars[i])); + + if( islower(chars[i]) && ! pRange->IsContainChar(toupper(chars[i])) ) + chars.Push(toupper(chars[i])); + } + } + + return pRange; + } + } + + return GetStockElx(STOCKELX_EMPTY); +} + +template ElxInterface * CBuilderT :: BuildRecursive(int & flags) +{ + // skip '(' + MoveNext(); + + if(curr == CHART_INFO(RCHART('?'), 1)) + { + ElxInterface * pElx = 0; + + // skip '?' + MoveNext(); + + int bNegative = 0; + CHART named_end = RCHART('>'); + + switch(curr.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + { + MoveNext(); // skip '!' or '=' + pElx = Keep(new CAssertElx(BuildAlternative(flags & ~RIGHTTOLEFT), !bNegative)); + } + break; + + case RCHART('<'): + switch(next.ch) + { + case RCHART('!'): + bNegative = 1; + + case RCHART('='): + MoveNext(); // skip '<' + MoveNext(); // skip '!' 
or '=' + { + pElx = Keep(new CAssertElx(BuildAlternative(flags | RIGHTTOLEFT), !bNegative)); + } + break; + + default: // named group + break; + } + // break if assertion // else named + if(pElx != 0) break; + + case RCHART('P'): + if(curr.ch == RCHART('P')) MoveNext(); // skip 'P' + + case RCHART('\''): + if (curr.ch == RCHART('<' )) named_end = RCHART('>' ); + else if(curr.ch == RCHART('\'')) named_end = RCHART('\''); + MoveNext(); // skip '<' or '\'' + { + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + CBracketElx * pleft = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 1 : 0)); + CBracketElx * pright = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 0 : 1)); + + // save name + CBufferT & name = pleft->m_szNamed, & balancing_name = pleft->m_szBalancing, * pname = &name; + CBufferT num, balancing_num, * pnum = # + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + if(curr.ch == RCHART('-')) + { + pname = &balancing_name; + pnum = &balancing_num; + MoveNext(); + continue; + } + + pname->Append(curr.ch, 1); + pnum ->Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pleft ->m_nnumber = number; + pright->m_nnumber = number; + + name.Release(); + } + + str = balancing_num.GetBuffer(); + if( ReadDec(str, number) ? 
( *str == '\0') : 0 ) + { + pleft ->m_balancing = number; + pright->m_balancing = number; + + balancing_name.Release(); + } + + // left, center, right + pList->m_elxlist.Push(pleft); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(pright); + + // named number + if(pleft->m_nnumber >= 0 || name.GetSize() > 0) + { + int nThisBackref = m_nNextNamed ++; + m_namedlist.Prepare(nThisBackref); + m_namedlist[nThisBackref] = pList; + } + else if(pleft->m_balancing >= 0 || balancing_name.GetSize() > 0) + { + int nThisBalancing = m_nNextBalancing ++; + m_purebalancinglist.Prepare(nThisBalancing, 0); + m_purebalancinglist[nThisBalancing] = pList; + } + else + { + // TODO ERROR + } + + pElx = pList; + } + break; + + case RCHART('>'): + { + MoveNext(); // skip '>' + pElx = Keep(new CIndependentElx(BuildAlternative(flags))); + } + break; + + case RCHART('R'): + MoveNext(); // skip 'R' + while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if(curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\''); + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(-3)); + + MoveNext(); // skip '<' or '\\' + + // save name + CBufferT & name = pDelegate->m_szNamed; + CBufferT num; + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pDelegate->m_ndata = number; + name.Release(); + } + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + CBufferT rto; + while(curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? 
(char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + break; + + case RCHART('('): + { + CConditionElx * pConditionElx = (CConditionElx *)Keep(new CConditionElx()); + + // condition + ElxInterface * & pCondition = pConditionElx->m_pelxask; + + if(next == CHART_INFO(RCHART('?'), 1)) + { + pCondition = BuildRecursive(flags); + } + else // named, assert or number + { + MoveNext(); // skip '(' + int pos0 = curr.pos; + + // save elx condition + pCondition = Keep(new CAssertElx(BuildAlternative(flags), 1)); + + // save name + pConditionElx->m_szNamed.Append(m_pattern.GetBuffer() + pos0, curr.pos - pos0, 1); + + // save number + CBufferT numstr; + while(pos0 < curr.pos) + { + CHART ch = m_pattern[pos0]; + numstr.Append(((ch & (CHART)0xff) == ch) ? (char)ch : 0, 1); + pos0 ++; + } + + unsigned int number; + char * str = numstr.GetBuffer(); + + // valid group number + if( ReadDec(str, number) ? ( *str == '\0') : 0 ) + { + pConditionElx->m_nnumber = number; + pCondition = 0; + } + else // maybe elx, maybe named + { + pConditionElx->m_nnumber = -1; + m_namedconditionlist.Push(pConditionElx); + } + + MoveNext(); // skip ')' + } + + // alternative + { + int newflags = flags; + + pConditionElx->m_pelxyes = BuildList(newflags); + } + + if(curr.ch == RCHART('|')) + { + MoveNext(); // skip '|' + + pConditionElx->m_pelxno = BuildAlternative(flags); + } + else + { + pConditionElx->m_pelxno = 0; + } + + pElx = pConditionElx; + } + break; + + default: + while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space + + if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) // recursive (?1) => (?R1) + { + CBufferT rto; + while(curr.ch != RCHART(0) && curr.ch != RCHART(')')) + { + rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? 
(char)curr.ch : 0, 1); + MoveNext(); + } + + unsigned int rtono = 0; + char * str = rto.GetBuffer(); + ReadDec(str, rtono); + + CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono)); + + m_recursivelist.Push(pDelegate); + pElx = pDelegate; + } + else + { + // flag + int newflags = flags; + while(curr != CHART_INFO(0, 1) && curr.ch != RCHART(':') && curr.ch != RCHART(')') && curr != CHART_INFO(RCHART('('), 1)) + { + int tochange = 0; + + switch(curr.ch) + { + case RCHART('i'): + case RCHART('I'): + tochange = IGNORECASE; + break; + + case RCHART('s'): + case RCHART('S'): + tochange = SINGLELINE; + break; + + case RCHART('m'): + case RCHART('M'): + tochange = MULTILINE; + break; + + case RCHART('g'): + case RCHART('G'): + tochange = GLOBAL; + break; + + case RCHART('-'): + bNegative = 1; + break; + } + + if(bNegative) + newflags &= ~tochange; + else + newflags |= tochange; + + // move to next char + MoveNext(); + } + + if(curr.ch == RCHART(':') || curr == CHART_INFO(RCHART('('), 1)) + { + // skip ':' + if(curr.ch == RCHART(':')) MoveNext(); + + pElx = BuildAlternative(newflags); + } + else + { + // change parent flags + flags = newflags; + + pElx = GetStockElx(STOCKELX_EMPTY); + } + } + break; + } + + MoveNext(); // skip ')' + + return pElx; + } + else + { + // group and number + CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT)); + int nThisBackref = ++ m_nMaxNumber; + + // left, center, right + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 1 : 0))); + pList->m_elxlist.Push(BuildAlternative(flags)); + pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 
0 : 1))); + + // for recursive + m_grouplist.Prepare(nThisBackref); + m_grouplist[nThisBackref] = pList; + + // right + MoveNext(); // skip ')' + + return pList; + } +} + +template ElxInterface * CBuilderT :: BuildBoundary(int & flags) +{ + // char + CHART ch = curr.ch; + + // skip + MoveNext(); + + switch(ch) + { + case RCHART('^'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_BEGIN : BOUNDARY_FILE_BEGIN)); + + case RCHART('$'): + return Keep(new CBoundaryElxT ((flags & MULTILINE) ? BOUNDARY_LINE_END : BOUNDARY_FILE_END)); + + case RCHART('b'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE)); + + case RCHART('B'): + return Keep(new CBoundaryElxT (BOUNDARY_WORD_EDGE, 0)); + + case RCHART('A'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_BEGIN)); + + case RCHART('Z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END_N)); + + case RCHART('z'): + return Keep(new CBoundaryElxT (BOUNDARY_FILE_END)); + + case RCHART('G'): + if(flags & GLOBAL) + return Keep(new CGlobalElx()); + else + return GetStockElx(STOCKELX_EMPTY); + + default: + return GetStockElx(STOCKELX_EMPTY); + } +} + +template ElxInterface * CBuilderT :: BuildBackref(int & flags) +{ + // skip '\\' or '\k' or '\g' + MoveNext(); + + if(curr.ch == RCHART('<') || curr.ch == RCHART('\'')) + { + CHART named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\''); + CBackrefElxT * pbackref = (CBackrefElxT *)Keep(new CBackrefElxT (-1, flags & RIGHTTOLEFT, flags & IGNORECASE)); + + MoveNext(); // skip '<' or '\'' + + // save name + CBufferT & name = pbackref->m_szNamed; + CBufferT num; + + while(curr.ch != RCHART(0) && curr.ch != named_end) + { + name.Append(curr.ch, 1); + num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1); + MoveNext(); + } + MoveNext(); // skip '>' or '\'' + + // check + unsigned int number; + char * str = num.GetBuffer(); + + if( ReadDec(str, number) ? 
( *str == '\0') : 0 ) + { + pbackref->m_nnumber = number; + name.Release(); + } + else + { + m_namedbackreflist.Push(pbackref); + } + + return pbackref; + } + else + { + unsigned int nbackref = 0; + + for(int i=0; i<3; i++) + { + if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) + nbackref = nbackref * 10 + (curr.ch - RCHART('0')); + else + break; + + MoveNext(); + } + + return Keep(new CBackrefElxT (nbackref, flags & RIGHTTOLEFT, flags & IGNORECASE)); + } +} + +template int CBuilderT :: ReadDec(char * & str, unsigned int & dec) +{ + int s = 0; + while(str[s] != 0 && isspace(str[s])) s++; + + if(str[s] < '0' || str[s] > '9') return 0; + + dec = 0; + unsigned int i; + + for(i = s; i= '0' && str[i] <= '9') + dec = dec * 10 + (str[i] - '0'); + else + break; + } + + while(str[i] != 0 && isspace(str[i])) i++; + str += i; + + return 1; +} + +// +// Regexp +// +template class CRegexpT +{ +public: + CRegexpT(const CHART * pattern = 0, int flags = 0); + CRegexpT(const CHART * pattern, int length, int flags); + void Compile(const CHART * pattern, int flags = 0); + void Compile(const CHART * pattern, int length, int flags); + +public: + MatchResult MatchExact(const CHART * tstring, CContext * pContext = 0) const; + MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = 0) const; + MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = 0) const; + MatchResult Match(const CHART * tstring, int length, int start, CContext * pContext = 0) const; + MatchResult Match(CContext * pContext) const; + CContext * PrepareMatch(const CHART * tstring, int start = -1, CContext * pContext = 0) const; + CContext * PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext = 0) const; + CHART * Replace(const CHART * tstring, const CHART * replaceto, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + CHART * Replace(const CHART * tstring, int string_length, const CHART * replaceto, 
int to_length, int & result_length, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const; + int GetNamedGroupNumber(const CHART * group_name) const; + +public: + static void ReleaseString (CHART * tstring ); + static void ReleaseContext(CContext * pContext); + +public: + CBuilderT m_builder; +}; + +// +// Implementation +// +template CRegexpT :: CRegexpT(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template CRegexpT :: CRegexpT(const CHART * pattern, int length, int flags) +{ + Compile(pattern, length, flags); +} + +template inline void CRegexpT :: Compile(const CHART * pattern, int flags) +{ + Compile(pattern, CBufferRefT(pattern).GetSize(), flags); +} + +template void CRegexpT :: Compile(const CHART * pattern, int length, int flags) +{ + m_builder.Clear(); + if(pattern != 0) m_builder.Build(CBufferRefT(pattern, length), flags); +} + +template inline MatchResult CRegexpT :: MatchExact(const CHART * tstring, CContext * pContext) const +{ + return MatchExact(tstring, CBufferRefT(tstring).GetSize(), pContext); +} + +template MatchResult CRegexpT :: MatchExact(const CHART * tstring, int length, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + // info + int endpos = 0; + + CContext context; + if(pContext == 0) pContext = &context; + + pContext->m_stack.Restore(0); + pContext->m_capturestack.Restore(0); + pContext->m_captureindex.Restore(0); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + endpos = 0; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + endpos = length; + } + + pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; 
+ pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(-1); + + // match + if( ! m_builder.m_pTopElx->Match( pContext ) ) + return 0; + else + { + while( pContext->m_nCurrentPos != endpos ) + { + if( ! m_builder.m_pTopElx->MatchNext( pContext ) ) + return 0; + else + { + if( pContext->m_nLastBeginPos == pContext->m_nBeginPos && pContext->m_nBeginPos == pContext->m_nCurrentPos ) + return 0; + else + pContext->m_nLastBeginPos = pContext->m_nCurrentPos; + } + } + + // end pos + pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + return MatchResult( pContext, m_builder.m_nMaxNumber ); + } +} + +template MatchResult CRegexpT :: Match(const CHART * tstring, int start, CContext * pContext) const +{ + return Match(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template MatchResult CRegexpT :: Match(const CHART * tstring, int length, int start, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + CContext context; + if(pContext == 0) pContext = &context; + + PrepareMatch(tstring, length, start, pContext); + + return Match( pContext ); +} + +template MatchResult CRegexpT :: Match(CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + int endpos, delta; + + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + endpos = -1; + delta = -1; + } + else + { + endpos = pContext->m_pMatchStringLength + 1; + delta = 1; + } + + while(pContext->m_nCurrentPos != endpos) + { + pContext->m_captureindex.Restore(0); + pContext->m_stack .Restore(0); + pContext->m_capturestack.Restore(0); + + pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1); + pContext->m_captureindex[0] = 0; + pContext->m_capturestack.Push(0); + pContext->m_capturestack.Push(pContext->m_nCurrentPos); + pContext->m_capturestack.Push(-1); + pContext->m_capturestack.Push(-1); + + if( m_builder.m_pTopElx->Match( pContext ) ) + { + 
pContext->m_capturestack[2] = pContext->m_nCurrentPos; + + // zero width + if( pContext->m_capturestack[1] == pContext->m_nCurrentPos ) + { + pContext->m_nCurrentPos += delta; + } + + // save pos + pContext->m_nLastBeginPos = pContext->m_nBeginPos; + pContext->m_nBeginPos = pContext->m_nCurrentPos; + + // return + return MatchResult( pContext, m_builder.m_nMaxNumber ); + } + else + { + pContext->m_nCurrentPos += delta; + } + } + + return 0; +} + +template inline CContext * CRegexpT :: PrepareMatch(const CHART * tstring, int start, CContext * pContext) const +{ + return PrepareMatch(tstring, CBufferRefT(tstring).GetSize(), start, pContext); +} + +template CContext * CRegexpT :: PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext) const +{ + if(m_builder.m_pTopElx == 0) + return 0; + + if(pContext == 0) pContext = new CContext(); + + pContext->m_nParenZindex = 0; + pContext->m_nLastBeginPos = -1; + pContext->m_pMatchString = (void*)tstring; + pContext->m_pMatchStringLength = length; + pContext->m_nCursiveLimit = 100; + + if(start < 0) + { + if(m_builder.m_nFlags & RIGHTTOLEFT) + { + pContext->m_nBeginPos = length; + pContext->m_nCurrentPos = length; + } + else + { + pContext->m_nBeginPos = 0; + pContext->m_nCurrentPos = 0; + } + } + else + { + if(start > length) start = length + ((m_builder.m_nFlags & RIGHTTOLEFT)?0:1); + + pContext->m_nBeginPos = start; + pContext->m_nCurrentPos = start; + } + + return pContext; +} + +template inline int CRegexpT :: GetNamedGroupNumber(const CHART * group_name) const +{ + return m_builder.GetNamedNumber(group_name); +} + +template CHART * CRegexpT :: Replace(const CHART * tstring, const CHART * replaceto, int start, int ntimes, MatchResult * result, CContext * pContext) const +{ + int result_length = 0; + return Replace(tstring, CBufferRefT(tstring).GetSize(), replaceto, CBufferRefT(replaceto).GetSize(), result_length, start, ntimes, result, pContext); +} + +template CHART * CRegexpT :: Replace(const CHART 
* tstring, int string_length, const CHART * replaceto, int to_length, int & result_length, int start, int ntimes, MatchResult * remote_result, CContext * oContext) const +{ + if(m_builder.m_pTopElx == 0) return 0; + + // --- compile replace to --- + + CBufferT compiledto; + + static const CHART rtoptn[] = { RCHART('\\'), RCHART('$' ), RCHART('('), RCHART('?'), RCHART(':'), RCHART('[' ), RCHART('$' ), RCHART('&' ), RCHART('`' ), RCHART('\''), RCHART('+'), RCHART('_' ), RCHART('\\'), RCHART('d'), RCHART(']'), RCHART('|'), RCHART('\\'), RCHART('{'), RCHART('.'), RCHART('*'), RCHART('?'), RCHART('\\'), RCHART('}'), RCHART(')' ), RCHART('\0') }; + static CRegexpT rtoreg(rtoptn); + + MatchResult local_result(0), * result = remote_result ? remote_result : & local_result; + + // prepare + CContext * pContext = rtoreg.PrepareMatch(replaceto, to_length, -1, oContext); + int lastIndex = 0, nmatch = 0; + + while( ((*result) = rtoreg.Match(pContext)).IsMatched() ) + { + int delta = result->GetStart() - lastIndex; + if( delta > 0 ) + { + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + + lastIndex = result->GetStart(); + delta = 2; + + switch(replaceto[lastIndex + 1]) + { + case RCHART('$'): + compiledto.Push(lastIndex); + compiledto.Push(1); + break; + + case RCHART('&'): + case RCHART('`'): + case RCHART('\''): + case RCHART('+'): + case RCHART('_'): + compiledto.Push(-1); + compiledto.Push((int)replaceto[lastIndex + 1]); + break; + + case RCHART('{'): + delta = result->GetEnd() - result->GetStart(); + nmatch = m_builder.GetNamedNumber(CBufferRefT (replaceto + (lastIndex + 2), delta - 3)); + + if(nmatch > 0 && nmatch <= m_builder.m_nMaxNumber) + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + else + { + compiledto.Push(lastIndex); + compiledto.Push(delta); + } + break; + + default: + nmatch = 0; + for(delta=1; delta<=3; delta++) + { + CHART ch = replaceto[lastIndex + delta]; + + if(ch < RCHART('0') || ch > RCHART('9')) + break; + + nmatch = nmatch * 10 + 
(ch - RCHART('0')); + } + + if(nmatch > m_builder.m_nMaxNumber) + { + while(nmatch > m_builder.m_nMaxNumber) + { + nmatch /= 10; + delta --; + } + + if(nmatch == 0) + { + delta = 1; + } + } + + if(delta == 1) + { + compiledto.Push(lastIndex); + compiledto.Push(1); + } + else + { + compiledto.Push(-2); + compiledto.Push(nmatch); + } + break; + } + + lastIndex += delta; + } + + if(lastIndex < to_length) + { + compiledto.Push(lastIndex); + compiledto.Push(to_length - lastIndex); + } + + int rightleft = m_builder.m_nFlags & RIGHTTOLEFT; + + int tb = rightleft ? compiledto.GetSize() - 2 : 0; + int te = rightleft ? -2 : compiledto.GetSize(); + int ts = rightleft ? -2 : 2; + + // --- compile complete --- + + int beginpos = rightleft ? string_length : 0; + int endpos = rightleft ? 0 : string_length; + + int toIndex0 = 0; + int toIndex1 = 0; + int i, ntime; + + CBufferT buffer; + + // prepare + pContext = PrepareMatch(tstring, string_length, start, pContext); + lastIndex = beginpos; + + // Match + for(ntime = 0; ntimes < 0 || ntime < ntimes; ntime ++) + { + (*result) = Match(pContext); + + if( ! 
result->IsMatched() ) + break; + + // before + if( rightleft ) + { + int distance = lastIndex - result->GetEnd(); + if( distance ) + { + buffer.Push(tstring + result->GetEnd()); + buffer.Push((const CHART *)distance); + + toIndex1 -= distance; + } + lastIndex = result->GetStart(); + } + else + { + int distance = result->GetStart() - lastIndex; + if( distance ) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)distance); + + toIndex1 += distance; + } + lastIndex = result->GetEnd(); + } + + toIndex0 = toIndex1; + + // middle + for(i=tb; i!=te; i+=ts) + { + int off = compiledto[i]; + int len = compiledto[i + 1]; + + const CHART * sub = replaceto + off; + + if( off == -1 ) + { + switch(RCHART(len)) + { + case RCHART('&'): + sub = tstring + result->GetStart(); + len = result->GetEnd() - result->GetStart(); + break; + + case RCHART('`'): + sub = tstring; + len = result->GetStart(); + break; + + case RCHART('\''): + sub = tstring + result->GetEnd(); + len = string_length - result->GetEnd(); + break; + + case RCHART('+'): + for(nmatch = result->MaxGroupNumber(); nmatch >= 0; nmatch --) + { + if(result->GetGroupStart(nmatch) >= 0) break; + } + sub = tstring + result->GetGroupStart(nmatch); + len = result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch); + break; + + case RCHART('_'): + sub = tstring; + len = string_length; + break; + } + } + else if( off == -2 ) + { + sub = tstring + result->GetGroupStart(len); + len = result->GetGroupEnd(len) - result->GetGroupStart(len); + } + + buffer.Push(sub); + buffer.Push((const CHART *)len); + + toIndex1 += rightleft ? 
(-len) : len; + } + } + + // after + if(rightleft) + { + if(endpos < lastIndex) + { + buffer.Push(tstring + endpos); + buffer.Push((const CHART *)(lastIndex - endpos)); + } + } + else + { + if(lastIndex < endpos) + { + buffer.Push(tstring + lastIndex); + buffer.Push((const CHART *)(endpos - lastIndex)); + } + } + + if(oContext == 0) ReleaseContext(pContext); + + // join string + result_length = 0; + for(i=0; i result_string; + result_string.Prepare(result_length); + result_string.Restore(0); + + if(rightleft) + { + for(i=buffer.GetSize()-2; i>=0; i-=2) + { + result_string.Append(buffer[i], *(int*)(void*)&buffer[i+1]); + } + } + else + { + for(i=0; im_result.Append(result_length, 3); + result->m_result.Append(ntime); + + if(rightleft) + { + result->m_result.Append(result_length - toIndex1); + result->m_result.Append(result_length - toIndex0); + } + else + { + result->m_result.Append(toIndex0); + result->m_result.Append(toIndex1); + } + + return result_string.Detach(); +} + +template inline void CRegexpT :: ReleaseString(CHART * tstring) +{ + if(tstring != 0) free(tstring); +} + +template inline void CRegexpT :: ReleaseContext(CContext * pContext) +{ + if(pContext != 0) delete pContext; +} + +// +// All implementations +// +template CAlternativeElxT :: CAlternativeElxT() +{ +} + +template int CAlternativeElxT :: Match(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 1; + + // try all + for(int n = 0; n < m_elxlist.GetSize(); n++) + { + if(m_elxlist[n]->Match(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + } + + return 0; +} + +template int CAlternativeElxT :: MatchNext(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 0; + + int n = 0; + + // recall prev + pContext->m_stack.Pop(n); + + // prev + if(m_elxlist[n]->MatchNext(pContext)) + { + pContext->m_stack.Push(n); + return 1; + } + else + { + // try rest + for(n++; n < m_elxlist.GetSize(); n++) + { + if(m_elxlist[n]->Match(pContext)) + { + 
pContext->m_stack.Push(n); + return 1; + } + } + + return 0; + } +} + +// assertx.cpp: implementation of the CAssertElx class. +// +template CAssertElxT :: CAssertElxT(ElxInterface * pelx, int byes) +{ + m_pelx = pelx; + m_byes = byes; +} + +template int CAssertElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + int bsucc; + + // match + if( m_byes ) + bsucc = m_pelx->Match(pContext); + else + bsucc = ! m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + pContext->m_nCurrentPos = nbegin; + + if( bsucc ) + pContext->m_stack.Push(ncsize); + else + pContext->m_capturestack.Restore(ncsize); + + return bsucc; +} + +template int CAssertElxT :: MatchNext(CContext * pContext) const +{ + int ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_capturestack.Restore(ncsize); + + return 0; +} + +// emptyelx.cpp: implementation of the CEmptyElx class. +// +template CEmptyElxT :: CEmptyElxT() +{ +} + +template int CEmptyElxT :: Match(CContext *) const +{ + return 1; +} + +template int CEmptyElxT :: MatchNext(CContext *) const +{ + return 0; +} + +// globalx.cpp: implementation of the CGlobalElx class. +// +template CGlobalElxT ::CGlobalElxT() +{ +} + +template int CGlobalElxT :: Match(CContext * pContext) const +{ + return pContext->m_nCurrentPos == pContext->m_nBeginPos; +} + +template int CGlobalElxT :: MatchNext(CContext *) const +{ + return 0; +} + +// greedelx.cpp: implementation of the CGreedyElx class. +// +template CGreedyElxT :: CGreedyElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CGreedyElxT :: Match(CContext * pContext) const +{ + if( ! CRepeatElxT :: MatchFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! 
CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CGreedyElxT :: MatchNext(CContext * pContext) const +{ + if( MatchNextVart(pContext) ) + return 1; + + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CGreedyElxT :: MatchVart(CContext * pContext) const +{ + int n = 0; + int nbegin00 = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + while(n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n ++; + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(pContext->m_nCurrentPos); + pContext->m_stack.Push(1); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +template int CGreedyElxT :: MatchNextVart(CContext * pContext) const +{ + int n, nbegin00, nsize, ncsize; + CSortedBufferT nbegin99; + pContext->m_stack.Pop(n); + pContext->m_stack.Pop(nbegin00); + pContext->m_stack.Pop(nbegin99); + pContext->m_stack.Pop(nsize); + pContext->m_stack.Pop(ncsize); + + if(n == 0) return 0; + + int n0 = n; + + if( ! 
CRepeatElxT::m_pelx->MatchNext(pContext) ) + { + n --; + } + + // not to re-match + else if(pContext->m_nCurrentPos == nbegin00) + { + pContext->m_stack.Restore(nsize); + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin00; + + return 0; + } + + // fix 2012-10-26, thanks to chenlx01@sohu.com + else + { + CContextShot shot(pContext); + + while(n < m_nvart && CRepeatElx::MatchForward(pContext)) + { + n ++; + } + + if(nbegin99.Find(pContext->m_nCurrentPos) >= 0) + { + shot.Restore(pContext); + n = n0; + } + else + { + nbegin99.Add(pContext->m_nCurrentPos); + } + } + + pContext->m_stack.Push(ncsize); + pContext->m_stack.Push(nsize); + pContext->m_stack.Push(nbegin99); + pContext->m_stack.Push(nbegin00); + pContext->m_stack.Push(n); + + return 1; +} + +// indepelx.cpp: implementation of the CIndependentElx class. +// +template CIndependentElxT :: CIndependentElxT(ElxInterface * pelx) +{ + m_pelx = pelx; +} + +template int CIndependentElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + + // match + int bsucc = m_pelx->Match(pContext); + + // status + pContext->m_stack.Restore(nsize); + + if( bsucc ) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CIndependentElxT :: MatchNext(CContext * pContext) const +{ + int nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + return 0; +} + +// listelx.cpp: implementation of the CListElx class. +// +template CListElxT :: CListElxT(int brightleft) +{ + m_brightleft = brightleft; +} + +template int CListElxT :: Match(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 1; + + // prepare + int bol = m_brightleft ? m_elxlist.GetSize() : -1; + int stp = m_brightleft ? 
-1 : 1; + int eol = m_brightleft ? -1 : m_elxlist.GetSize(); + + // from first + int n = bol + stp; + + // match all + while(n != eol) + { + if(m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +template int CListElxT :: MatchNext(CContext * pContext) const +{ + if(m_elxlist.GetSize() == 0) + return 0; + + // prepare + int bol = m_brightleft ? m_elxlist.GetSize() : -1; + int stp = m_brightleft ? -1 : 1; + int eol = m_brightleft ? -1 : m_elxlist.GetSize(); + + // from last + int n = eol - stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + + // match rest + while(n != eol) + { + if(m_elxlist[n]->Match(pContext)) + { + n += stp; + } + else + { + n -= stp; + + while(n != bol && ! m_elxlist[n]->MatchNext(pContext)) + n -= stp; + + if(n != bol) + n += stp; + else + return 0; + } + } + + return 1; +} + +// mresult.cpp: implementation of the MatchResult class. +// +template MatchResultT :: MatchResultT(CContext * pContext, int nMaxNumber) +{ + if(pContext != 0) + { + m_result.Prepare(nMaxNumber * 2 + 3, -1); + + // matched + m_result[0] = 1; + m_result[1] = nMaxNumber; + + for(int n = 0; n <= nMaxNumber; n++) + { + int index = pContext->m_captureindex[n]; + //if( index < 0 ) continue; + if( ! CBracketElxT::CheckCaptureIndex(index, pContext, n) ) continue; + + // check enclosed + int pos1 = pContext->m_capturestack[index + 1]; + int pos2 = pContext->m_capturestack[index + 2]; + + // info + m_result[n*2 + 2] = pos1 < pos2 ? pos1 : pos2; + m_result[n*2 + 3] = pos1 < pos2 ? 
pos2 : pos1; + } + } +} + +template inline int MatchResultT :: IsMatched() const +{ + return m_result.At(0, 0); +} + +template inline int MatchResultT :: MaxGroupNumber() const +{ + return m_result.At(1, 0); +} + +template inline int MatchResultT :: GetStart() const +{ + return m_result.At(2, -1); +} + +template inline int MatchResultT :: GetEnd() const +{ + return m_result.At(3, -1); +} + +template inline int MatchResultT :: GetGroupStart(int nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2, -1); +} + +template inline int MatchResultT :: GetGroupEnd(int nGroupNumber) const +{ + return m_result.At(2 + nGroupNumber * 2 + 1, -1); +} + +template MatchResultT & MatchResultT :: operator = (const MatchResultT & result) +{ + m_result.Restore(0); + if(result.m_result.GetSize() > 0) m_result.Append(result.m_result.GetBuffer(), result.m_result.GetSize()); + + return *this; +} + +// posselx.cpp: implementation of the CPossessiveElx class. +// +template CPossessiveElxT :: CPossessiveElxT(ElxInterface * pelx, int nmin, int nmax) : CGreedyElxT (pelx, nmin, nmax) +{ +} + +template int CPossessiveElxT :: Match(CContext * pContext) const +{ + int nbegin = pContext->m_nCurrentPos; + int nsize = pContext->m_stack.GetSize(); + int ncsize = pContext->m_capturestack.GetSize(); + int bsucc = 1; + + // match + if( ! CRepeatElxT :: MatchFixed(pContext) ) + { + bsucc = 0; + } + else + { + while( ! CGreedyElxT :: MatchVart(pContext) ) + { + if( ! 
CRepeatElxT :: MatchNextFixed(pContext) ) + { + bsucc = 0; + break; + } + } + } + + // status + pContext->m_stack.Restore(nsize); + + if( bsucc ) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(ncsize); + } + + return bsucc; +} + +template int CPossessiveElxT :: MatchNext(CContext * pContext) const +{ + int nbegin = 0, ncsize = 0; + + pContext->m_stack.Pop(ncsize); + pContext->m_stack.Pop(nbegin); + + pContext->m_capturestack.Restore(ncsize); + pContext->m_nCurrentPos = nbegin; + + return 0; +} + +// reluctx.cpp: implementation of the CReluctantElx class. +// +template CReluctantElxT :: CReluctantElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT (pelx, nmin) +{ + m_nvart = nmax - nmin; +} + +template int CReluctantElxT :: Match(CContext * pContext) const +{ + if( ! CRepeatElxT :: MatchFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CReluctantElxT :: MatchNext(CContext * pContext) const +{ + if( MatchNextVart(pContext) ) + return 1; + + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + + while( ! MatchVart(pContext) ) + { + if( ! CRepeatElxT :: MatchNextFixed(pContext) ) + return 0; + } + + return 1; +} + +template int CReluctantElxT :: MatchVart(CContext * pContext) const +{ + pContext->m_stack.Push(0); + + return 1; +} + +template int CReluctantElxT :: MatchNextVart(CContext * pContext) const +{ + int n = 0, nbegin = pContext->m_nCurrentPos; + + pContext->m_stack.Pop(n); + + if(n < m_nvart && CRepeatElxT :: m_pelx->Match(pContext)) + { + while(pContext->m_nCurrentPos == nbegin) + { + if( ! 
CRepeatElxT :: m_pelx->MatchNext(pContext) ) break; + } + + if(pContext->m_nCurrentPos != nbegin) + { + n ++; + + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + while(n > 0) + { + pContext->m_stack.Pop(nbegin); + + while( CRepeatElxT :: m_pelx->MatchNext(pContext) ) + { + if(pContext->m_nCurrentPos != nbegin) + { + pContext->m_stack.Push(nbegin); + pContext->m_stack.Push(n); + + return 1; + } + } + + n --; + } + + return 0; +} + +// repeatx.cpp: implementation of the CRepeatElx class. +// +template CRepeatElxT :: CRepeatElxT(ElxInterface * pelx, int ntimes) +{ + m_pelx = pelx; + m_nfixed = ntimes; +} + +template int CRepeatElxT :: Match(CContext * pContext) const +{ + return MatchFixed(pContext); +} + +template int CRepeatElxT :: MatchNext(CContext * pContext) const +{ + return MatchNextFixed(pContext); +} + +template int CRepeatElxT :: MatchFixed(CContext * pContext) const +{ + if(m_nfixed == 0) + return 1; + + int n = 0; + + while(n < m_nfixed) + { + if(m_pelx->Match(pContext)) + { + n ++; + } + else + { + n --; + + while(n >= 0 && ! m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + } + } + + return 1; +} + +template int CRepeatElxT :: MatchNextFixed(CContext * pContext) const +{ + if(m_nfixed == 0) + return 0; + + // from last + int n = m_nfixed - 1; + + while(n >= 0 && ! m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + + // match rest + while(n < m_nfixed) + { + if(m_pelx->Match(pContext)) + { + n ++; + } + else + { + n --; + + while(n >= 0 && ! 
m_pelx->MatchNext(pContext)) + n --; + + if(n >= 0) + n ++; + else + return 0; + } + } + + return 1; +} + +// Regexp +typedef CRegexpT CRegexpA; +typedef CRegexpT CRegexpW; + +#if defined(_UNICODE) || defined(UNICODE) + typedef CRegexpW CRegexp; +#else + typedef CRegexpA CRegexp; +#endif + +#endif//__DEELX_REGEXP__H__ diff --git a/src/Notepad3.rc b/src/Notepad3.rc index 175ac7505..f167dba2d 100644 --- a/src/Notepad3.rc +++ b/src/Notepad3.rc @@ -1356,7 +1356,7 @@ END STRINGTABLE BEGIN IDS_BACKSLASHHELP "Backslash Transformations\n\n\\a\tAlert (BEL, Ascii 7)\n\\b\tBackspace (BS, Ascii 8)\n\\f\tFormfeed (FF, Ascii 12)\n\\n\tNewline (LF, Ascii 10)\n\\r\tCarriage return (CR, Ascii 13)\n\\t\tHorizontal Tab (HT, Ascii 9)\n\\v\tVertical Tab (VT, Ascii 11)\n\\ooo\tOctal Value\n\\u####\tHexadecimal Value\n\\xhh\tHexadecimal Value\n\\\\\tBackslash" - IDS_REGEXPHELP "RegExp Syntax (Single Lines Only)\n\n.\tAny character\n^\tStart of a line\n$\tEnd of a line\n\\<\tStart of a word\n\\>\tEnd of a word\n[...]\tA set of chars ([abc]) or a range ([a-z])\n[^...]\tChars NOT in the set or range\n\\d\tAny decimal digit\n\\D\tAny non-digit char\n\\s\tAny whitespace char\n\\S\tNot a whitespace char\n\\w\tAny ""word"" char\n\\W\tAny ""non-word"" char\n\\x\tEscape character with otherwise special meaning\n\\xHH\tChar with hex code HH\n?\tMatches preceding 0 or 1 times\n*\tMatches preceding 0 or more times\n+\tMatches preceding 1 or more times\n*? 
or +?\tNon greedy matching of quantifiers ""?"" and ""+""\n(\tStart of a region\n)\tEnd of a region\n\\n\tRefers to a region when replacing (n is 1-9)\n" + IDS_REGEXPHELP "RegExp Syntax (Multi Lines)\n\n.\tAny character, except line-breaks\n^\tStart of a line\n$\tEnd of a line\n\\<\tStart of a word\n\\>\tEnd of a word\n[...]\tA set of chars ([abc]) or a range ([a-z])\n[^...]\tChars NOT in the set or range\n\\d\tAny decimal digit\n\\D\tAny non-digit char\n\\s\tAny whitespace char\n\\S\tNot a whitespace char\n\\w\tAny ""word"" char\n\\W\tAny ""non-word"" char\n\\x\tEscape character with otherwise special meaning\n\\xHH\tChar with hex code HH\n?\tMatches preceding 0 or 1 times\n*\tMatches preceding 0 or more times\n+\tMatches preceding 1 or more times\n*? or +?\tNon greedy matching of quantifiers ""?"" and ""+""\n(\tStart of a region\n)\tEnd of a region\n\\n\tRefers to a region when replacing (n is 1-9)\n" IDS_WILDCARDHELP "Wildcard Search\n\n*\tMatches zero or more characters.\n?\tMatches exactly one character. " END