mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
+feature: first plain functional Oniguruma RegEx search engine interface
This commit is contained in:
parent
5c5681f2f2
commit
e974d4ecde
@ -270,7 +270,6 @@
|
||||
<ClCompile Include="..\oniguruma\utf32_le.c" />
|
||||
<ClCompile Include="..\oniguruma\utf8.c" />
|
||||
<ClCompile Include="..\scionigu\OniguRegExEngine.cxx" />
|
||||
<ClCompile Include="deelx\DeelxRegexSearch.cxx" />
|
||||
<ClCompile Include="lexers\LexAHK.cxx" />
|
||||
<ClCompile Include="lexers\LexAsm.cxx" />
|
||||
<ClCompile Include="lexers\LexAU3.cxx" />
|
||||
@ -360,8 +359,6 @@
|
||||
<ClInclude Include="..\oniguruma\regint.h" />
|
||||
<ClInclude Include="..\oniguruma\regparse.h" />
|
||||
<ClInclude Include="..\oniguruma\st.h" />
|
||||
<ClInclude Include="deelx\deelx.h" />
|
||||
<ClInclude Include="deelx\deelx64.h" />
|
||||
<ClInclude Include="include\ILexer.h" />
|
||||
<ClInclude Include="include\ILoader.h" />
|
||||
<ClInclude Include="include\Platform.h" />
|
||||
@ -422,9 +419,6 @@
|
||||
<ClInclude Include="src\XPM.h" />
|
||||
<ClInclude Include="win32\PlatWin.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="deelx\doc\deelx_en.chm" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
|
||||
@ -16,12 +16,6 @@
|
||||
<Filter Include="win32">
|
||||
<UniqueIdentifier>{afe7e35e-cd81-406c-a770-df29d2b3fc95}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="deelx">
|
||||
<UniqueIdentifier>{67242aad-9133-44e7-9774-c36f5a9194bc}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="deelx\doc">
|
||||
<UniqueIdentifier>{4e167b73-0447-4a31-a66b-64c2d684516d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="oniguruma">
|
||||
<UniqueIdentifier>{d5a1f6c6-a4ec-4bb2-bc45-0698a95376d3}</UniqueIdentifier>
|
||||
</Filter>
|
||||
@ -249,9 +243,6 @@
|
||||
<ClCompile Include="win32\ScintillaWin.cxx">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="deelx\DeelxRegexSearch.cxx">
|
||||
<Filter>deelx</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="lexers\LexJSON.cxx">
|
||||
<Filter>lexers</Filter>
|
||||
</ClCompile>
|
||||
@ -602,9 +593,6 @@
|
||||
<ClInclude Include="win32\PlatWin.h">
|
||||
<Filter>win32</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="deelx\deelx64.h">
|
||||
<Filter>deelx</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="include\Sci_Position.h">
|
||||
<Filter>include</Filter>
|
||||
</ClInclude>
|
||||
@ -623,9 +611,6 @@
|
||||
<ClInclude Include="include\ILoader.h">
|
||||
<Filter>include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="deelx\deelx.h">
|
||||
<Filter>deelx</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\oniguruma\config.h">
|
||||
<Filter>oniguruma</Filter>
|
||||
</ClInclude>
|
||||
@ -651,9 +636,4 @@
|
||||
<Filter>oniguruma</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="deelx\doc\deelx_en.chm">
|
||||
<Filter>deelx\doc</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@ -40,13 +40,17 @@ using namespace Scintilla;
|
||||
#define SciLn(line) static_cast<Sci::Line>(line)
|
||||
#define SciPosExt(pos) static_cast<Sci_Position>(pos)
|
||||
|
||||
#define DeelXPos(pos) static_cast<deelx::index_t>(pos)
|
||||
#define Cast2long(n) static_cast<long>(n)
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
const int MAX_GROUP_COUNT = 10;
|
||||
|
||||
const OnigEncoding g_pEncodingType = ONIG_ENCODING_ASCII; // ONIG_ENCODING_SJIS
|
||||
static OnigEncoding use_encs[] = { g_pEncodingType };
|
||||
|
||||
const bool gExtended = false; // ignore spaces and use '#' as line-comment)
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
class OniguRegExEngine : public RegexSearchBase
|
||||
@ -55,11 +59,23 @@ public:
|
||||
|
||||
explicit OniguRegExEngine(CharClassify* charClassTable)
|
||||
: m_RegExprStrg()
|
||||
, m_CmplOptions(ONIG_OPTION_DEFAULT)
|
||||
, m_pRegExpr(nullptr)
|
||||
, m_ErrorInfo()
|
||||
, m_MatchPos(ONIG_MISMATCH)
|
||||
, m_MatchLen(0)
|
||||
, m_SubstBuffer()
|
||||
{}
|
||||
{
|
||||
onig_initialize(use_encs, sizeof(use_encs) / sizeof(use_encs[0]));
|
||||
m_pRegion = onig_region_new();
|
||||
}
|
||||
|
||||
virtual ~OniguRegExEngine()
|
||||
{
|
||||
if (m_pRegion)
|
||||
onig_region_free(m_pRegion, 1);
|
||||
|
||||
onig_end();
|
||||
}
|
||||
|
||||
virtual long FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char* pattern,
|
||||
@ -70,12 +86,21 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart);
|
||||
std::string& translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart, int eolMode);
|
||||
std::string& convertReplExpr(std::string& replStr);
|
||||
|
||||
private:
|
||||
|
||||
std::string m_RegExprStrg;
|
||||
|
||||
OnigOptionType m_CmplOptions;
|
||||
regex_t* m_pRegExpr;
|
||||
OnigRegion* m_pRegion;
|
||||
char m_ErrorInfo[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
|
||||
Sci::Position m_MatchPos;
|
||||
Sci::Position m_MatchLen;
|
||||
|
||||
std::string m_SubstBuffer;
|
||||
};
|
||||
// ============================================================================
|
||||
@ -97,9 +122,82 @@ RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable)
|
||||
long OniguRegExEngine::FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char *pattern,
|
||||
bool caseSensitive, bool word, bool wordStart, int searchFlags, Sci::Position *length)
|
||||
{
|
||||
|
||||
// Range endpoints should not be inside DBCS characters, but just in case, move them.
|
||||
minPos = doc->MovePositionOutsideChar(minPos, 1, false);
|
||||
maxPos = doc->MovePositionOutsideChar(maxPos, 1, false);
|
||||
const bool findprevious = (minPos > maxPos);
|
||||
|
||||
|
||||
OnigOptionType cmplOptions = ONIG_OPTION_DEFAULT;
|
||||
cmplOptions |= (ONIG_OPTION_MULTILINE | ONIG_OPTION_CAPTURE_GROUP); // the .(dot) does not match line-breaks
|
||||
cmplOptions |= (gExtended) ? ONIG_OPTION_EXTEND : ONIG_OPTION_NONE;
|
||||
cmplOptions |= (caseSensitive) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE;
|
||||
|
||||
std::string sRegExprStrg = translateRegExpr(std::string(pattern), word, wordStart, doc->eolMode);
|
||||
|
||||
bool bReCompile = (m_CmplOptions != cmplOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0);
|
||||
|
||||
if (bReCompile) {
|
||||
m_RegExprStrg.clear();
|
||||
m_RegExprStrg = sRegExprStrg;
|
||||
m_CmplOptions = cmplOptions;
|
||||
m_ErrorInfo[0] = '\0';
|
||||
try {
|
||||
OnigErrorInfo einfo;
|
||||
int result = onig_new(&m_pRegExpr, (UChar*)m_RegExprStrg.c_str(), (UChar*)(m_RegExprStrg.c_str() + m_RegExprStrg.length()),
|
||||
m_CmplOptions, g_pEncodingType, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
if (result != 0) {
|
||||
onig_error_code_to_str((UChar*)m_ErrorInfo, result, &einfo);
|
||||
return Cast2long(-2);
|
||||
}
|
||||
}
|
||||
catch (...) {
|
||||
return Cast2long(-2); // -1 is normally used for not found, -2 is used here for invalid regex
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
UChar* docBegPtr = (UChar*)doc->RangePointer(0, SciPos(doc->Length()));
|
||||
UChar* docSEndPtr = (UChar*)doc->RangePointer(SciPos(doc->Length()),0);
|
||||
Sci::Position rangeLength = abs(maxPos - minPos);
|
||||
UChar* rangeBegPtr = (UChar*)doc->RangePointer((findprevious) ? maxPos : minPos, rangeLength);
|
||||
UChar* rangeEndPtr = (UChar*)doc->RangePointer((findprevious) ? minPos : maxPos, rangeLength);
|
||||
|
||||
m_MatchPos = SciPos(ONIG_MISMATCH); // not found
|
||||
m_MatchLen = SciPos(0);
|
||||
|
||||
int result = onig_search(m_pRegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, m_pRegion, ONIG_OPTION_NONE);
|
||||
|
||||
if (result < ONIG_MISMATCH) {
|
||||
onig_error_code_to_str((UChar*)m_ErrorInfo, result);
|
||||
return Cast2long(-2);
|
||||
}
|
||||
|
||||
if (findprevious) // search previous
|
||||
{
|
||||
// search for last occurrence in range
|
||||
//SPEEDUP: onig_scan() ???
|
||||
while ((result >= 0) && (rangeBegPtr <= rangeEndPtr))
|
||||
{
|
||||
m_MatchPos = SciPos(m_pRegion->beg[0]);
|
||||
m_MatchLen = SciPos(m_pRegion->end[0] - m_pRegion->beg[0]);
|
||||
|
||||
rangeBegPtr = docBegPtr + (m_MatchPos + m_MatchLen);
|
||||
|
||||
result = onig_search(m_pRegExpr, docBegPtr, docSEndPtr, rangeBegPtr, rangeEndPtr, m_pRegion, ONIG_OPTION_NONE);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ((result >= 0) && (rangeBegPtr <= rangeEndPtr)) {
|
||||
m_MatchPos = SciPos(m_pRegion->beg[0]);
|
||||
m_MatchLen = SciPos(m_pRegion->end[0] - m_pRegion->beg[0]);
|
||||
}
|
||||
}
|
||||
|
||||
//NOTE: potential 64-bit-size issue at interface here:
|
||||
*length = SciPos(0);
|
||||
return static_cast<long>(0);
|
||||
*length = m_MatchLen;
|
||||
return static_cast<long>(m_MatchPos);
|
||||
}
|
||||
// ============================================================================
|
||||
|
||||
@ -175,7 +273,7 @@ static void replaceAll(std::string& source,const std::string& from,const std::st
|
||||
|
||||
|
||||
|
||||
std::string& OniguRegExEngine::translateRegExpr(std::string& regExprStr,bool wholeWord,bool wordStart)
|
||||
std::string& OniguRegExEngine::translateRegExpr(std::string& regExprStr, bool wholeWord, bool wordStart, int eolMode)
|
||||
{
|
||||
std::string tmpStr;
|
||||
|
||||
@ -187,12 +285,30 @@ std::string& OniguRegExEngine::translateRegExpr(std::string& regExprStr,bool who
|
||||
tmpStr.push_back('\\');
|
||||
tmpStr.push_back('b');
|
||||
}
|
||||
replaceAll(tmpStr,".",R"(\w)");
|
||||
replaceAll(tmpStr, ".", R"(\w)");
|
||||
}
|
||||
else {
|
||||
tmpStr.append(regExprStr);
|
||||
}
|
||||
std::swap(regExprStr,tmpStr);
|
||||
|
||||
switch (eolMode) {
|
||||
case SC_EOL_LF:
|
||||
// we are fine here
|
||||
break;
|
||||
|
||||
case SC_EOL_CR:
|
||||
//TODO: don't know what to do here ...
|
||||
break;
|
||||
|
||||
case SC_EOL_CRLF:
|
||||
{
|
||||
//replaceAll(tmpStr, "$", R"(\r$)");
|
||||
//replaceAll(tmpStr, R"(\\r$)", R"(\$)");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
std::swap(regExprStr, tmpStr);
|
||||
return regExprStr;
|
||||
}
|
||||
// ----------------------------------------------------------------------------
|
||||
@ -274,7 +390,7 @@ std::string& OniguRegExEngine::convertReplExpr(std::string& replStr)
|
||||
}
|
||||
if (val[0]) {
|
||||
val[1] = 0;
|
||||
WideCharToMultiByte(CP_UTF8, 0, val, -1, buf, ARRAYSIZE(val), NULL, NULL);
|
||||
WideCharToMultiByte(CP_UTF8, 0, val, -1, buf, ARRAYSIZE(val), nullptr, nullptr);
|
||||
tmpStr.push_back(*pch++);
|
||||
while (*pch)
|
||||
tmpStr.push_back(*pch++);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user