mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+chg: Oniguruma syntax flavor for \h and \H (Ruby: match hex digit) to "horizontal space" definition of Perl / PCRE
This commit is contained in:
parent
863e017a07
commit
8bcc76089d
@ -69,6 +69,28 @@ static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 };
|
||||
// ============================================================================
|
||||
// ============================================================================
|
||||
|
||||
// https://stackoverflow.com/questions/22937618/reference-what-does-this-regex-mean/
|
||||
|
||||
#define NP3_ONIG_SYNTAX_FLAVOR ONIG_SYNTAX_DEFAULT // default is ONIG_SYNTAX_ONIGURUMA
|
||||
|
||||
// ensure some from special syntax options are excluded/included
|
||||
|
||||
const unsigned int RemSynOptions_1[1] = { 0 };
|
||||
const unsigned int RemSynOptions_2[] = {
|
||||
ONIG_SYN_OP2_ESC_H_XDIGIT // remove to replace \h\H with [^\S\n\v\f\r\u2028\u2029]
|
||||
};
|
||||
|
||||
const unsigned int AddSynOptions_1[] = {
|
||||
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END // \<. \>
|
||||
};
|
||||
const unsigned int AddSynOptions_2[] = {
|
||||
ONIG_SYN_OP2_ESC_U_HEX4 // \uHHHH
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
// ------------------------------------
|
||||
// --- Onigmo Engine Simple Options ---
|
||||
// ------------------------------------
|
||||
@ -134,13 +156,9 @@ static void SetSimpleOptions(OnigOptionType &onigOptions, EOLmode /*eolMode*/,
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
||||
|
||||
#define NP3_ONIG_SYNTAX_FLAVOR (ONIG_SYNTAX_DEFAULT) // default is: ONIG_SYNTAX_ONIGURUMA
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
class OnigurumaRegExEngine : public RegexSearchBase
|
||||
{
|
||||
public:
|
||||
@ -161,7 +179,19 @@ public:
|
||||
onig_initialize(s_UsedEncodingsTypes, _ARRAYSIZE(s_UsedEncodingsTypes));
|
||||
onig_set_default_syntax(NP3_ONIG_SYNTAX_FLAVOR); // std is: ONIG_SYNTAX_ONIGURUMA
|
||||
|
||||
m_OnigSyntax.op |= ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END; // xcluded from ONIG_SYNTAX_DEFAULT ?
|
||||
for (const auto op1 : RemSynOptions_1) {
|
||||
m_OnigSyntax.op &= ~op1;
|
||||
}
|
||||
for (const auto op2 : RemSynOptions_2) {
|
||||
m_OnigSyntax.op2 &= ~op2;
|
||||
}
|
||||
|
||||
for (const auto op1 : AddSynOptions_1) {
|
||||
m_OnigSyntax.op |= op1;
|
||||
}
|
||||
for (const auto op2 : AddSynOptions_2) {
|
||||
m_OnigSyntax.op2 |= op2;
|
||||
}
|
||||
|
||||
onig_region_init(&m_Region);
|
||||
}
|
||||
@ -557,7 +587,8 @@ void OnigurumaRegExEngine::clear() {
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart, EndOfLine eolMode, OnigOptionType & /*rxOptions*/)
|
||||
std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart,
|
||||
EndOfLine eolMode, OnigOptionType & /*rxOptions*/)
|
||||
{
|
||||
UNREFERENCED_PARAMETER(eolMode);
|
||||
|
||||
@ -584,6 +615,9 @@ std::string OnigurumaRegExEngine::translateRegExpr(const std::string & regExprSt
|
||||
//~replaceAll(transRegExpr, R"(\>)", R"((?<=\w)(?!\w))"); // word end
|
||||
//~replaceAll(transRegExpr, R"(\(?<=\w)(?!\w))", R"(\\>)"); // esc'd
|
||||
|
||||
replaceAll(transRegExpr, R"(\h)", R"([^\S\n\v\f\r\u2028\u2029])"); // horizontal space
|
||||
replaceAll(transRegExpr, R"(\H)", R"([^\t\p{Zs}])"); // not horizontal space
|
||||
|
||||
#if 0
|
||||
// EOL modes is controlled by
|
||||
switch (eolMode) {
|
||||
|
||||
@ -264,13 +264,6 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
static int
|
||||
is_utf8_newline(const UChar *p, const UChar *end)
|
||||
{
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
if (p + 1 < end) {
|
||||
if ((*p == CARRIAGE_RET) && (*(p+1) == NEWLINE_CODE)) // CRLF
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (p + 2 < end) {
|
||||
if ((*p == 0xe2) && (*(p+1) == 0x80) && ((*(p+2) == 0xa8) || (*(p+2) == 0xa9))) // LS or PS
|
||||
@ -282,6 +275,13 @@ is_utf8_newline(const UChar *p, const UChar *end)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
if (p + 1 < end) {
|
||||
if ((*p == CARRIAGE_RET) && (*(p+1) == NEWLINE_CODE)) // CRLF
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (p < end) {
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if ((*p == CARRIAGE_RET) || (*p == NEWLINE_CODE) || (*p == END_OF_FILE))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user