fix: PCRE2 interface - bug matching line-end ($)

This commit is contained in:
Rainer Kottenhoff 2026-03-06 16:15:03 +01:00
parent 00fef58b1b
commit 0f718805d4
5 changed files with 152 additions and 39 deletions

View File

@ -63,7 +63,7 @@ using namespace Scintilla::Internal;
// *** PCRE2 configuration ***
// ============================================================================
enum class EOLmode : int { UDEF = -1, CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF };
// ============================================================================
// ============================================================================
@ -76,14 +76,17 @@ public:
: m_CompileOptions(PCRE2_UTF | PCRE2_UCP | PCRE2_MULTILINE)
, m_CompiledPattern(nullptr)
, m_MatchData(nullptr)
, m_CompileContext(nullptr)
, m_MatchContext(nullptr)
, m_EOLmode(EOLmode::UDEF)
, m_RangeBeg(-1)
, m_RangeEnd(-1)
, m_ErrorInfo()
, m_MatchPos(-1)
, m_MatchLen(0)
{
m_CompileContext = pcre2_compile_context_create(nullptr);
pcre2_set_newline(m_CompileContext, PCRE2_NEWLINE_ANYCRLF);
m_MatchContext = pcre2_match_context_create(nullptr);
// Set match limits to prevent catastrophic backtracking
pcre2_set_match_limit(m_MatchContext, 10000000);
@ -97,6 +100,10 @@ public:
pcre2_match_context_free(m_MatchContext);
m_MatchContext = nullptr;
}
if (m_CompileContext) {
pcre2_compile_context_free(m_CompileContext);
m_CompileContext = nullptr;
}
}
Sci::Position FindText(Document* doc, Sci::Position minPos, Sci::Position maxPos, const char* pattern,
@ -108,7 +115,7 @@ private:
void clear();
std::string translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart, EndOfLine eolMode);
std::string translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart);
std::string convertReplExpr(const std::string & replStr);
@ -116,19 +123,19 @@ private:
std::string m_RegExprStrg;
uint32_t m_CompileOptions;
pcre2_code* m_CompiledPattern;
pcre2_match_data* m_MatchData;
pcre2_match_context* m_MatchContext;
EOLmode m_EOLmode;
uint32_t m_CompileOptions;
pcre2_code* m_CompiledPattern;
pcre2_match_data* m_MatchData;
pcre2_compile_context* m_CompileContext;
pcre2_match_context* m_MatchContext;
Sci::Position m_RangeBeg;
Sci::Position m_RangeEnd;
Sci::Position m_RangeBeg;
Sci::Position m_RangeEnd;
char m_ErrorInfo[256];
char m_ErrorInfo[256];
Sci::Position m_MatchPos;
Sci::Position m_MatchLen;
Sci::Position m_MatchPos;
Sci::Position m_MatchLen;
public:
std::string m_SubstBuffer;
@ -252,10 +259,9 @@ Sci::Position PCRE2RegExEngine::FindText(Document* doc, Sci::Position minPos, Sc
Sci::Position const rangeEnd = (findForward) ? maxPos : minPos;
//Sci::Position const rangeLen = (rangeEnd - rangeBeg);
EOLmode const eolMode = static_cast<EOLmode>(doc->eolMode);
// --- Build compile options ---
// PCRE2_MULTILINE: ^/$ match at line boundaries (Oniguruma's default behavior)
// PCRE2_MULTILINE: ^/$ match at line boundaries
// Newline convention (ANYCRLF) is set on m_CompileContext, not here
uint32_t compileOptions = PCRE2_UTF | PCRE2_UCP | PCRE2_MULTILINE;
if (!caseSensitive) {
@ -265,10 +271,10 @@ Sci::Position PCRE2RegExEngine::FindText(Document* doc, Sci::Position minPos, Sc
compileOptions |= PCRE2_DOTALL; // Note: Oniguruma called this MULTILINE
}
std::string const sRegExprStrg = translateRegExpr(pattern, word, wordStart, doc->eolMode);
std::string const sRegExprStrg = translateRegExpr(pattern, word, wordStart);
bool const bReCompile = (m_CompiledPattern == nullptr) || (m_CompileOptions != compileOptions)
|| (m_RegExprStrg.compare(sRegExprStrg) != 0) || (m_EOLmode != eolMode);
|| (m_RegExprStrg.compare(sRegExprStrg) != 0);
if (bReCompile) {
clear();
@ -276,7 +282,6 @@ Sci::Position PCRE2RegExEngine::FindText(Document* doc, Sci::Position minPos, Sc
m_CompileOptions = compileOptions;
m_RangeBeg = rangeBeg;
m_RangeEnd = rangeEnd;
m_EOLmode = eolMode;
m_ErrorInfo[0] = '\0';
try {
@ -288,7 +293,7 @@ Sci::Position PCRE2RegExEngine::FindText(Document* doc, Sci::Position minPos, Sc
m_CompileOptions,
&errorcode,
&erroroffset,
nullptr // default compile context
m_CompileContext // newline convention set to ANYCRLF
);
if (!m_CompiledPattern) {
@ -555,11 +560,8 @@ const char* PCRE2RegExEngine::SubstituteByPosition(Document* doc, const char* te
//
// private methods
std::string PCRE2RegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart,
EndOfLine eolMode)
std::string PCRE2RegExEngine::translateRegExpr(const std::string & regExprStr, bool wholeWord, bool wordStart)
{
UNREFERENCED_PARAMETER(eolMode);
std::string transRegExpr;
if (wholeWord || wordStart) { // push '\b' at the begin of regexpr
@ -755,8 +757,11 @@ public:
: m_CompileOptions(0)
, m_CompiledPattern(nullptr)
, m_MatchData(nullptr)
, m_CompileContext(nullptr)
, m_ErrorInfo()
{
m_CompileContext = pcre2_compile_context_create(nullptr);
pcre2_set_newline(m_CompileContext, PCRE2_NEWLINE_ANYCRLF);
}
~SimplePCRE2Engine() noexcept
@ -767,6 +772,9 @@ public:
if (m_CompiledPattern) {
pcre2_code_free(m_CompiledPattern);
}
if (m_CompileContext) {
pcre2_compile_context_free(m_CompileContext);
}
}
// non-copyable
@ -781,6 +789,7 @@ private:
uint32_t m_CompileOptions;
pcre2_code* m_CompiledPattern;
pcre2_match_data* m_MatchData;
pcre2_compile_context* m_CompileContext;
char m_ErrorInfo[256];
@ -832,7 +841,7 @@ ptrdiff_t SimplePCRE2Engine::Find(const char* pattern, const char* document, con
auto const patternLen = strlen(pattern);
auto const stringLen = strlen(document);
// Build compile options
// Build compile options (newline convention set on m_CompileContext)
uint32_t compileOptions = PCRE2_UTF | PCRE2_UCP | PCRE2_MULTILINE;
if (!caseSensitive) {
compileOptions |= PCRE2_CASELESS;
@ -868,7 +877,7 @@ ptrdiff_t SimplePCRE2Engine::Find(const char* pattern, const char* document, con
m_CompileOptions,
&errorcode,
&erroroffset,
nullptr
m_CompileContext
);
if (!m_CompiledPattern) {
@ -930,9 +939,7 @@ extern "C"
#ifdef SCINTILLA_DLL
__declspec(dllexport)
#endif
ptrdiff_t WINAPI RegExFind(const char *pchPattern, const char *pchText, const bool caseSensitive, const int eolMode, int *matchLen_out) {
UNREFERENCED_PARAMETER(eolMode);
ptrdiff_t WINAPI RegExFind(const char *pchPattern, const char *pchText, const bool caseSensitive, int *matchLen_out) {
// Static cached engine: pattern is compiled once, reused across calls.
// Only recompiles when pattern or options change.

View File

@ -2157,7 +2157,7 @@ void EditURLDecode(const bool isPathConvert)
// can URL be found by Hyperlink pattern matching ?
int matchLen = 0;
ptrdiff_t const pos = RegExFind(s_pUrlRegExA, pszUnescaped, false, SciCall_GetEOLMode(), &matchLen);
ptrdiff_t const pos = RegExFind(s_pUrlRegExA, pszUnescaped, false, &matchLen);
bool const bIsValidConversion = isPathConvert ? ((pos >= 0) && (cchUnescapedDec == matchLen)) : true;
if (bIsValidConversion) {
@ -3117,6 +3117,7 @@ void EditCutLines(HWND hwnd, const bool bMSBehavSelEmpty)
bool const bIsLineEmpty = Sci_GetNetLineLength(Sci_GetCurrentLineNumber()) == 0;
UndoTransActionBegin();
if (SciCall_IsSelectionEmpty() && bMSBehavSelEmpty) {
//? SciCall_CutAllowLine(); - does it the same as CopyAllowLine() + LineDelete()?
SciCall_CopyAllowLine(); // (!) VisualStudio behavior
// On Windows, an extra "MSDEVLineSelect" marker is added to the clipboard
// which is then used in SCI_PASTE to paste the whole line before the current line.

View File

@ -104,8 +104,8 @@ LRESULT WINAPI Scintilla_DirectStatusFunction(HANDLE, UINT, WPARAM, LPARAM, LPIN
//=============================================================================
// PCRE2 RegEx search (exported from PCRE2RegExEngine.cxx)
ptrdiff_t WINAPI RegExFind(const char* pchPattern, const char* pchText,
const bool caseSensitive, const int eolMode, int *matchLen_out);
ptrdiff_t WINAPI RegExFind(const char* pchPattern, const char* pchText,
const bool caseSensitive, int *matchLen_out);
//=============================================================================

View File

@ -227,6 +227,36 @@ typedef enum {
FW_IDX_ULTRADARK
} FW_IDX;
// Font Stretch
typedef struct _fntstrtch {
LPCWSTR const wname;
int const stretch;
} FONTSTRETCH_T;
// Keyword <-> stretch value table.
// Rows are listed from the narrowest to the widest face; lookups walk the
// table front to back, so the row order must stay as it is.
static const FONTSTRETCH_T FontStretches[9] = {
    /* 0 */ { L"ultracondensed", SC_STRETCH_ULTRA_CONDENSED },
    /* 1 */ { L"extracondensed", SC_STRETCH_EXTRA_CONDENSED },
    /* 2 */ { L"condensed",      SC_STRETCH_CONDENSED       },
    /* 3 */ { L"semicondensed",  SC_STRETCH_SEMI_CONDENSED  },
    /* 4 */ { L"normal",         SC_STRETCH_NORMAL          },  // default
    /* 5 */ { L"semiexpanded",   SC_STRETCH_SEMI_EXPANDED   },
    /* 6 */ { L"expanded",       SC_STRETCH_EXPANDED        },
    /* 7 */ { L"extraexpanded",  SC_STRETCH_EXTRA_EXPANDED  },
    /* 8 */ { L"ultraexpanded",  SC_STRETCH_ULTRA_EXPANDED  },
};
// Row indices for the font stretch table, first (ultra condensed)
// through last (ultra expanded). Values are contiguous, starting at 0.
typedef enum {
    FS_IDX_ULTRACONDENSED = 0,
    FS_IDX_EXTRACONDENSED = 1,
    FS_IDX_CONDENSED      = 2,
    FS_IDX_SEMICONDENSED  = 3,
    FS_IDX_NORMAL         = 4,
    FS_IDX_SEMIEXPANDED   = 5,
    FS_IDX_EXPANDED       = 6,
    FS_IDX_EXTRAEXPANDED  = 7,
    FS_IDX_ULTRAEXPANDED  = 8
} FONTSTRETCH_IDX;
//// font quality
//#define Style_StrHasAttrNone(lpszStyle) Style_StrHasAttribute((lpszStyle), L"none")
//#define Style_StrHasAttrStdType(lpszStyle) Style_StrHasAttribute((lpszStyle), L"standard")
@ -2514,7 +2544,7 @@ PEDITLEXER Style_RegExMatchLexer(LPCWSTR lpszFileName)
char regexpat[HUGE_BUFFER] = { '\0' };
WideCharToMultiByte(CP_UTF8, 0, f, (int)(e-f), regexpat, (int)COUNTOF(regexpat), NULL, NULL);
if (RegExFind(regexpat, chFilePath, false, SciCall_GetEOLMode(), NULL) >= 0) {
if (RegExFind(regexpat, chFilePath, false, NULL) >= 0) {
return g_pLexArray[iLex];
}
}
@ -3378,6 +3408,50 @@ void Style_AppendWeightAttribute(LPWSTR lpszWeight, int cchSize, int fontWeight)
}
//=============================================================================
//
// Style_StrGetStretchValue()
//
bool Style_StrGetStretchValue(LPCWSTR lpszStyle, int* stretch)
{
    // Probe the style string for each stretch keyword in table order;
    // the first keyword that is present decides the result.
    // NOTE(review): this relies on Style_StrHasAttribute() matching whole
    // attributes - with plain substring matching "condensed" would shadow
    // "semicondensed" (and "expanded" the longer "...expanded" names). Confirm.
    for (int idx = FS_IDX_ULTRACONDENSED; idx <= FS_IDX_ULTRAEXPANDED; ++idx) {
        const FONTSTRETCH_T* const pEntry = &FontStretches[idx];
        if (Style_StrHasAttribute(lpszStyle, pEntry->wname)) {
            *stretch = pEntry->stretch;
            return true;
        }
    }
    // No stretch keyword present: the caller's value is left untouched.
    return false;
}
//=============================================================================
//
// Style_AppendStretchAttribute()
//
void Style_AppendStretchAttribute(LPWSTR lpszStyle, int cchSize, int fontStretch)
{
    // SC_STRETCH_NORMAL is the default and is never written out;
    // a value without a table entry appends nothing either.
    if (fontStretch != SC_STRETCH_NORMAL) {
        for (int idx = FS_IDX_ULTRACONDENSED; idx <= FS_IDX_ULTRAEXPANDED; ++idx) {
            if (FontStretches[idx].stretch == fontStretch) {
                // first table row carrying this value supplies the keyword
                AppendStyle(lpszStyle, cchSize, FontStretches[idx].wname);
                return;
            }
        }
    }
}
//=============================================================================
//
// Style_StrGetColor()
@ -3599,6 +3673,26 @@ void Style_CopyStyles_IfNotDefined(LPCWSTR lpszStyleSrc, LPWSTR lpszStyleDest, i
AppendStyle(szTmpStyle, COUNTOF(szTmpStyle), pFontWeight);
}
// Font Stretch
const WCHAR *pFontStretch = NULL;
for (int idx = FS_IDX_ULTRACONDENSED; idx <= FS_IDX_ULTRAEXPANDED; ++idx) {
if (Style_StrHasAttribute(lpszStyleDest, FontStretches[idx].wname)) {
pFontStretch = FontStretches[idx].wname;
break;
}
}
if (!bIsFontDefInDestination && !pFontStretch) {
for (int idx = FS_IDX_ULTRACONDENSED; idx <= FS_IDX_ULTRAEXPANDED; ++idx) {
if (Style_StrHasAttribute(lpszStyleSrc, FontStretches[idx].wname)) {
pFontStretch = FontStretches[idx].wname;
break;
}
}
}
if (pFontStretch) {
AppendStyle(szTmpStyle, COUNTOF(szTmpStyle), pFontStretch);
}
if (Style_StrHasAttribute(lpszStyleDest, FontEffects[FE_ITALIC])) {
AppendStyle(szTmpStyle, COUNTOF(szTmpStyle), FontEffects[FE_ITALIC]);
} else if (!bIsFontDefInDestination && Style_StrHasAttribute(lpszStyleSrc, FontEffects[FE_ITALIC])) {
@ -3818,7 +3912,8 @@ bool Style_SelectFont(HWND hwnd, LPWSTR lpszStyle, int cchStyle, LPCWSTR sLexerN
int const iFontHeight = PointSizeToFontHeight(fFontSize, hdc);
ReleaseDC(hwnd, hdc);
int const iFontStretch = 0; // with calculated automatically
int iFontStretch = SC_STRETCH_NORMAL;
Style_StrGetStretchValue(lpszStyle, &iFontStretch);
bool const bIsUnderline = Style_StrHasAttribute(lpszStyle, FontEffects[FE_UNDERLINE]);
bool const bIsStrikeout = Style_StrHasAttribute(lpszStyle, FontEffects[FE_STRIKEOUT]);
@ -3835,7 +3930,7 @@ bool Style_SelectFont(HWND hwnd, LPWSTR lpszStyle, int cchStyle, LPCWSTR sLexerN
LOGFONT lf = { 0 };
lf.lfCharSet = (BYTE)iCharSet;
lf.lfHeight = iFontHeight;
lf.lfWidth = iFontStretch;
lf.lfWidth = 0; // let system calculate character width
lf.lfWeight = iFontWeight;
lf.lfItalic = (BYTE)(BOOL)bIsItalic;
lf.lfUnderline = (BYTE)(BOOL)bIsUnderline;
@ -3942,6 +4037,11 @@ bool Style_SelectFont(HWND hwnd, LPWSTR lpszStyle, int cchStyle, LPCWSTR sLexerN
Style_AppendWeightAttribute(szNewStyle, COUNTOF(szNewStyle), lf.lfWeight);
}
// persist stretch (ChooseFont dialog doesn't modify it, so round-trip the original)
if (iFontStretch != SC_STRETCH_NORMAL) {
Style_AppendStretchAttribute(szNewStyle, COUNTOF(szNewStyle), iFontStretch);
}
if (lf.lfItalic) {
AppendStyle(szNewStyle, COUNTOF(szNewStyle), FontEffects[FE_ITALIC]);
}
@ -4109,6 +4209,13 @@ void Style_SetStyles(HWND hwnd, const int iStyle, LPCWSTR lpszStyle, const float
SciCall_StyleSetWeight(iStyle, SC_WEIGHT_NORMAL);
}
// Font Stretch
if (Style_StrGetStretchValue(lpszStyle, &iValue)) {
SciCall_StyleSetStretch(iStyle, iValue);
} else if (bIsDefaultStyle) {
SciCall_StyleSetStretch(iStyle, SC_STRETCH_NORMAL);
}
// Italic
SciCall_StyleSetItalic(iStyle, Style_StrHasAttribute(lpszStyle, FontEffects[FE_ITALIC]));

View File

@ -129,10 +129,8 @@ inline void Style_PrintfCchColor(LPWSTR buffer, const size_t cch, LPCWSTR prefix
}
}
#if 0
bool Style_StrGetStretchValue(LPCWSTR lpszWeight, int* stretch);
void Style_AppendStretchStr(LPWSTR lpszWeight, int cchSize, int fontStretch);
#endif
bool Style_StrGetStretchValue(LPCWSTR lpszStyle, int* stretch);
void Style_AppendStretchAttribute(LPWSTR lpszStyle, int cchSize, int fontStretch);
#endif //_NP3_STYLES_H_