diff --git a/plans/schema_enhancements.md b/plans/schema_enhancements.md index 5a107f1e7..e2dd4fc67 100644 --- a/plans/schema_enhancements.md +++ b/plans/schema_enhancements.md @@ -27,7 +27,7 @@ low-to-moderate effort; several are trivial single-call-site fixes. |------|------------------| | `src/Styles.c` | `g_pLexArray[]`, load/save, mini-DSL parsers, Customize / Select / Themes dialogs, Import / Export | | `src/StyleLexers/EditLexer.h` | `EDITLEXER`, `EDITSTYLE`, `KEYWORDLIST`, `BUFSIZE_STYLE_VALUE = 256` | -| `src/TypeDefs.h` | `STYLE_EXTENTIONS_BUFFER = 512`, `EXTENTIONS_FILTER_BUFFER = 1024` | +| `src/TypeDefs.h` | `STYLE_EXTENSIONS_BUFFER = 512`, `EXTENSIONS_FILTER_BUFFER = 1024` | | `src/StyleLexers/*.c` | 60+ per-language lexer definitions | | `src/Config/Config.cpp` | Settings → theme name persistence (`Settings.CurrentThemeName`) | | `language/common_res.h` | Menu IDs: `IDM_VIEW_SCHEME=41001`, `IDM_VIEW_USE2NDDEFAULT=41002`, `IDM_VIEW_SCHEMECONFIG=41003`, `IDM_THEMES_FACTORY_RESET=37001`, `IDM_THEMES_STD_CFG=37002` | diff --git a/readme/schema/CustomSchema.md b/readme/schema/CustomSchema.md index fb8547c73..4e08edf74 100644 --- a/readme/schema/CustomSchema.md +++ b/readme/schema/CustomSchema.md @@ -78,7 +78,7 @@ Every `EDITSTYLE::szValue` is a **semicolon-delimited list of `attribute:value` font:Consolas; size:11; fore:#D7BA7D; back:#1E1E1E; bold ``` -Buffer limit: **255 characters** per style string. Longer strings are silently truncated — see [§11 Limitations](#11-known-limitations--quirks). +Buffer limit: **255 characters** per style string (`BUFSIZE_STYLE_VALUE` in `src/StyleLexers/EditLexer.h`). Longer strings are silently truncated on load. ### 3.1 Attributes @@ -165,7 +165,7 @@ When you open a file, Notepad3 runs a pipeline to pick the right schema. It stop 1. **File variables** (`vim:` / `emacs:` modelines inside the file). 
If the file declares `mode: python;` or `-*- mode: cpp -*-`, the declared mode name is matched against schema names (case-insensitive prefix) and then against extension lists. Disabled by `Settings2.NoFileVariables=1`. 2. **Shebang detection** for `.cgi` / `.fcgi` files or files flagged as CGI by mode. Recognised interpreters: `python`, `ruby`, `bash`/`sh`, `perl`, `tcl`, `node`/`js`, `php`. Disabled by `Settings2.NoCGIGuess=1`. -3. **Regex match on the file name.** Any entry in a schema's extension list that starts with a backslash is treated as a regex — e.g. `\^CMakeLists$` matches the file name `CMakeLists.txt` once combined with the bare `txt` entry below. +3. **Regex match on the file name.** Any entry in a schema's extension list that starts with a backslash is treated as a PCRE2 regex applied to the bare filename — e.g. `\^CMakeLists$` matches the extensionless file `CMakeLists`. See the [Extension lists](#extension-lists) subsection below for full syntax, escaping rules, and worked examples. 4. **Plain extension match.** First lexer whose extension list contains the file's extension wins. Extension comparison is case-insensitive; separator is `;` (semicolon or space both work in practice). 5. **HTML/XML sniff** — if the first bytes start with `<`, classify as HTML or XML. Disabled by `Settings2.NoHTMLGuess=1`. 6. **Shebang fallback** for extension-less files (same recognisers as step 2). @@ -181,16 +181,79 @@ When you open a file, Notepad3 runs a pipeline to pick the right schema. 
It stop ### Extension lists -Each schema has a hard-coded default extension list (compiled in), and a user-editable override stored in the schema's INI section as: +Each schema carries a hard-coded default extension list (compiled into `Notepad3.exe`) plus an optional user override stored in the schema's INI section: ```ini [Python Script] -FileNameExtensions=py;pyw;pyi;\^setup\.py$ +FileNameExtensions=py;pyw;pyi;\^setup\.py$;\^test_.*\.py$ ``` -The override replaces the default completely (not merged). Clearing the field in *Customize Schemes* restores the default. Buffer limit: **~512 characters per schema**. +The override **replaces** the default; the two are not merged. The field accepts both plain file extensions and full-filename regex patterns in a single semicolon-separated list. -> If two schemas both claim the same extension, the one that appears first in the internal schema array wins. There is currently no UI to reorder schemas. +#### Syntax + +| Aspect | Rule | +|---|---| +| Separator | `;` (semicolon) is canonical. A single space also works as a separator and surrounding whitespace around `;` is tolerated. | +| Case | Comparison is **case-insensitive** for both plain and regex entries. | +| Plain entry | A bare extension token like `py` or `cpp`. Matched as a whole token against the file's extension; substring matches are excluded. The leading `.` of the file's extension is stripped before comparison, so `py` matches `foo.py`, `foo.PY`, and `foo.Py`. | +| Regex entry | An entry whose first character is a **backslash** `\`. Everything after the leading `\` is the regex pattern. The `\` is the marker, not part of the pattern. | +| Buffer limit | **511 characters** per schema (counting all entries combined; `STYLE_EXTENSIONS_BUFFER` in `src/TypeDefs.h` is 512 including the terminating NUL). Longer values are truncated when read from the INI — no on-screen warning is shown; only a diagnostic message is written to the debugger output. 
| + +#### Regex extension patterns + +- **Engine:** PCRE2 (the same engine the editor's Find/Replace dialog uses), so the full PCRE2 syntax is available — character classes, alternation, lookarounds, named groups, inline modifiers. +- **Match target:** the **bare filename** including extension (e.g. `setup.py`, `CMakeLists.txt`). The directory portion of the path is stripped before matching, so patterns cannot anchor on parent directories. +- **Anchors are NOT implicit.** A pattern like `\setup\.py` is treated as `setup\.py` and matches anywhere in the filename — it accepts `setup.py`, `mysetup.python`, and `setup.python.bak`. Add `^` and `$` explicitly when you mean the whole filename. +- **`.` is a metachar.** Always escape literal dots as `\.`. Otherwise `\foo.py` will also match `fooXpy`, `foo-py`, etc. +- **Multiple regex entries are allowed** in the same field, each prefixed with its own `\` and separated by `;`. +- **Plain and regex entries can be freely mixed** in the same `FileNameExtensions` value. +- **No per-pattern case-sensitivity flag** is exposed by the dialog. If you need a case-sensitive regex for one pattern, use the PCRE2 inline modifier `(?-i)` at the start of that pattern. 
+ +#### Worked examples + +```ini +; CMakeLists has no extension — only a regex can catch it +[CMake] +FileNameExtensions=cmake;ctest;\^CMakeLists$;\^CMakeLists\.txt$ + +; Dockerfile family — match Dockerfile, Dockerfile.dev, Dockerfile.prod, … +[Docker] +FileNameExtensions=dockerfile;\^Dockerfile(\..+)?$ + +; Hidden shell config files starting with a dot +[Bash Script] +FileNameExtensions=sh;bash;\^\.bashrc$;\^\.bash_profile$;\^\.profile$ + +; Route Python test files to a separate schema +[Python Test] +FileNameExtensions=\^test_.*\.py$;\^.*_test\.py$ + +; Nginx config files — fixed filenames plus the conf.d/*.conf convention +[Nginx Config] +FileNameExtensions=conf;\^nginx\.conf$;\^mime\.types$ +``` + +Common pitfalls illustrated above: +- `\^setup\.py$` matches `setup.py` but **not** `setup.py.bak` (because of the trailing `$`). It does match `Setup.py`, but only thanks to the global case-insensitive flag. +- `\setup\.py` (no anchors) also matches `mysetup.py.bak` and `setup.python` — almost never what you want. +- `\^foo.py$` (missing `\.`) matches `foo.py` *and* `fooXpy` — always escape the dot. + +#### Precedence + +Within `Style_SetLexerFromFile()` the auto-detect pipeline tries the **regex match across all schemas first**, and only then falls back to the plain-extension match across all schemas. Within a single schema the two kinds of entry are independent and may both be present, but because the regex pass over all schemas completes before the plain pass starts, a regex hit in any schema takes priority over a plain-extension hit in any schema. + +When two schemas claim the same plain extension, or both match a file with their regex entries, the schema that appears first in the internal schema array wins. There is currently no UI to reorder schemas. + +#### Editing in *Customize Schemes* + +Select a schema node in the left-hand tree and edit the *FileNameExtensions* field. On save: + +- **Empty field** (or whitespace only) → the compiled-in default is restored. 
+- **Field equals the default** → the INI key is removed entirely so `Notepad3.ini` stays clean. +- **Field differs from default** → the full value is written back to the schema's section. + +Persistence still requires *Save Settings On Exit* or *Settings → Save Settings Now* (**F7**); see [§2](#2-layered-override-model). --- diff --git a/src/Dialogs.c b/src/Dialogs.c index 4e2ea81be..19a81571c 100644 --- a/src/Dialogs.c +++ b/src/Dialogs.c @@ -1483,7 +1483,7 @@ CASE_WM_CTLCOLOR_SET: LPWSTR const args2_buf = StrgWriteAccessBuf(hargs2_str, StrgGetAllocLength(hargs_str)); HSTRINGW hflt_str = StrgCreate(NULL); - LPWSTR const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENTIONS_FILTER_BUFFER); + LPWSTR const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENSIONS_FILTER_BUFFER); GetDlgItemText(hwnd, IDC_COMMANDLINE, args_buf, (int)StrgGetAllocLength(hargs_str)); StrgSanitize(hargs_str); @@ -7335,7 +7335,7 @@ bool OpenFileDlg(HWND hwnd, HPATHL hfile_pth_io, const HPATHL hinidir_pth) Path_Empty(hfile_pth_io, false); } WCHAR szDefExt[64] = { L'\0' }; - WCHAR szFilter[EXTENTIONS_FILTER_BUFFER]; + WCHAR szFilter[EXTENSIONS_FILTER_BUFFER]; Style_GetFileFilterStr(szFilter, COUNTOF(szFilter), szDefExt, COUNTOF(szDefExt), false); HPATHL hpth_dir = Path_Copy(hinidir_pth); @@ -7369,7 +7369,7 @@ bool SaveFileDlg(HWND hwnd, HPATHL hfile_pth_io, const HPATHL hinidir_pth) } WCHAR szDefExt[64] = { L'\0' }; - WCHAR szFilter[EXTENTIONS_FILTER_BUFFER]; + WCHAR szFilter[EXTENSIONS_FILTER_BUFFER]; Style_GetFileFilterStr(szFilter, COUNTOF(szFilter), szDefExt, COUNTOF(szDefExt), true); HPATHL hpth_dir = Path_Copy(hinidir_pth); diff --git a/src/StyleLexers/EditLexer.h b/src/StyleLexers/EditLexer.h index 21489d471..91cc3704d 100644 --- a/src/StyleLexers/EditLexer.h +++ b/src/StyleLexers/EditLexer.h @@ -40,7 +40,7 @@ typedef struct _editlexer int resID; // language resource LPCWSTR pszName; // config/settings section LPCWSTR pszDefExt; // default file name ext (4 reset) - WCHAR 
szExtensions[STYLE_EXTENTIONS_BUFFER]; + WCHAR szExtensions[STYLE_EXTENSIONS_BUFFER]; PKEYWORDLIST pKeyWords; EDITSTYLE Styles[]; // must be last diff --git a/src/Styles.c b/src/Styles.c index e515b3f0a..ad5f86604 100644 --- a/src/Styles.c +++ b/src/Styles.c @@ -716,7 +716,7 @@ bool Style_Import(HWND hwnd) HPATHL hfile_pth = Path_Allocate(NULL); HSTRINGW hflt_str = StrgCreate(NULL); - wchar_t* const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENTIONS_FILTER_BUFFER); + wchar_t* const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENSIONS_FILTER_BUFFER); GetLngString(IDS_MUI_FILTER_INI, flt_buf, (int)StrgGetAllocLength(hflt_str)); StrgSanitize(hflt_str); @@ -754,8 +754,22 @@ static void _LoadLexerFileExtensions() Lexer_Section = (iLexer == 0) ? L"Default Text" : L"2nd Default Text"; } - IniSectionGetString(Lexer_Section, L"FileNameExtensions", g_pLexArray[iLexer]->pszDefExt, - g_pLexArray[iLexer]->szExtensions, COUNTOF(g_pLexArray[iLexer]->szExtensions)); + // Read into an oversized buffer so we can detect when the INI value + // would exceed the per-schema STYLE_EXTENSIONS_BUFFER limit and warn, + // instead of silently dropping the tail. 
+ WCHAR tmpExt[STYLE_EXTENSIONS_BUFFER * 2] = { L'\0' }; + size_t const cchRead = IniSectionGetString(Lexer_Section, L"FileNameExtensions", + g_pLexArray[iLexer]->pszDefExt, tmpExt, COUNTOF(tmpExt)); + + if (cchRead >= (size_t)STYLE_EXTENSIONS_BUFFER) { + WCHAR warnMsg[256]; + StringCchPrintf(warnMsg, COUNTOF(warnMsg), + L"Notepad3: FileNameExtensions for [%s] is %zu chars, exceeds %d-char limit; truncating.\n", + Lexer_Section, cchRead, STYLE_EXTENSIONS_BUFFER - 1); + OutputDebugStringW(warnMsg); + } + StringCchCopy(g_pLexArray[iLexer]->szExtensions, + COUNTOF(g_pLexArray[iLexer]->szExtensions), tmpExt); // don't allow empty extensions settings => use default ext if (StrIsEmpty(g_pLexArray[iLexer]->szExtensions)) { @@ -1001,7 +1015,7 @@ bool Style_Export(HWND hwnd) HPATHL hfile_pth = Path_Allocate(NULL); HSTRINGW hflt_str = StrgCreate(NULL); - wchar_t* const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENTIONS_FILTER_BUFFER); + wchar_t* const flt_buf = StrgWriteAccessBuf(hflt_str, EXTENSIONS_FILTER_BUFFER); GetLngString(IDS_MUI_FILTER_INI, flt_buf, (int)StrgGetAllocLength(hflt_str)); StrgSanitize(hflt_str); @@ -2640,6 +2654,9 @@ PEDITLEXER Style_RegExMatchLexer(LPCWSTR lpszFileName) ++f; // exclude '\' char regexpat[HUGE_BUFFER] = { '\0' }; WideCharToMultiByte(CP_UTF8, 0, f, (int)(e-f), regexpat, (int)COUNTOF(regexpat), NULL, NULL); + // Strip incidental whitespace around the pattern so entries like + // "py; \^setup\.py$ ;txt" don't carry a leading/trailing space into PCRE2. 
+ StrTrimA(regexpat, " \t"); if (RegExFind(regexpat, chFilePath, false, NULL) >= 0) { return g_pLexArray[iLex]; @@ -3050,7 +3067,7 @@ bool Style_GetFileFilterStr(LPWSTR lpszFilter, int cchFilter, LPWSTR lpszDefExt, WCHAR filterAll[80] = { L'\0' }; GetLngString(IDS_MUI_FILTER_ALL, filterAll, COUNTOF(filterAll)); - WCHAR filterDef[EXTENTIONS_FILTER_BUFFER] = { L'\0' }; + WCHAR filterDef[EXTENSIONS_FILTER_BUFFER] = { L'\0' }; WCHAR ext[64] = { L'\0' }; WCHAR append[80] = { L'\0' }; bool bCurExtIncl = false; @@ -4457,7 +4474,7 @@ void Style_GetStyleDisplayName(PEDITSTYLE pStyle, LPWSTR lpszName, int cchName) // int Style_GetLexerIconId(PEDITLEXER plex) { - WCHAR pszFile[STYLE_EXTENTIONS_BUFFER << 1]; + WCHAR pszFile[STYLE_EXTENSIONS_BUFFER << 1]; LPCWSTR pszExtensions = StrIsNotEmpty(plex->szExtensions) ? plex->szExtensions : plex->pszDefExt; StringCchCopy(pszFile, COUNTOF(pszFile), L"*."); @@ -4564,10 +4581,10 @@ static bool _ApplyDialogItemText(HWND hwnd, PEDITLEXER pDlgLexer, PEDITSTYLE pD bool bChgNfy = false; bool bForce = false; - WCHAR szBuf[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER)] = { L'\0' }; + WCHAR szBuf[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER)] = { L'\0' }; GetDlgItemText(hwnd, IDC_STYLEEDIT, szBuf, COUNTOF(szBuf)); // normalize - WCHAR szBufNorm[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER)] = { L'\0' }; + WCHAR szBufNorm[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER)] = { L'\0' }; Style_CopyStyles_IfNotDefined(szBuf, szBufNorm, COUNTOF(szBufNorm)); if (StringCchCompareXI(szBufNorm, pDlgStyle->szValue) != 0) { @@ -4652,7 +4669,7 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd, UINT umsg, WPARAM wPar static bool bWarnedNoIniFile = false; static int iDMHighliteContrast = 75; - static WCHAR tchTmpBuffer[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER)] = {L'\0'}; + static WCHAR tchTmpBuffer[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER)] = {L'\0'}; static UT_array *pStylesBackup = NULL; switch (umsg) { @@ -4753,7 
+4770,7 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd, UINT umsg, WPARAM wPar pCurrentStyle = &(pCurrentLexer->Styles[STY_DEFAULT]); iCurStyleIdx = STY_DEFAULT; - SendDlgItemMessage(hwnd, IDC_STYLEEDIT, EM_LIMITTEXT, max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER) - 1, 0); + SendDlgItemMessage(hwnd, IDC_STYLEEDIT, EM_LIMITTEXT, max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER) - 1, 0); MakeBitmapButton(hwnd, IDC_PREVSTYLE, IDB_PREV, -1, -1); MakeBitmapButton(hwnd, IDC_NEXTSTYLE, IDB_NEXT, -1, -1); @@ -5098,7 +5115,7 @@ CASE_WM_CTLCOLOR_SET: //ImageList_EndDrag(); HTREEITEM htiTarget = TreeView_GetDropHilight(hwndTV); if (htiTarget) { - WCHAR tchCopy[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER)] = {L'\0'}; + WCHAR tchCopy[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER)] = {L'\0'}; TreeView_SelectDropTarget(hwndTV, NULL); GetDlgItemText(hwnd, IDC_STYLEEDIT, tchCopy, COUNTOF(tchCopy)); TreeView_Select(hwndTV, htiTarget, TVGN_CARET); @@ -5216,7 +5233,7 @@ CASE_WM_CTLCOLOR_SET: case IDC_STYLEEDIT: { if (HIWORD(wParam) == EN_CHANGE) { - WCHAR tch[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENTIONS_BUFFER)] = {L'\0'}; + WCHAR tch[max(BUFSIZE_STYLE_VALUE, STYLE_EXTENSIONS_BUFFER)] = {L'\0'}; GetDlgItemText(hwnd, IDC_STYLEEDIT, tch, COUNTOF(tch)); diff --git a/src/TypeDefs.h b/src/TypeDefs.h index eb9e8babd..feff313ca 100644 --- a/src/TypeDefs.h +++ b/src/TypeDefs.h @@ -218,8 +218,8 @@ typedef enum BUFFER_SIZES { EDGELINE_NUM_LIMIT = 256, ANSI_CHAR_BUFFER = 258, - STYLE_EXTENTIONS_BUFFER = 512, - EXTENTIONS_FILTER_BUFFER = (STYLE_EXTENTIONS_BUFFER << 1), + STYLE_EXTENSIONS_BUFFER = 512, + EXTENSIONS_FILTER_BUFFER = (STYLE_EXTENSIONS_BUFFER << 1), FNDRPL_BUFFER = 4096, // TODO: eliminate limit LONG_LINES_MARKER_LIMIT = 8192, CMDLN_LENGTH_LIMIT = 8192