diff --git a/Build/Themes/Dark.ini b/Build/Themes/Dark.ini index f459ce2c6..a028168dc 100644 --- a/Build/Themes/Dark.ini +++ b/Build/Themes/Dark.ini @@ -74,14 +74,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx Default=font:Lucida Console; thin; size:11; smoothing:aliased Margins and Line Numbers=font:Consolas; size:-2 Extra Line Spacing (Size)=size:-1 -FileNameExtensions=nfo; diz; \^Readme$ +FileNameExtensions=nfo; diz; Readme* [Apache Config Files] Comment=fore:#75715E String=fore:#E6DB74 Number=fore:#AE81FF Directive=fore:#003CE6 IP Address=bold; fore:#FF4000 -FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$ +FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile* [Assembly Script] Comment=fore:#75715E String=fore:#E6DB74 @@ -198,7 +198,7 @@ If Def=fore:#00007F Macro Def=fore:#00007F Variable within String=fore:#CC3300; back:#EEEEEE Number=fore:#008080 -FileNameExtensions=cmake; ctest; \^cmakelists\.txt$ +FileNameExtensions=cmake; ctest; cmakelists.txt [Coffeescript] Comment=fore:#646464 String=fore:#008000 @@ -433,7 +433,7 @@ Comment=fore:#75715E Identifier=fore:#003CE6 Target=fore:#003CE6; back:#FFC000 Preprocessor=fore:#75715E -FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$ +FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild* [Markdown] Strong=bold Emphasis=italic @@ -603,7 +603,7 @@ Module Name=fore:#A000A0 Instance Var=fore:#B00080 Class Var=fore:#8000B0 Data Section=fore:#600000; back:#FFF0D8; eolfilled -FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \^Rakefile$; \^Podfile$ +FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile* [Rust Source Code] Keyword=bold; fore:#248112 Build-In Type=fore:#A9003D @@ -627,7 +627,7 @@ Scalar=fore:#808000 Parameter Expansion=fore:#808000; back:#FFFF99 Back 
Ticks=fore:#FF0080 Here-Doc (Single Quoted, q)=fore:#008080 -FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$ +FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig* [SQL Query] Default=bold; fore:#EC7600 Comment=fore:#66747B diff --git a/Build/Themes/Obsidian.ini b/Build/Themes/Obsidian.ini index 530f1ba30..5952fda5c 100644 --- a/Build/Themes/Obsidian.ini +++ b/Build/Themes/Obsidian.ini @@ -74,14 +74,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx Default=font:Lucida Console; size:11 Margins and Line Numbers=font:Consolas; size:-2 Extra Line Spacing (Size)=size:-1 -FileNameExtensions=nfo; diz; \^Readme$ +FileNameExtensions=nfo; diz; Readme* [Apache Config Files] Comment=fore:#75715E String=fore:#E0A596 Number=fore:#AE81FF Directive=fore:#003CE6 IP Address=bold; fore:#FF4000 -FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$ +FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile* [Assembly Script] Comment=fore:#75715E String=fore:#E0A596 @@ -198,7 +198,7 @@ If Def=fore:#00007F Macro Def=fore:#00007F Variable within String=fore:#CC3300; back:#EEEEEE Number=fore:#008080 -FileNameExtensions=cmake; ctest; \^cmakelists\.txt$ +FileNameExtensions=cmake; ctest; cmakelists.txt [Coffeescript] Comment=fore:#646464 String=fore:#008000 @@ -433,7 +433,7 @@ Comment=fore:#008000 Identifier=fore:#003CE6 Target=fore:#003CE6; back:#FFC000 Preprocessor=fore:#FF8000 -FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$ +FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild* [Markdown] Strong=bold; fore:#E68A00 Emphasis=italic; fore:#E68A00 @@ -604,7 +604,7 @@ Module Name=fore:#A000A0 Instance Var=fore:#B00080 Class Var=fore:#8000B0 Data Section=fore:#600000; back:#FFF0D8; eolfilled -FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; 
\^Rakefile$; \^Podfile$ +FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile* [Rust Source Code] Keyword=bold; fore:#248112 Build-In Type=fore:#A9003D @@ -628,7 +628,7 @@ Scalar=fore:#808000 Parameter Expansion=fore:#808000; back:#FFFF99 Back Ticks=fore:#FF0080 Here-Doc (Single Quoted, q)=fore:#008080 -FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$ +FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig* [SQL Query] Default=bold; fore:#EC7600 Comment=fore:#66747B diff --git a/Build/Themes/Sombra.ini b/Build/Themes/Sombra.ini index 748c0fb68..760d3cfd3 100644 --- a/Build/Themes/Sombra.ini +++ b/Build/Themes/Sombra.ini @@ -72,14 +72,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx Default=font:Lucida Console; thin; size:11; smoothing:aliased Margins and Line Numbers=font:Consolas; size:-2; fore:#00AFAA Extra Line Spacing (Size)=size:-1 -FileNameExtensions=nfo; diz; \^Readme$ +FileNameExtensions=nfo; diz; Readme* [Apache Config Files] Comment=fore:#7EA100 String=fore:#DC00DC Number=fore:#C83C00 Directive=fore:#749DFF IP Address=bold; fore:#C83C00 -FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$ +FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile* [Assembly Script] Comment=fore:#00CB00 String=fore:#00CB00 @@ -197,7 +197,7 @@ If Def=fore:#C1C4FF Macro Def=fore:#C1C4FF Variable within String=fore:#EA2E00; back:#353535 Number=fore:#00AFAA -FileNameExtensions=cmake; ctest; \^cmakelists\.txt$ +FileNameExtensions=cmake; ctest; cmakelists.txt [Coffeescript] Comment=fore:#9A9A9A String=fore:#00CB00 @@ -430,7 +430,7 @@ Comment=fore:#00CB00 Identifier=fore:#749DFF Target=fore:#749DFF; back:#7A6000 Preprocessor=fore:#C85C00 -FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$ +FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; 
Makefile*; Kbuild* [Markdown] Strong=bold Emphasis=italic @@ -603,7 +603,7 @@ Instance Var=fore:#F700B4 Class Var=fore:#CE5EFF Data Section=fore:#FF9EA3; back:#4A3000; eolfilled Error=fore:#353502; back:#FF3D48; eolfilled -FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \^Rakefile$; \^Podfile$ +FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile* [Rust Source Code] Keyword=bold; fore:#36C313 Build-In Type=fore:#FF448E @@ -627,7 +627,7 @@ Scalar=fore:#8E8E00 Parameter Expansion=fore:#8E8E00; back:#393902 Back Ticks=fore:#CB0965 Here-Doc (Single Quoted, q)=fore:#00AFAA -FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$ +FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig* [SQL Query] Comment=fore:#00AFAA Keyword=bold; fore:#7A81FF diff --git a/plans/schema_enhancements.md b/plans/schema_enhancements.md index e2dd4fc67..d78f9799e 100644 --- a/plans/schema_enhancements.md +++ b/plans/schema_enhancements.md @@ -133,18 +133,20 @@ future session should pick one and stick with it. **Risk.** Low. Either variant is easy to revert if feedback is negative. -#### E-04 — Tooltip documenting `\regex` extension syntax (closes I-12) +#### E-04 — Tooltip documenting wildcard extension syntax (closes I-12) -**Motivation.** `\^CMakeLists$` style regex entries in `FileNameExtensions` -are a real feature, implemented in `Style_RegExMatchLexer()` -(`src/Styles.c:2543`), but the UI never mentions it. +**Motivation.** Wildcard entries in `FileNameExtensions` (e.g. `Makefile*`, +`CMakeLists.txt`) are a real feature, implemented in +`Style_WildcardMatchLexer()` (`src/Styles.c`, replacing the prior PCRE2 path +that was removed in the wildcard refactor), but the UI never mentions it. **Change.** - Add a tooltip (`TOOLTIPS_CLASS`) to the `IDC_STYLEEDIT_ROOT` control in `Style_CustomizeSchemesDlgProc`. - Text (new localised string `IDS_MUI_EXTLIST_TOOLTIP`): "Semicolon- - separated. 
Prefix with `\` for a regex matched against the full file - name, e.g. `\^CMakeLists$`." + separated. Plain extensions (`py`) match by extension; entries with + `*`, `?`, or `.` (e.g. `Makefile*`, `CMakeLists.txt`) match the full + filename." - Add the string to all 26 locale files. **Risk.** Trivial. diff --git a/readme/schema/CustomSchema.md b/readme/schema/CustomSchema.md index 4e08edf74..13137fc13 100644 --- a/readme/schema/CustomSchema.md +++ b/readme/schema/CustomSchema.md @@ -165,7 +165,7 @@ When you open a file, Notepad3 runs a pipeline to pick the right schema. It stop 1. **File variables** (`vim:` / `emacs:` modelines inside the file). If the file declares `mode: python;` or `-*- mode: cpp -*-`, the declared mode name is matched against schema names (case-insensitive prefix) and then against extension lists. Disabled by `Settings2.NoFileVariables=1`. 2. **Shebang detection** for `.cgi` / `.fcgi` files or files flagged as CGI by mode. Recognised interpreters: `python`, `ruby`, `bash`/`sh`, `perl`, `tcl`, `node`/`js`, `php`. Disabled by `Settings2.NoCGIGuess=1`. -3. **Regex match on the file name.** Any entry in a schema's extension list that starts with a backslash is treated as a PCRE2 regex applied to the bare filename — e.g. `\^CMakeLists$` matches the extensionless file `CMakeLists`. See the [Extension lists](#extension-lists) subsection below for full syntax, escaping rules, and worked examples. +3. **Wildcard / filename match.** Any entry in a schema's extension list that contains `*`, `?`, or `.` is matched against the bare filename via the standard Win32 wildcard matcher — e.g. `Makefile*` matches `Makefile` and `Makefile.dev`, while `cmakelists.txt` matches only that literal filename. See the [Extension lists](#extension-lists) subsection below for syntax and worked examples. 4. **Plain extension match.** First lexer whose extension list contains the file's extension wins. 
Extension comparison is case-insensitive; separator is `;` (semicolon or space both work in practice). 5. **HTML/XML sniff** — if the first bytes start with `<`, classify as HTML or XML. Disabled by `Settings2.NoHTMLGuess=1`. 6. **Shebang fallback** for extension-less files (same recognisers as step 2). @@ -185,65 +185,90 @@ Each schema carries a hard-coded default extension list (compiled into `Notepad3 ```ini [Python Script] -FileNameExtensions=py;pyw;pyi;\^setup\.py$;\^test_.*\.py$ +FileNameExtensions=py;pyw;pyi;setup.py;test_*.py ``` -The override **replaces** the default; the two are not merged. The field accepts both plain file extensions and full-filename regex patterns in a single semicolon-separated list. +The override **replaces** the default; the two are not merged. The field accepts both plain file extensions and filename wildcard patterns in a single semicolon-separated list. -#### Syntax +#### Entry classification + +Each entry is one of two kinds — Notepad3 picks based on what characters it contains: + +| Entry contains… | Treated as… | Matched against | +|---|---|---| +| only letters / digits (no `*`, `?`, `.`) | plain extension token | the file's extension (the part after the last `.`, case-insensitive) | +| any of `*`, `?`, or `.` | filename wildcard pattern | the bare filename via Win32 `PathMatchSpecW` | + +Examples: + +| Entry | Kind | Matches | +|---|---|---| +| `py` | extension | `foo.py`, `foo.PY` | +| `*.py` | wildcard | any file ending in `.py` (same as `py`, just spelled out) | +| `Makefile*` | wildcard | `Makefile`, `Makefile.dev`, `Makefile_old` | +| `CMakeLists.txt` | wildcard (literal) | exactly `CMakeLists.txt` | +| `??.log` | wildcard | any two-character name + `.log` | +| `Makefile` | extension | files with extension `Makefile` (rare; not the literal extensionless filename) | + +#### Syntax rules | Aspect | Rule | |---|---| -| Separator | `;` (semicolon) is canonical. 
A single space also works as a separator and surrounding whitespace around `;` is tolerated. | -| Case | Comparison is **case-insensitive** for both plain and regex entries. | -| Plain entry | A bare extension token like `py` or `cpp`. Matched as a whole token against the file's extension; substring matches are excluded. The leading `.` of the file's extension is stripped before comparison, so `py` matches `foo.py`, `foo.PY`, and `foo.Py`. | -| Regex entry | An entry whose first character is a **backslash** `\`. Everything after the leading `\` is the regex pattern. The `\` is the marker, not part of the pattern. | -| Buffer limit | **512 characters** per schema (counting all entries combined). Longer values are silently truncated when read from the INI — no warning is shown. | - -#### Regex extension patterns - -- **Engine:** PCRE2 (the same engine the editor's Find/Replace dialog uses), so the full PCRE2 syntax is available — character classes, alternation, lookarounds, named groups, inline modifiers. -- **Match target:** the **bare filename** including extension (e.g. `setup.py`, `CMakeLists.txt`). The directory portion of the path is stripped before matching, so patterns cannot anchor on parent directories. -- **Anchors are NOT implicit.** A pattern like `\setup\.py` is treated as `setup\.py` and matches anywhere in the filename — it accepts `setup.py`, `mysetup.python`, and `setup.python.bak`. Add `^` and `$` explicitly when you mean the whole filename. -- **`.` is a metachar.** Always escape literal dots as `\.`. Otherwise `\foo.py` will also match `fooXpy`, `foo-py`, etc. -- **Multiple regex entries are allowed** in the same field, each prefixed with its own `\` and separated by `;`. -- **Plain and regex entries can be freely mixed** in the same `FileNameExtensions` value. -- **No per-pattern case-sensitivity flag** is exposed by the dialog. 
If you need a case-sensitive regex for one pattern, use the PCRE2 inline modifier `(?-i)` at the start of that pattern. +| Separator | `;` (semicolon) is canonical. A single space also works, and whitespace around `;` is tolerated. | +| Case | Both passes are **case-insensitive**. | +| `*` in a wildcard | Matches zero or more of any character. | +| `?` in a wildcard | Matches exactly one character. | +| `.` in a wildcard | Matches a literal dot — there is no escape character. | +| Buffer limit | **512 characters** per schema (counting all entries combined). Longer values are silently truncated when read from the INI; a debug-output warning is emitted. | #### Worked examples ```ini -; CMakeLists has no extension — only a regex can catch it +; Plain extensions (the common case) +[Python Script] +FileNameExtensions=py;pyw;pyi + +; Match the literal extensionless file CMakeLists, plus its .txt sibling [CMake] -FileNameExtensions=cmake;ctest;\^CMakeLists$;\^CMakeLists\.txt$ +FileNameExtensions=cmake;ctest;cmakelists.txt;CMakeLists* -; Dockerfile family — match Dockerfile, Dockerfile.dev, Dockerfile.prod, … +; Dockerfile family — Dockerfile, Dockerfile.dev, Dockerfile.prod, … [Docker] -FileNameExtensions=dockerfile;\^Dockerfile(\..+)?$ +FileNameExtensions=dockerfile;Dockerfile* -; Hidden shell config files starting with a dot +; Shell config files starting with a dot [Bash Script] -FileNameExtensions=sh;bash;\^\.bashrc$;\^\.bash_profile$;\^\.profile$ +FileNameExtensions=sh;bash;.bashrc;.bash_profile;.profile ; Route Python test files to a separate schema [Python Test] -FileNameExtensions=\^test_.*\.py$;\^.*_test\.py$ +FileNameExtensions=test_*.py;*_test.py -; Nginx config files — fixed filenames plus the conf.d/*.conf convention -[Nginx Config] -FileNameExtensions=conf;\^nginx\.conf$;\^mime\.types$ +; Single-character pattern: any 1-char-name file ending in .x +[Custom] +FileNameExtensions=?.x ``` -Common pitfalls illustrated above: -- `\^setup\.py$` matches `setup.py` 
but **not** `setup.py.bak` (because of the trailing `$`). It does match `Setup.py`, but only thanks to the global case-insensitive flag. -- `\setup\.py` (no anchors) also matches `mysetup.py.bak` and `setup.python` — almost never what you want. -- `\^foo.py$` (missing `\.`) matches `foo.py` *and* `fooXpy` — always escape the dot. +Common pitfalls: + +- `Makefile` (no `*`, `?`, `.`) is treated as a **plain extension**, so it matches files whose extension is `Makefile` (i.e. `foo.Makefile`) — **not** the literal extensionless `Makefile`. To catch the latter, use `Makefile*`. +- `Makefile*` is greedy — it also matches `Makefile_old` or `Makefilexxx`. If you need exactness, list each name explicitly (`Makefile;Makefile.dev`) or rely on the `.` form (`Makefile.*`) which still matches `Makefile.foo` but excludes `Makefilexxx`. +- `setup.py` (entry contains `.`) is a wildcard literal — it only matches the literal filename `setup.py`, not `mysetup.py`. #### Precedence -Within `Style_SetLexerFromFile()` the auto-detect pipeline tries the **regex match across all schemas first**, and only then falls back to the plain-extension match across all schemas. Within a single schema the two are independent: a regex hit and a plain-extension hit can both be present, but only the regex pass gets a chance to fire before the plain pass runs. +Within `Style_SetLexerFromFile()` the auto-detect pipeline tries the **wildcard / filename pass across all schemas first**, and only then falls back to the plain-extension pass across all schemas. Within a single schema the two passes are independent: an entry's classification (plain vs wildcard) decides which pass it participates in. -When two schemas claim the same plain extension, or both match a file with their regex entries, the schema that appears first in the internal schema array wins. There is currently no UI to reorder schemas. 
+When two schemas claim the same plain extension, or both match a file with their wildcards, the schema that appears first in the internal schema array wins. There is currently no UI to reorder schemas. + +#### Legacy `\regex` syntax + +Earlier versions of Notepad3 used a PCRE2 regex syntax for filename matching, marked by a leading backslash (e.g. `\^Makefile$`). This has been **removed**. Existing INIs migrate automatically on load: + +- Patterns of the form `\^<name>$` where `<name>` is alphanumeric (with optional `_`, `-`, and `\.` escapes) are **translated** to a wildcard equivalent. Examples: `\^Makefile$` → `Makefile*`, `\^cmakelists\.txt$` → `cmakelists.txt`, `\^\.bashrc$` → `.bashrc`. +- Anything more complex (real regex metacharacters like `.+`, `[abc]`, `?`, missing anchors, etc.) is **dropped** — translation isn't safe. + +In both cases the in-memory list is updated on load and the INI is rewritten on the next save, so user INIs converge to wildcard-only over time. A debug-output line records the migration per schema. Hand-tuned entries that get dropped may need to be rewritten as wildcard patterns. 
#### Editing in *Customize Schemes* @@ -466,7 +491,7 @@ For developers wanting to look up behaviour described above: |---|---| | Data structures (`EDITLEXER`, `EDITSTYLE`, `KEYWORDLIST`) | `src/StyleLexers/EditLexer.h` | | Schema array, loading, saving, layering | `src/Styles.c` — `g_pLexArray[]`, `_ReadFromIniCache()`, `Style_ToIniSection()`, `Style_CanonicalSectionToIniCache()`, `Style_ExportToFile()` | -| Auto-detection pipeline | `src/Styles.c` — `Style_SetLexerFromFile()`, `Style_MatchLexer()`, `Style_RegExMatchLexer()`, `Style_SniffShebang()` | +| Auto-detection pipeline | `src/Styles.c` — `Style_SetLexerFromFile()`, `Style_MatchLexer()`, `Style_WildcardMatchLexer()`, `Style_SniffShebang()` | | Customize Schemes dialog | `src/Styles.c` — `Style_CustomizeSchemesDlg()`, `Style_CustomizeSchemesDlgProc()` | | Select Scheme dialog | `src/Styles.c` — `Style_SelectLexerDlg()`, `Style_SelectLexerDlgProc()` | | Theme menu & switching | `src/Styles.c` — `_FillThemesMenuTable()`, `Style_InsertThemesMenu()`, `Style_DynamicThemesMenuCmd()`, `Style_ImportTheme()` | diff --git a/src/StyleLexers/styleLexBASH.c b/src/StyleLexers/styleLexBASH.c index 601f95558..0d56adabc 100644 --- a/src/StyleLexers/styleLexBASH.c +++ b/src/StyleLexers/styleLexBASH.c @@ -22,7 +22,7 @@ KEYWORDLIST KeyWords_BASH = EDITLEXER lexBASH = { - SCLEX_BASH, "bash", IDS_LEX_SHELL_SCR, L"Shell Script", L"sh; csh; zsh; bash; tcsh; m4; in; \\^mozconfig$", L"", + SCLEX_BASH, "bash", IDS_LEX_SHELL_SCR, L"Shell Script", L"sh; csh; zsh; bash; tcsh; m4; in; mozconfig*", L"", &KeyWords_BASH, { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ {SCE_SH_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, diff --git a/src/StyleLexers/styleLexCMAKE.c b/src/StyleLexers/styleLexCMAKE.c index 90ccd3716..cf60fa89e 100644 --- a/src/StyleLexers/styleLexCMAKE.c +++ b/src/StyleLexers/styleLexCMAKE.c @@ -35,7 +35,7 @@ KEYWORDLIST KeyWords_CMAKE = EDITLEXER lexCmake = { - SCLEX_CMAKE, "cmake", 
IDS_LEX_CMAKE, L"Cmake Script", L"cmake; ctest; \\^cmakelists\\.txt$", L"", + SCLEX_CMAKE, "cmake", IDS_LEX_CMAKE, L"Cmake Script", L"cmake; ctest; cmakelists.txt", L"", &KeyWords_CMAKE, { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ {SCE_CMAKE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, diff --git a/src/StyleLexers/styleLexCONF.c b/src/StyleLexers/styleLexCONF.c index 830b26436..c7dd589d0 100644 --- a/src/StyleLexers/styleLexCONF.c +++ b/src/StyleLexers/styleLexCONF.c @@ -127,7 +127,7 @@ KEYWORDLIST KeyWords_CONF = EDITLEXER lexCONF = { - SCLEX_CONF, "conf", IDS_LEX_APC_CFG, L"Apache Config Files", L"conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \\^Kconfig$; \\^Doxyfile$", L"", + SCLEX_CONF, "conf", IDS_LEX_APC_CFG, L"Apache Config Files", L"conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile*", L"", &KeyWords_CONF, { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ {SCE_CONF_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, diff --git a/src/StyleLexers/styleLexMAK.c b/src/StyleLexers/styleLexMAK.c index 2b73ce863..141353a58 100644 --- a/src/StyleLexers/styleLexMAK.c +++ b/src/StyleLexers/styleLexMAK.c @@ -6,7 +6,7 @@ KEYWORDLIST KeyWords_MAK = EMPTY_KEYWORDLIST; EDITLEXER lexMAK = { - SCLEX_MAKEFILE, "makefile", IDS_LEX_MAKEFILES, L"Makefiles", L"mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \\^Makefile$; \\^Kbuild$", L"", + SCLEX_MAKEFILE, "makefile", IDS_LEX_MAKEFILES, L"Makefiles", L"mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild*", L"", &KeyWords_MAK, { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ {SCE_MAKE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, diff --git a/src/StyleLexers/styleLexRUBY.c b/src/StyleLexers/styleLexRUBY.c index 6b56ce515..eff803e17 100644 --- a/src/StyleLexers/styleLexRUBY.c +++ b/src/StyleLexers/styleLexRUBY.c @@ -11,7 +11,7 @@ KEYWORDLIST KeyWords_RUBY = EDITLEXER 
lexRUBY = { - SCLEX_RUBY, "ruby", IDS_LEX_RUBY, L"Ruby Script", L"rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \\^Rakefile$; \\^Podfile$", L"", + SCLEX_RUBY, "ruby", IDS_LEX_RUBY, L"Ruby Script", L"rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile*", L"", &KeyWords_RUBY, { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ {SCE_RB_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, diff --git a/src/StyleLexers/styleLexStandard.c b/src/StyleLexers/styleLexStandard.c index bdf33c0f3..2810702fa 100644 --- a/src/StyleLexers/styleLexStandard.c +++ b/src/StyleLexers/styleLexStandard.c @@ -80,7 +80,7 @@ EDITLEXER lexTEXT = EDITLEXER lexANSI = { - SCLEX_NULL, "null", IDS_LEX_ANSI_ART, L"ANSI Art", L"nfo; diz; \\^Readme$", L"", + SCLEX_NULL, "null", IDS_LEX_ANSI_ART, L"ANSI Art", L"nfo; diz; Readme*", L"", &KeyWords_NULL,{ { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"font:Lucida Console; thin; size:11; smoothing:aliased", L"" }, { {STYLE_LINENUMBER}, IDS_LEX_STD_MARGIN, L"Margins and Line Numbers", L"font:Consolas; size:-2; fore:#008080", L"" }, diff --git a/src/Styles.c b/src/Styles.c index ad5f86604..2a02597e7 100644 --- a/src/Styles.c +++ b/src/Styles.c @@ -738,6 +738,158 @@ bool Style_Import(HWND hwnd) } +//============================================================================= +// +// _TryTranslateRegexEntry() +// +// Conservative migration helper: try to translate a simple legacy regex +// pattern (without its leading '\' marker) into an equivalent wildcard +// entry. Only patterns of the form '^<name>$' are accepted, where <name> +// contains exclusively ASCII alphanumerics, '_', '-', and optional '\.' +// escapes for literal dots — anything else (real regex metacharacters, +// missing anchors, etc.) is rejected and the caller drops the entry. +// +// Output rules: +// - Bare name (no literal dot): append '*' so the result is still +// classified as a wildcard pattern by Style_WildcardMatchLexer. E.g. 
+// '^Makefile$' → 'Makefile*' (matches the extensionless file plus +// suffixed variants like 'Makefile.dev'; same broadening as the +// hard-coded defaults). +// - With at least one '\.': unescape to '.' and emit verbatim. E.g. +// '^cmakelists\.txt$' → 'cmakelists.txt' (literal whole-filename +// match via PathMatchSpec). +// +// Returns true on successful translation (writes a NUL-terminated string +// to `out`); false otherwise. +// +static bool _TryTranslateRegexEntry(const WCHAR *pat, size_t patLen, LPWSTR out, size_t outCap) +{ + if (patLen < 3 || pat[0] != L'^' || pat[patLen - 1] != L'$') { + return false; + } + + const WCHAR *inner = pat + 1; + const WCHAR *innerEnd = pat + patLen - 1; // exclusive (points at '$') + + bool hasDot = false; + size_t outLen = 0; + for (const WCHAR *c = inner; c < innerEnd; ) { + WCHAR ch = *c; + if (ch == L'\\' && (c + 1) < innerEnd && c[1] == L'.') { + if (outLen + 1 >= outCap) { + return false; + } + out[outLen++] = L'.'; + hasDot = true; + c += 2; + } else if ((ch >= L'a' && ch <= L'z') || + (ch >= L'A' && ch <= L'Z') || + (ch >= L'0' && ch <= L'9') || + ch == L'_' || ch == L'-') { + if (outLen + 1 >= outCap) { + return false; + } + out[outLen++] = ch; + ++c; + } else { + return false; // potential regex metacharacter — refuse + } + } + + if (outLen == 0) { + return false; + } + if (!hasDot) { + if (outLen + 1 >= outCap) { + return false; + } + out[outLen++] = L'*'; + } + out[outLen] = L'\0'; + return true; +} + + +//============================================================================= +// +// _StripLegacyRegexEntries() +// +// Walks a ';'-separated FileNameExtensions list in-place and migrates any +// entry beginning with '\' (the legacy PCRE2 syntax that the wildcard +// refactor replaced). Simple patterns of the form '\^<name>$' are +// translated to their wildcard equivalents via _TryTranslateRegexEntry(); +// anything more complex is dropped. 
Returns true if the list was changed, +// in which case the buffer has been compacted. Buffer size is +// STYLE_EXTENSIONS_BUFFER (the szExtensions slot's size). +// +static bool _StripLegacyRegexEntries(LPWSTR lpszList) +{ + if (StrIsEmpty(lpszList)) { + return false; + } + WCHAR cleaned[STYLE_EXTENSIONS_BUFFER] = { L'\0' }; + size_t cleanedLen = 0; + bool changed = false; + + const WCHAR *p = lpszList; + while (*p) { + while (*p == L';' || *p == L' ' || *p == L'\t') { + ++p; + } + if (!*p) { + break; + } + const WCHAR *e = p; + while (*e && *e != L';') { + ++e; + } + const WCHAR *eTrim = e; + while (eTrim > p && (eTrim[-1] == L' ' || eTrim[-1] == L'\t')) { + --eTrim; + } + + // Decide what (if anything) to append to the cleaned buffer. + const WCHAR *src = NULL; + size_t entryLen = 0; + WCHAR translated[STYLE_EXTENSIONS_BUFFER]; + + if (*p == L'\\') { + // Legacy regex entry. Try to translate; otherwise drop. + if (_TryTranslateRegexEntry(p + 1, (size_t)(eTrim - p - 1), + translated, COUNTOF(translated))) { + src = translated; + entryLen = StringCchLen(translated, COUNTOF(translated)); + } + changed = true; // either we translated (different shape) or dropped + } else { + src = p; + entryLen = (size_t)(eTrim - p); + } + + if (src && entryLen > 0) { + if (cleanedLen > 0 && cleanedLen + 1 < COUNTOF(cleaned)) { + cleaned[cleanedLen++] = L';'; + } + size_t copyLen = entryLen; + if (cleanedLen + copyLen >= COUNTOF(cleaned)) { + copyLen = COUNTOF(cleaned) - cleanedLen - 1; + } + if (copyLen > 0) { + CopyMemory(&cleaned[cleanedLen], src, copyLen * sizeof(WCHAR)); + cleanedLen += copyLen; + } + cleaned[cleanedLen] = L'\0'; + } + p = e; + } + + if (changed) { + StringCchCopy(lpszList, STYLE_EXTENSIONS_BUFFER, cleaned); + } + return changed; +} + + //============================================================================= // // _LoadLexerFileExtensions() @@ -771,6 +923,16 @@ static void _LoadLexerFileExtensions() StringCchCopy(g_pLexArray[iLexer]->szExtensions, 
COUNTOF(g_pLexArray[iLexer]->szExtensions), tmpExt); + // Auto-migrate: translate or drop any legacy '\regex' entries from + // user INIs so the next save writes the cleaned wildcard-only form. + if (_StripLegacyRegexEntries(g_pLexArray[iLexer]->szExtensions)) { + WCHAR migMsg[256]; + StringCchPrintf(migMsg, COUNTOF(migMsg), + L"Notepad3: migrated legacy '\\regex' entries in FileNameExtensions for [%s]; will be rewritten on next save.\n", + Lexer_Section); + OutputDebugStringW(migMsg); + } + // don't allow empty extensions settings => use default ext if (StrIsEmpty(g_pLexArray[iLexer]->szExtensions)) { StringCchCopy(g_pLexArray[iLexer]->szExtensions, COUNTOF(g_pLexArray[iLexer]->szExtensions), g_pLexArray[iLexer]->pszDefExt); @@ -2632,38 +2794,72 @@ PEDITLEXER Style_MatchLexer(LPCWSTR lpszMatch, bool bCheckNames) //============================================================================= // -// Style_RegExMatchLexer() +// Style_WildcardMatchLexer() // -PEDITLEXER Style_RegExMatchLexer(LPCWSTR lpszFileName) +// Match the bare filename against any wildcard or literal-filename entries +// in each schema's FileNameExtensions list. An entry qualifies for this +// pass if it contains '*', '?', or '.' — anything else is a plain-extension +// token handled by Style_MatchLexer(). +// +// Uses Win32 PathMatchSpecW (case-insensitive, supports * and ?). Entries +// beginning with '\' (legacy PCRE2 syntax) are skipped — see _LoadLexerFileExtensions +// for the matching strip-on-load that keeps user INIs clean. 
+// +PEDITLEXER Style_WildcardMatchLexer(LPCWSTR lpszFileName) { - if (StrIsNotEmpty(lpszFileName)) { + if (StrIsEmpty(lpszFileName)) { + return NULL; + } - char chFilePath[XHUGE_BUFFER] = { '\0' }; - WideCharToMultiByteEx(CP_UTF8, 0, lpszFileName, -1, chFilePath, COUNTOF(chFilePath), NULL, NULL); + for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) { + const WCHAR *p = g_pLexArray[iLex]->szExtensions; + while (p && *p) { + // skip leading separators and whitespace + while (*p == L';' || *p == L' ' || *p == L'\t') { + ++p; + } + if (!*p) { + break; + } + // find end of this entry + const WCHAR *e = p; + while (*e && *e != L';') { + ++e; + } + // trim trailing whitespace + const WCHAR *eTrim = e; + while (eTrim > p && (eTrim[-1] == L' ' || eTrim[-1] == L'\t')) { + --eTrim; + } - for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) { - const WCHAR *p = g_pLexArray[iLex]->szExtensions; - do { - const WCHAR* f = StrChr(p, L'\\'); - const WCHAR* e = f; - if (f) { - e = StrChr(f, L';'); - if (!e) { - e = f + StringCchLen(f, 0); - } - ++f; // exclude '\' - char regexpat[HUGE_BUFFER] = { '\0' }; - WideCharToMultiByte(CP_UTF8, 0, f, (int)(e-f), regexpat, (int)COUNTOF(regexpat), NULL, NULL); - // Strip incidental whitespace around the pattern so entries like - // "py; \^setup\.py$ ;txt" don't carry a leading/trailing space into PCRE2. - StrTrimA(regexpat, " \t"); - - if (RegExFind(regexpat, chFilePath, false, NULL) >= 0) { - return g_pLexArray[iLex]; - } - } + // Skip legacy '\regex' entries (post-cutover; ignored, not matched). + if (*p == L'\\') { p = e; - } while (p != NULL); + continue; + } + + // Classify: an entry containing *, ?, or . is a PathMatchSpec pattern. + // Bare alphanumeric tokens are plain extensions handled elsewhere. + bool isPattern = false; + for (const WCHAR *c = p; c < eTrim; ++c) { + if (*c == L'*' || *c == L'?' 
|| *c == L'.') { + isPattern = true; + break; + } + } + if (isPattern) { + WCHAR pat[STYLE_EXTENSIONS_BUFFER]; + size_t copyLen = (size_t)(eTrim - p); + if (copyLen >= COUNTOF(pat)) { + copyLen = COUNTOF(pat) - 1; + } + CopyMemory(pat, p, copyLen * sizeof(WCHAR)); + pat[copyLen] = L'\0'; + if (PathMatchSpecW(lpszFileName, pat)) { + return g_pLexArray[iLex]; + } + } + p = e; } } return NULL; @@ -2687,7 +2883,7 @@ bool Style_HasLexerForExt(const HPATHL hpath) } } if (!bFound && Path_IsNotEmpty(hpath)) { - bFound = Style_RegExMatchLexer(Path_FindFileName(hpath)); + bFound = Style_WildcardMatchLexer(Path_FindFileName(hpath)); } return bFound; } @@ -2747,9 +2943,9 @@ bool Style_SetLexerFromFile(HWND hwnd, const HPATHL hpath) LPCWSTR lpszFileName = Path_FindFileName(hpath); - // check for filename regex match + // check for filename wildcard / literal-filename match if (!bFound && s_bAutoSelect && Path_IsNotEmpty(hpath)) { - pLexSniffed = Style_RegExMatchLexer(lpszFileName); + pLexSniffed = Style_WildcardMatchLexer(lpszFileName); if (pLexSniffed) { pLexNew = pLexSniffed; bFound = true;