Merge pull request #5793 from RaiKoHoff/dev_master

breaking change: drop regex file extension pattern in favor of usual wildcard
This commit is contained in:
Rainer Kottenhoff 2026-05-03 11:18:31 +02:00 committed by GitHub
commit 24bce89737
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 319 additions and 96 deletions

View File

@ -74,14 +74,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx
Default=font:Lucida Console; thin; size:11; smoothing:aliased
Margins and Line Numbers=font:Consolas; size:-2
Extra Line Spacing (Size)=size:-1
FileNameExtensions=nfo; diz; \^Readme$
FileNameExtensions=nfo; diz; Readme*
[Apache Config Files]
Comment=fore:#75715E
String=fore:#E6DB74
Number=fore:#AE81FF
Directive=fore:#003CE6
IP Address=bold; fore:#FF4000
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile*
[Assembly Script]
Comment=fore:#75715E
String=fore:#E6DB74
@ -198,7 +198,7 @@ If Def=fore:#00007F
Macro Def=fore:#00007F
Variable within String=fore:#CC3300; back:#EEEEEE
Number=fore:#008080
FileNameExtensions=cmake; ctest; \^cmakelists\.txt$
FileNameExtensions=cmake; ctest; cmakelists.txt
[Coffeescript]
Comment=fore:#646464
String=fore:#008000
@ -433,7 +433,7 @@ Comment=fore:#75715E
Identifier=fore:#003CE6
Target=fore:#003CE6; back:#FFC000
Preprocessor=fore:#75715E
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild*
[Markdown]
Strong=bold
Emphasis=italic
@ -603,7 +603,7 @@ Module Name=fore:#A000A0
Instance Var=fore:#B00080
Class Var=fore:#8000B0
Data Section=fore:#600000; back:#FFF0D8; eolfilled
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \^Rakefile$; \^Podfile$
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile*
[Rust Source Code]
Keyword=bold; fore:#248112
Build-In Type=fore:#A9003D
@ -627,7 +627,7 @@ Scalar=fore:#808000
Parameter Expansion=fore:#808000; back:#FFFF99
Back Ticks=fore:#FF0080
Here-Doc (Single Quoted, q)=fore:#008080
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig*
[SQL Query]
Default=bold; fore:#EC7600
Comment=fore:#66747B

View File

@ -74,14 +74,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx
Default=font:Lucida Console; size:11
Margins and Line Numbers=font:Consolas; size:-2
Extra Line Spacing (Size)=size:-1
FileNameExtensions=nfo; diz; \^Readme$
FileNameExtensions=nfo; diz; Readme*
[Apache Config Files]
Comment=fore:#75715E
String=fore:#E0A596
Number=fore:#AE81FF
Directive=fore:#003CE6
IP Address=bold; fore:#FF4000
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile*
[Assembly Script]
Comment=fore:#75715E
String=fore:#E0A596
@ -198,7 +198,7 @@ If Def=fore:#00007F
Macro Def=fore:#00007F
Variable within String=fore:#CC3300; back:#EEEEEE
Number=fore:#008080
FileNameExtensions=cmake; ctest; \^cmakelists\.txt$
FileNameExtensions=cmake; ctest; cmakelists.txt
[Coffeescript]
Comment=fore:#646464
String=fore:#008000
@ -433,7 +433,7 @@ Comment=fore:#008000
Identifier=fore:#003CE6
Target=fore:#003CE6; back:#FFC000
Preprocessor=fore:#FF8000
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild*
[Markdown]
Strong=bold; fore:#E68A00
Emphasis=italic; fore:#E68A00
@ -604,7 +604,7 @@ Module Name=fore:#A000A0
Instance Var=fore:#B00080
Class Var=fore:#8000B0
Data Section=fore:#600000; back:#FFF0D8; eolfilled
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \^Rakefile$; \^Podfile$
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile*
[Rust Source Code]
Keyword=bold; fore:#248112
Build-In Type=fore:#A9003D
@ -628,7 +628,7 @@ Scalar=fore:#808000
Parameter Expansion=fore:#808000; back:#FFFF99
Back Ticks=fore:#FF0080
Here-Doc (Single Quoted, q)=fore:#008080
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig*
[SQL Query]
Default=bold; fore:#EC7600
Comment=fore:#66747B

View File

@ -72,14 +72,14 @@ FileNameExtensions=txt; text; tmp; log; asc; doc; scp; wtx
Default=font:Lucida Console; thin; size:11; smoothing:aliased
Margins and Line Numbers=font:Consolas; size:-2; fore:#00AFAA
Extra Line Spacing (Size)=size:-1
FileNameExtensions=nfo; diz; \^Readme$
FileNameExtensions=nfo; diz; Readme*
[Apache Config Files]
Comment=fore:#7EA100
String=fore:#DC00DC
Number=fore:#C83C00
Directive=fore:#749DFF
IP Address=bold; fore:#C83C00
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \^Kconfig$; \^Doxyfile$
FileNameExtensions=conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile*
[Assembly Script]
Comment=fore:#00CB00
String=fore:#00CB00
@ -197,7 +197,7 @@ If Def=fore:#C1C4FF
Macro Def=fore:#C1C4FF
Variable within String=fore:#EA2E00; back:#353535
Number=fore:#00AFAA
FileNameExtensions=cmake; ctest; \^cmakelists\.txt$
FileNameExtensions=cmake; ctest; cmakelists.txt
[Coffeescript]
Comment=fore:#9A9A9A
String=fore:#00CB00
@ -430,7 +430,7 @@ Comment=fore:#00CB00
Identifier=fore:#749DFF
Target=fore:#749DFF; back:#7A6000
Preprocessor=fore:#C85C00
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \^Makefile$; \^Kbuild$
FileNameExtensions=mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild*
[Markdown]
Strong=bold
Emphasis=italic
@ -603,7 +603,7 @@ Instance Var=fore:#F700B4
Class Var=fore:#CE5EFF
Data Section=fore:#FF9EA3; back:#4A3000; eolfilled
Error=fore:#353502; back:#FF3D48; eolfilled
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \^Rakefile$; \^Podfile$
FileNameExtensions=rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile*
[Rust Source Code]
Keyword=bold; fore:#36C313
Build-In Type=fore:#FF448E
@ -627,7 +627,7 @@ Scalar=fore:#8E8E00
Parameter Expansion=fore:#8E8E00; back:#393902
Back Ticks=fore:#CB0965
Here-Doc (Single Quoted, q)=fore:#00AFAA
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; \^mozconfig$
FileNameExtensions=sh; csh; zsh; bash; tcsh; m4; in; mozconfig*
[SQL Query]
Comment=fore:#00AFAA
Keyword=bold; fore:#7A81FF

View File

@ -133,18 +133,20 @@ future session should pick one and stick with it.
**Risk.** Low. Either variant is easy to revert if feedback is
negative.
#### E-04 — Tooltip documenting `\regex` extension syntax (closes I-12)
#### E-04 — Tooltip documenting wildcard extension syntax (closes I-12)
**Motivation.** `\^CMakeLists$` style regex entries in `FileNameExtensions`
are a real feature, implemented in `Style_RegExMatchLexer()`
(`src/Styles.c:2543`), but the UI never mentions it.
**Motivation.** Wildcard entries in `FileNameExtensions` (e.g. `Makefile*`,
`CMakeLists.txt`) are a real feature, implemented in
`Style_WildcardMatchLexer()` (`src/Styles.c`, replacing the prior PCRE2 path
that was removed in the wildcard refactor), but the UI never mentions it.
**Change.**
- Add a tooltip (`TOOLTIPS_CLASS`) to the `IDC_STYLEEDIT_ROOT` control
in `Style_CustomizeSchemesDlgProc`.
- Text (new localised string `IDS_MUI_EXTLIST_TOOLTIP`): "Semicolon-
separated. Prefix with `\` for a regex matched against the full file
name, e.g. `\^CMakeLists$`."
separated. Plain extensions (`py`) match by extension; entries with
`*`, `?`, or `.` (e.g. `Makefile*`, `CMakeLists.txt`) match the full
filename."
- Add the string to all 26 locale files.
**Risk.** Trivial.

View File

@ -165,7 +165,7 @@ When you open a file, Notepad3 runs a pipeline to pick the right schema. It stop
1. **File variables** (`vim:` / `emacs:` modelines inside the file). If the file declares `mode: python;` or `-*- mode: cpp -*-`, the declared mode name is matched against schema names (case-insensitive prefix) and then against extension lists. Disabled by `Settings2.NoFileVariables=1`.
2. **Shebang detection** for `.cgi` / `.fcgi` files or files flagged as CGI by mode. Recognised interpreters: `python`, `ruby`, `bash`/`sh`, `perl`, `tcl`, `node`/`js`, `php`. Disabled by `Settings2.NoCGIGuess=1`.
3. **Regex match on the file name.** Any entry in a schema's extension list that starts with a backslash is treated as a PCRE2 regex applied to the bare filename — e.g. `\^CMakeLists$` matches the extensionless file `CMakeLists`. See the [Extension lists](#extension-lists) subsection below for full syntax, escaping rules, and worked examples.
3. **Wildcard / filename match.** Any entry in a schema's extension list that contains `*`, `?`, or `.` is matched against the bare filename via the standard Win32 wildcard matcher — e.g. `Makefile*` matches `Makefile` and `Makefile.dev`, while `cmakelists.txt` matches only that literal filename. See the [Extension lists](#extension-lists) subsection below for syntax and worked examples.
4. **Plain extension match.** First lexer whose extension list contains the file's extension wins. Extension comparison is case-insensitive; separator is `;` (semicolon or space both work in practice).
5. **HTML/XML sniff** — if the first bytes start with `<`, classify as HTML or XML. Disabled by `Settings2.NoHTMLGuess=1`.
6. **Shebang fallback** for extension-less files (same recognisers as step 2).
@ -185,65 +185,90 @@ Each schema carries a hard-coded default extension list (compiled into `Notepad3
```ini
[Python Script]
FileNameExtensions=py;pyw;pyi;\^setup\.py$;\^test_.*\.py$
FileNameExtensions=py;pyw;pyi;setup.py;test_*.py
```
The override **replaces** the default; the two are not merged. The field accepts both plain file extensions and full-filename regex patterns in a single semicolon-separated list.
The override **replaces** the default; the two are not merged. The field accepts both plain file extensions and filename wildcard patterns in a single semicolon-separated list.
#### Syntax
#### Entry classification
Each entry is one of two kinds — Notepad3 picks based on what characters it contains:
| Entry contains… | Treated as… | Matched against |
|---|---|---|
| only letters / digits (no `*`, `?`, `.`) | plain extension token | the file's extension (the part after the last `.`, case-insensitive) |
| any of `*`, `?`, or `.` | filename wildcard pattern | the bare filename via Win32 `PathMatchSpecW` |
Examples:
| Entry | Kind | Matches |
|---|---|---|
| `py` | extension | `foo.py`, `foo.PY` |
| `*.py` | wildcard | any file ending in `.py` (same as `py`, just spelled out) |
| `Makefile*` | wildcard | `Makefile`, `Makefile.dev`, `Makefile_old` |
| `CMakeLists.txt` | wildcard (literal) | exactly `CMakeLists.txt` |
| `??.log` | wildcard | any two-character name + `.log` |
| `Makefile` | extension | files with extension `Makefile` (rare; not the literal extensionless filename) |
#### Syntax rules
| Aspect | Rule |
|---|---|
| Separator | `;` (semicolon) is canonical. A single space also works as a separator and surrounding whitespace around `;` is tolerated. |
| Case | Comparison is **case-insensitive** for both plain and regex entries. |
| Plain entry | A bare extension token like `py` or `cpp`. Matched as a whole token against the file's extension; substring matches are excluded. The leading `.` of the file's extension is stripped before comparison, so `py` matches `foo.py`, `foo.PY`, and `foo.Py`. |
| Regex entry | An entry whose first character is a **backslash** `\`. Everything after the leading `\` is the regex pattern. The `\` is the marker, not part of the pattern. |
| Buffer limit | **512 characters** per schema (counting all entries combined). Longer values are silently truncated when read from the INI — no warning is shown. |
#### Regex extension patterns
- **Engine:** PCRE2 (the same engine the editor's Find/Replace dialog uses), so the full PCRE2 syntax is available — character classes, alternation, lookarounds, named groups, inline modifiers.
- **Match target:** the **bare filename** including extension (e.g. `setup.py`, `CMakeLists.txt`). The directory portion of the path is stripped before matching, so patterns cannot anchor on parent directories.
- **Anchors are NOT implicit.** A pattern like `\setup\.py` is treated as `setup\.py` and matches anywhere in the filename — it accepts `setup.py`, `mysetup.python`, and `setup.python.bak`. Add `^` and `$` explicitly when you mean the whole filename.
- **`.` is a metachar.** Always escape literal dots as `\.`. Otherwise `\foo.py` will also match `fooXpy`, `foo-py`, etc.
- **Multiple regex entries are allowed** in the same field, each prefixed with its own `\` and separated by `;`.
- **Plain and regex entries can be freely mixed** in the same `FileNameExtensions` value.
- **No per-pattern case-sensitivity flag** is exposed by the dialog. If you need a case-sensitive regex for one pattern, use the PCRE2 inline modifier `(?-i)` at the start of that pattern.
| Separator | `;` (semicolon) is canonical. A single space also works, and whitespace around `;` is tolerated. |
| Case | Both passes are **case-insensitive**. |
| `*` in a wildcard | Matches zero or more of any character. |
| `?` in a wildcard | Matches exactly one character. |
| `.` in a wildcard | Matches a literal dot — there is no escape character. |
| Buffer limit | **512 characters** per schema (counting all entries combined). Longer values are silently truncated when read from the INI; a debug-output warning is emitted. |
#### Worked examples
```ini
; CMakeLists has no extension — only a regex can catch it
; Plain extensions (the common case)
[Python Script]
FileNameExtensions=py;pyw;pyi
; Match the literal extensionless file CMakeLists, plus its .txt sibling
[CMake]
FileNameExtensions=cmake;ctest;\^CMakeLists$;\^CMakeLists\.txt$
FileNameExtensions=cmake;ctest;cmakelists.txt;CMakeLists*
; Dockerfile family — match Dockerfile, Dockerfile.dev, Dockerfile.prod, …
; Dockerfile family — Dockerfile, Dockerfile.dev, Dockerfile.prod, …
[Docker]
FileNameExtensions=dockerfile;\^Dockerfile(\..+)?$
FileNameExtensions=dockerfile;Dockerfile*
; Hidden shell config files starting with a dot
; Shell config files starting with a dot
[Bash Script]
FileNameExtensions=sh;bash;\^\.bashrc$;\^\.bash_profile$;\^\.profile$
FileNameExtensions=sh;bash;.bashrc;.bash_profile;.profile
; Route Python test files to a separate schema
[Python Test]
FileNameExtensions=\^test_.*\.py$;\^.*_test\.py$
FileNameExtensions=test_*.py;*_test.py
; Nginx config files — fixed filenames plus the conf.d/*.conf convention
[Nginx Config]
FileNameExtensions=conf;\^nginx\.conf$;\^mime\.types$
; Single-character pattern: any 1-char-name file ending in .x
[Custom]
FileNameExtensions=?.x
```
Common pitfalls illustrated above:
- `\^setup\.py$` matches `setup.py` but **not** `setup.py.bak` (because of the trailing `$`). It does match `Setup.py`, but only thanks to the global case-insensitive flag.
- `\setup\.py` (no anchors) also matches `mysetup.py.bak` and `setup.python` — almost never what you want.
- `\^foo.py$` (missing `\.`) matches `foo.py` *and* `fooXpy` — always escape the dot.
Common pitfalls:
- `Makefile` (no `*`, `?`, `.`) is treated as a **plain extension**, so it matches files whose extension is `Makefile` (i.e. `foo.Makefile`) — **not** the literal extensionless `Makefile`. To catch the latter, use `Makefile*`.
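- `Makefile*` is greedy — it also matches `Makefile_old` or `Makefilexxx`. If that is too broad, use the `.` form (`Makefile.*`), which still matches `Makefile.foo` but excludes `Makefilexxx`, or list the dotted names explicitly (`Makefile.dev;Makefile.prod`). Do not add a bare `Makefile` entry for this purpose: as described in the previous item, an entry without `*`, `?`, or `.` is classified as a plain extension.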
- `setup.py` (entry contains `.`) is a wildcard literal — it only matches the literal filename `setup.py`, not `mysetup.py`.
#### Precedence
Within `Style_SetLexerFromFile()` the auto-detect pipeline tries the **regex match across all schemas first**, and only then falls back to the plain-extension match across all schemas. Within a single schema the two are independent: a regex hit and a plain-extension hit can both be present, but only the regex pass gets a chance to fire before the plain pass runs.
Within `Style_SetLexerFromFile()` the auto-detect pipeline tries the **wildcard / filename pass across all schemas first**, and only then falls back to the plain-extension pass across all schemas. Within a single schema the two passes are independent: an entry's classification (plain vs wildcard) decides which pass it participates in.
When two schemas claim the same plain extension, or both match a file with their regex entries, the schema that appears first in the internal schema array wins. There is currently no UI to reorder schemas.
When two schemas claim the same plain extension, or both match a file with their wildcards, the schema that appears first in the internal schema array wins. There is currently no UI to reorder schemas.
#### Legacy `\regex` syntax
Earlier versions of Notepad3 used a PCRE2 regex syntax for filename matching, marked by a leading backslash (e.g. `\^Makefile$`). This has been **removed**. Existing INIs migrate automatically on load:
- Patterns of the form `\^<name>$` where `<name>` is alphanumeric (with optional `_`, `-`, and `\.` escapes) are **translated** to a wildcard equivalent. Examples: `\^Makefile$` → `Makefile*`, `\^cmakelists\.txt$` → `cmakelists.txt`, `\^\.bashrc$` → `.bashrc`.
- Anything more complex (real regex metacharacters like `.+`, `[abc]`, `?`, missing anchors, etc.) is **dropped** — translation isn't safe.
In both cases the list is cleaned in memory when the INI is loaded and written back in that cleaned form on the next save, so user INIs converge to wildcard-only over time. A debug-output line records the migration per schema. Hand-tuned entries that get dropped may need to be rewritten as wildcard patterns.
#### Editing in *Customize Schemes*
@ -466,7 +491,7 @@ For developers wanting to look up behaviour described above:
|---|---|
| Data structures (`EDITLEXER`, `EDITSTYLE`, `KEYWORDLIST`) | `src/StyleLexers/EditLexer.h` |
| Schema array, loading, saving, layering | `src/Styles.c` — `g_pLexArray[]`, `_ReadFromIniCache()`, `Style_ToIniSection()`, `Style_CanonicalSectionToIniCache()`, `Style_ExportToFile()` |
| Auto-detection pipeline | `src/Styles.c` — `Style_SetLexerFromFile()`, `Style_MatchLexer()`, `Style_RegExMatchLexer()`, `Style_SniffShebang()` |
| Auto-detection pipeline | `src/Styles.c` — `Style_SetLexerFromFile()`, `Style_MatchLexer()`, `Style_WildcardMatchLexer()`, `Style_SniffShebang()` |
| Customize Schemes dialog | `src/Styles.c` — `Style_CustomizeSchemesDlg()`, `Style_CustomizeSchemesDlgProc()` |
| Select Scheme dialog | `src/Styles.c` — `Style_SelectLexerDlg()`, `Style_SelectLexerDlgProc()` |
| Theme menu & switching | `src/Styles.c` — `_FillThemesMenuTable()`, `Style_InsertThemesMenu()`, `Style_DynamicThemesMenuCmd()`, `Style_ImportTheme()` |

View File

@ -22,7 +22,7 @@ KEYWORDLIST KeyWords_BASH =
EDITLEXER lexBASH =
{
SCLEX_BASH, "bash", IDS_LEX_SHELL_SCR, L"Shell Script", L"sh; csh; zsh; bash; tcsh; m4; in; \\^mozconfig$", L"",
SCLEX_BASH, "bash", IDS_LEX_SHELL_SCR, L"Shell Script", L"sh; csh; zsh; bash; tcsh; m4; in; mozconfig*", L"",
&KeyWords_BASH, {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ {SCE_SH_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },

View File

@ -35,7 +35,7 @@ KEYWORDLIST KeyWords_CMAKE =
EDITLEXER lexCmake =
{
SCLEX_CMAKE, "cmake", IDS_LEX_CMAKE, L"Cmake Script", L"cmake; ctest; \\^cmakelists\\.txt$", L"",
SCLEX_CMAKE, "cmake", IDS_LEX_CMAKE, L"Cmake Script", L"cmake; ctest; cmakelists.txt", L"",
&KeyWords_CMAKE, {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ {SCE_CMAKE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },

View File

@ -127,7 +127,7 @@ KEYWORDLIST KeyWords_CONF =
EDITLEXER lexCONF =
{
SCLEX_CONF, "conf", IDS_LEX_APC_CFG, L"Apache Config Files", L"conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; \\^Kconfig$; \\^Doxyfile$", L"",
SCLEX_CONF, "conf", IDS_LEX_APC_CFG, L"Apache Config Files", L"conf; cfg; cnf; htaccess; prefs; iface; prop; po; te; Kconfig*; Doxyfile*", L"",
&KeyWords_CONF, {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ {SCE_CONF_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },

View File

@ -6,7 +6,7 @@ KEYWORDLIST KeyWords_MAK = EMPTY_KEYWORDLIST;
EDITLEXER lexMAK =
{
SCLEX_MAKEFILE, "makefile", IDS_LEX_MAKEFILES, L"Makefiles", L"mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; \\^Makefile$; \\^Kbuild$", L"",
SCLEX_MAKEFILE, "makefile", IDS_LEX_MAKEFILES, L"Makefiles", L"mak; make; mk; dsp; msc; msvc; am; pro; pri; gmk; ninja; dsw; Makefile*; Kbuild*", L"",
&KeyWords_MAK, {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ {SCE_MAKE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },

View File

@ -11,7 +11,7 @@ KEYWORDLIST KeyWords_RUBY =
EDITLEXER lexRUBY =
{
SCLEX_RUBY, "ruby", IDS_LEX_RUBY, L"Ruby Script", L"rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; \\^Rakefile$; \\^Podfile$", L"",
SCLEX_RUBY, "ruby", IDS_LEX_RUBY, L"Ruby Script", L"rb; ruby; rbw; rake; rjs; rakefile; gemspec; podspec; Rakefile*; Podfile*", L"",
&KeyWords_RUBY, {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ {SCE_RB_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },

View File

@ -80,7 +80,7 @@ EDITLEXER lexTEXT =
EDITLEXER lexANSI =
{
SCLEX_NULL, "null", IDS_LEX_ANSI_ART, L"ANSI Art", L"nfo; diz; \\^Readme$", L"",
SCLEX_NULL, "null", IDS_LEX_ANSI_ART, L"ANSI Art", L"nfo; diz; Readme*", L"",
&KeyWords_NULL,{
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"font:Lucida Console; thin; size:11; smoothing:aliased", L"" },
{ {STYLE_LINENUMBER}, IDS_LEX_STD_MARGIN, L"Margins and Line Numbers", L"font:Consolas; size:-2; fore:#008080", L"" },

View File

@ -738,6 +738,158 @@ bool Style_Import(HWND hwnd)
}
//=============================================================================
//
//  _TryTranslateRegexEntry()
//
//  Migration helper for legacy FileNameExtensions entries. Receives one
//  legacy regex pattern with its leading '\' marker already removed and
//  attempts to rewrite it as a wildcard entry.
//
//  Accepted input:  '^<inner>$' where <inner> is made up solely of ASCII
//                   letters, ASCII digits, '_', '-', and the two-character
//                   escape '\.' (a literal dot).
//  Rejected input:  everything else (missing anchor, any other character,
//                   a lone backslash, output that does not fit `out`).
//
//  Translation:
//    - <inner> without a literal dot gets a trailing '*', so the result is
//      still recognised as a wildcard pattern:  '^Makefile$' -> 'Makefile*'
//    - <inner> with at least one '\.' is emitted with the escapes resolved
//      to '.':  '^cmakelists\.txt$' -> 'cmakelists.txt'
//
//  pat/patLen  pattern characters and their count (not NUL-terminated)
//  out/outCap  destination buffer and its capacity in WCHARs
//
//  Returns true and a NUL-terminated string in `out` on success. On failure
//  returns false; `out` may then hold a partial, unterminated result.
//
static bool _TryTranslateRegexEntry(const WCHAR *pat, size_t patLen, LPWSTR out, size_t outCap)
{
    // The shortest translatable pattern is '^x$'.
    if (patLen < 3) {
        return false;
    }
    // Both anchors are mandatory.
    if (pat[0] != L'^' || pat[patLen - 1] != L'$') {
        return false;
    }

    const size_t closing = patLen - 1; // index of the trailing '$'
    size_t written = 0;
    bool sawDot = false;

    size_t i = 1; // first character after '^'
    while (i < closing) {
        const WCHAR ch = pat[i];

        // '\.' counts only if the '.' still lies in front of the closing '$'.
        const bool escapedDot = (ch == L'\\') && ((i + 1) < closing) && (pat[i + 1] == L'.');
        const bool nameChar = (ch >= L'a' && ch <= L'z') ||
                              (ch >= L'A' && ch <= L'Z') ||
                              (ch >= L'0' && ch <= L'9') ||
                              (ch == L'_') || (ch == L'-');

        if (!escapedDot && !nameChar) {
            return false; // possible regex metacharacter: do not guess
        }
        if (written + 1 >= outCap) {
            return false; // keep one slot free for the terminator
        }

        if (escapedDot) {
            out[written++] = L'.';
            sawDot = true;
            i += 2;
        } else {
            out[written++] = ch;
            i += 1;
        }
    }

    if (written == 0) {
        return false;
    }

    // A bare name needs the trailing '*' to be classified as a wildcard.
    if (!sawDot) {
        if (written + 1 >= outCap) {
            return false;
        }
        out[written++] = L'*';
    }

    out[written] = L'\0';
    return true;
}
//=============================================================================
//
//  _StripLegacyRegexEntries()
//
//  Rewrites a ';'-separated FileNameExtensions list in place so that it no
//  longer contains legacy entries (those whose first character is '\').
//  Each legacy entry is handed to _TryTranslateRegexEntry(); a successful
//  translation takes the entry's place, a failed one removes the entry.
//  All other entries are carried over with surrounding blanks/tabs trimmed.
//
//  lpszList  list buffer of STYLE_EXTENSIONS_BUFFER WCHARs (the szExtensions
//            slot); only modified when at least one legacy entry was found.
//
//  Returns true if a legacy entry was found (the buffer then holds the
//  rebuilt list, entries joined by a single ';'); false if the list was
//  empty or contained no legacy entry.
//
static bool _StripLegacyRegexEntries(LPWSTR lpszList)
{
    if (StrIsEmpty(lpszList)) {
        return false;
    }

    WCHAR rebuilt[STYLE_EXTENSIONS_BUFFER] = { L'\0' };
    size_t used = 0;
    bool migrated = false;

    for (const WCHAR *cur = lpszList; *cur; ) {

        // Step over separators and leading blanks.
        while (*cur == L';' || *cur == L' ' || *cur == L'\t') {
            ++cur;
        }
        if (!*cur) {
            break;
        }

        // [cur, stop) is the raw entry, [cur, tail) the entry without trailing blanks.
        const WCHAR *stop = cur;
        while (*stop && *stop != L';') {
            ++stop;
        }
        const WCHAR *tail = stop;
        while (tail > cur && (tail[-1] == L' ' || tail[-1] == L'\t')) {
            --tail;
        }

        // Default: keep the entry unchanged.
        WCHAR translated[STYLE_EXTENSIONS_BUFFER];
        const WCHAR *piece = cur;
        size_t pieceLen = (size_t)(tail - cur);

        if (*cur == L'\\') {
            // Legacy entry: the list changes whether it is translated or dropped.
            migrated = true;
            if (_TryTranslateRegexEntry(cur + 1, pieceLen - 1,
                                        translated, COUNTOF(translated))) {
                piece = translated;
                pieceLen = StringCchLen(translated, COUNTOF(translated));
            } else {
                piece = NULL;
                pieceLen = 0;
            }
        }

        if (piece && pieceLen > 0) {
            // Separator in front of every entry but the first.
            if (used > 0 && used + 1 < COUNTOF(rebuilt)) {
                rebuilt[used++] = L';';
            }
            // Clamp to the space left, reserving the terminator slot.
            size_t n = pieceLen;
            if (used + n >= COUNTOF(rebuilt)) {
                n = COUNTOF(rebuilt) - used - 1;
            }
            if (n > 0) {
                CopyMemory(&rebuilt[used], piece, n * sizeof(WCHAR));
                used += n;
            }
            rebuilt[used] = L'\0';
        }

        cur = stop;
    }

    if (migrated) {
        StringCchCopy(lpszList, STYLE_EXTENSIONS_BUFFER, rebuilt);
    }
    return migrated;
}
//=============================================================================
//
// _LoadLexerFileExtensions()
@ -771,6 +923,16 @@ static void _LoadLexerFileExtensions()
StringCchCopy(g_pLexArray[iLexer]->szExtensions,
COUNTOF(g_pLexArray[iLexer]->szExtensions), tmpExt);
// Auto-migrate: translate or drop any legacy '\regex' entries from
// user INIs so the next save writes the cleaned wildcard-only form.
if (_StripLegacyRegexEntries(g_pLexArray[iLexer]->szExtensions)) {
WCHAR migMsg[256];
StringCchPrintf(migMsg, COUNTOF(migMsg),
L"Notepad3: migrated legacy '\\regex' entries in FileNameExtensions for [%s]; will be rewritten on next save.\n",
Lexer_Section);
OutputDebugStringW(migMsg);
}
// don't allow empty extensions settings => use default ext
if (StrIsEmpty(g_pLexArray[iLexer]->szExtensions)) {
StringCchCopy(g_pLexArray[iLexer]->szExtensions, COUNTOF(g_pLexArray[iLexer]->szExtensions), g_pLexArray[iLexer]->pszDefExt);
@ -2632,38 +2794,72 @@ PEDITLEXER Style_MatchLexer(LPCWSTR lpszMatch, bool bCheckNames)
//=============================================================================
//
// Style_RegExMatchLexer()
// Style_WildcardMatchLexer()
//
PEDITLEXER Style_RegExMatchLexer(LPCWSTR lpszFileName)
// Match the bare filename against any wildcard or literal-filename entries
// in each schema's FileNameExtensions list. An entry qualifies for this
// pass if it contains '*', '?', or '.' — anything else is a plain-extension
// token handled by Style_MatchLexer().
//
// Uses Win32 PathMatchSpecW (case-insensitive, supports * and ?). Entries
// beginning with '\' (legacy PCRE2 syntax) are skipped — see _LoadLexerFileExtensions
// for the matching strip-on-load that keeps user INIs clean.
//
PEDITLEXER Style_WildcardMatchLexer(LPCWSTR lpszFileName)
{
if (StrIsNotEmpty(lpszFileName)) {
if (StrIsEmpty(lpszFileName)) {
return NULL;
}
char chFilePath[XHUGE_BUFFER] = { '\0' };
WideCharToMultiByteEx(CP_UTF8, 0, lpszFileName, -1, chFilePath, COUNTOF(chFilePath), NULL, NULL);
for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) {
const WCHAR *p = g_pLexArray[iLex]->szExtensions;
while (p && *p) {
// skip leading separators and whitespace
while (*p == L';' || *p == L' ' || *p == L'\t') {
++p;
}
if (!*p) {
break;
}
// find end of this entry
const WCHAR *e = p;
while (*e && *e != L';') {
++e;
}
// trim trailing whitespace
const WCHAR *eTrim = e;
while (eTrim > p && (eTrim[-1] == L' ' || eTrim[-1] == L'\t')) {
--eTrim;
}
for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) {
const WCHAR *p = g_pLexArray[iLex]->szExtensions;
do {
const WCHAR* f = StrChr(p, L'\\');
const WCHAR* e = f;
if (f) {
e = StrChr(f, L';');
if (!e) {
e = f + StringCchLen(f, 0);
}
++f; // exclude '\'
char regexpat[HUGE_BUFFER] = { '\0' };
WideCharToMultiByte(CP_UTF8, 0, f, (int)(e-f), regexpat, (int)COUNTOF(regexpat), NULL, NULL);
// Strip incidental whitespace around the pattern so entries like
// "py; \^setup\.py$ ;txt" don't carry a leading/trailing space into PCRE2.
StrTrimA(regexpat, " \t");
if (RegExFind(regexpat, chFilePath, false, NULL) >= 0) {
return g_pLexArray[iLex];
}
}
// Skip legacy '\regex' entries (post-cutover; ignored, not matched).
if (*p == L'\\') {
p = e;
} while (p != NULL);
continue;
}
// Classify: an entry containing *, ?, or . is a PathMatchSpec pattern.
// Bare alphanumeric tokens are plain extensions handled elsewhere.
bool isPattern = false;
for (const WCHAR *c = p; c < eTrim; ++c) {
if (*c == L'*' || *c == L'?' || *c == L'.') {
isPattern = true;
break;
}
}
if (isPattern) {
WCHAR pat[STYLE_EXTENSIONS_BUFFER];
size_t copyLen = (size_t)(eTrim - p);
if (copyLen >= COUNTOF(pat)) {
copyLen = COUNTOF(pat) - 1;
}
CopyMemory(pat, p, copyLen * sizeof(WCHAR));
pat[copyLen] = L'\0';
if (PathMatchSpecW(lpszFileName, pat)) {
return g_pLexArray[iLex];
}
}
p = e;
}
}
return NULL;
@ -2687,7 +2883,7 @@ bool Style_HasLexerForExt(const HPATHL hpath)
}
}
if (!bFound && Path_IsNotEmpty(hpath)) {
bFound = Style_RegExMatchLexer(Path_FindFileName(hpath));
bFound = Style_WildcardMatchLexer(Path_FindFileName(hpath));
}
return bFound;
}
@ -2747,9 +2943,9 @@ bool Style_SetLexerFromFile(HWND hwnd, const HPATHL hpath)
LPCWSTR lpszFileName = Path_FindFileName(hpath);
// check for filename regex match
// check for filename wildcard / literal-filename match
if (!bFound && s_bAutoSelect && Path_IsNotEmpty(hpath)) {
pLexSniffed = Style_RegExMatchLexer(lpszFileName);
pLexSniffed = Style_WildcardMatchLexer(lpszFileName);
if (pLexSniffed) {
pLexNew = pLexSniffed;
bFound = true;