diff --git a/Build/Notepad3.ini b/Build/Notepad3.ini index 5c1f54555..feed39fe2 100644 --- a/Build/Notepad3.ini +++ b/Build/Notepad3.ini @@ -24,6 +24,7 @@ SettingsVersion=5 ;FileDeletedIndicator=[X] ;FileDlgFilters= ;FileLoadWarningMB=4 +;FileVarScanBytes=512 ;(min:256, max:2048) {bytes scanned at file head and tail for Emacs file variables, Vim modelines, and encoding tags} ;MultiFileArg=0 ;NoCGIGuess=0 ;NoCopyLineOnEmptySelection=0 diff --git a/Readme.md b/Readme.md index a88cb74c8..6bd9387cf 100644 --- a/Readme.md +++ b/Readme.md @@ -46,7 +46,8 @@ Over 55 languages supported, including: - **[AES-256 Rijndael](readme/encryption/Encryption.md)** encryption/decryption (in-app and command-line batch tool) - **[Encoding detection](readme/uchardet/EncodingDetection.md)** powered by [uchardet](https://www.freedesktop.org/wiki/Software/uchardet/) - **File change monitoring** with configurable check intervals -- **Emacs file variables** support (encoding, mode, tab-width, etc.) +- **[`.LOG` auto-timestamp](readme/config/FileContentFlags.md#log-auto-timestamp)** — files whose first line is `.LOG` get a fresh date + time appended on open, matching classic Notepad behaviour +- **[Emacs file variables](readme/config/FileContentFlags.md#emacs-file-variables)** support (encoding, mode, tab-width, etc.) - **File history** that preserves caret position and encoding - **Portable design** — runs from USB drives with relative path storage @@ -119,6 +120,8 @@ Notepad3 uses a portable INI file for all settings. Press **Ctrl+F7** to open it 🎨 **Schemas, styles & themes:** [readme/schema/CustomSchema.md](readme/schema/CustomSchema.md) — the layered override model, the style mini-language, `View → Customize Schemes` (**F12**), and how to export / import / collect custom themes. 
+📄 **File content flags** (`.LOG`, Emacs file variables, encoding tags, shebang lexer hint): [readme/config/FileContentFlags.md](readme/config/FileContentFlags.md) — markers you can put inside a file to control how Notepad3 opens or styles it. + ## Command-Line Options Notepad3 accepts a rich set of command-line switches inherited from Notepad2 / Notepad2-mod, plus several Notepad3-specific extensions. All switches are case-insensitive and may be prefixed with either `/` or `-`. diff --git a/readme/config/Configuration.md b/readme/config/Configuration.md index aa99837bc..4c697319f 100644 --- a/readme/config/Configuration.md +++ b/readme/config/Configuration.md @@ -107,6 +107,7 @@ This pattern is used by **Shift+F5** to update timestamps, e.g., `$Date: 2018/04 **Notes:** - All `DateTime` formats accept [`strftime()`](https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/strftime-wcsftime-strftime-l-wcsftime-l?view=vs-2019) format strings. +- `DateTimeFormat` is also the format used by the **`.LOG` auto-timestamp** feature (files starting with `.LOG`); see [File Content Flags](FileContentFlags.md#log-auto-timestamp). - `TimeStampFormat` also accepts `%s` as a placeholder for a `DateTimeFormat`-formatted current date/time string (mixing `strftime()` codes with `%s` is not allowed). - If you define a custom `TimeStampFormat`, define a matching `TimeStampRegEx` so that "Update Timestamps" can find and replace them correctly. @@ -166,6 +167,10 @@ Text Files|*.txt;*.wtx;*.log;*.asc;*.doc;*.diz;*.nfo|All Files|*.* Size limit (MB) for large-file warning. Set to `0` to disable. +#### `FileVarScanBytes=512` + +Number of bytes scanned at the file's head **and** (as fallback) at its tail for Emacs file variables, Vim modelines, and encoding tags. Increase if your project's headers (license blocks, banners) push the modeline past 512 bytes. Range: `256`–`2048`. See [File Content Flags](FileContentFlags.md). 
+ #### `MultiFileArg=0` Set to `1` to allow multiple files on the command line. Default (`0`) accepts a single file like Windows Notepad. Use `+` and `-` command-line switches to override; `/z` behaves like `-`. diff --git a/readme/config/FileContentFlags.md b/readme/config/FileContentFlags.md new file mode 100644 index 000000000..e082c3157 --- /dev/null +++ b/readme/config/FileContentFlags.md @@ -0,0 +1,235 @@ +# File Content Flags + +Notepad3 recognizes a handful of markers **inside the file itself** that change how the file is opened or styled. Unlike command-line switches or INI settings, these are written into the document, travel with it, and apply automatically each time the file is loaded. + +This page documents the user-actionable markers. Notepad3 also performs several automatic content detections (BOMs, line endings, UTF-8 validity); those are summarized at the end as context. + +--- + +## Quick reference + +| Marker | Where in file | Effect | Gating INI key | +|--------|---------------|--------|----------------| +| `.LOG` | First 4 bytes of line 1 | Append fresh date + time on initial open, caret placed below it | — (always on) | +| Emacs file variables | First N bytes or last N bytes (N = `FileVarScanBytes`) | Sets tab width, indent, wrap, edge columns, lexer | `NoFileVariables` | +| Vim modelines (`vim:`, `vi:`, `ex:`) | First / last N bytes | Sets tab width, shiftwidth, expandtab, wrap, textwidth, filetype | `NoFileVariables` | +| Encoding tag (`coding:` / `encoding:` / `charset:`) | First / last N bytes | Forces a specific encoding for load | `NoEncodingTags` (default: **on** — disables tags) | +| Shebang `#!…` | First line | Picks the lexer (Python, Perl, Bash, Ruby, Tcl, PHP, Node, Lua, Awk, R, PowerShell, Julia, Dart, Nim, …) | — (always on) | + +The scan window `N` is configurable via `Settings2.FileVarScanBytes` (default `512`, range `256`–`2048`). + +INI keys are documented in the [Configuration Reference](Configuration.md). 
+Note the asymmetry: **Emacs file variables are on by default; encoding tags are off by default.** Set `NoEncodingTags=0` in `[Settings2]` to enable parsing of `# coding: utf-8` and friends. + +--- + +## `.LOG` auto-timestamp + +If the very first bytes of the file are exactly `.LOG` and the next byte is a line break (or EOF), Notepad3 treats the file as a log book: + +- On **initial open** the current date and time are appended to the document, on a new line at the end. +- The caret is placed at the start of the empty line **after** the timestamp, so you can immediately type a log entry. +- On **reload** (revert / autoreload) no new timestamp is appended; the caret restores to its pre-reload position. +- The MRU caret position is **not** persisted for `.LOG` files — every initial open lands on the freshly inserted timestamp, regardless of where you were last time. + +This matches the classic Windows Notepad behaviour. There is no INI key to disable it; remove the `.LOG` line (or change it to anything else) to opt the file out. + +The exact timestamp format is controlled by `Settings2.DateTimeFormat` (or `DateTimeLongFormat`). See the [Configuration Reference](Configuration.md). + +**Example file:** + +``` +.LOG +2026-04-30 14:02 Started new project, baseline performance numbers below. +2026-05-01 09:18 Re-ran after profile patch — 12% faster. +``` + +Open it again and Notepad3 appends today's date on a new line for the next entry. + +--- + +## Emacs file variables + +A line containing `name: value` (or `name=value`, `name="value"`) anywhere in the head or tail scan window (default 512 bytes; configurable via `Settings2.FileVarScanBytes`) is parsed as an Emacs-style file variable. Notepad3 honors the following names (all case-insensitive): + +| Variable | Type | Effect | +|----------|------|--------| +| `enable-local-variables` | int | Set to `0` to disable parsing of all other file variables in this file. | +| `tab-width` | int (1–256) | Tab stop width. 
| +| `c-basic-indent` | int (0–256) | Indent width (auto-indent / shift). | +| `indent-tabs-mode` | int (0 or 1) | `0` = insert spaces, `1` = insert tabs. | +| `c-tab-always-indent` | int (0 or 1) | Whether Tab always indents (vs. inserting a tab character at the cursor). | +| `truncate-lines` | int (0 or 1) | `0` = enable word-wrap, `1` = no wrap. | +| `fill-column` | string | One or more column numbers separated by spaces or commas — drawn as long-line edge markers. | +| `mode` | string | Lexer / language hint. See **Mode value matching** below. | + +Comment characters and surrounding text are ignored — only the `name: value` token is recognized. The header is scanned first; if no variables are found there, Notepad3 falls back to scanning the tail of the file. + +**Example (Python file with file variables in a trailing comment block):** + +```python +# Local Variables: +# mode: python +# tab-width: 4 +# indent-tabs-mode: 0 +# c-basic-indent: 4 +# fill-column: 80, 120 +# End: +``` + +**Example (header form, single line):** + +```c +/* -*- mode: c; tab-width: 8; indent-tabs-mode: 1 -*- */ +``` + +To disable Emacs file-variable parsing globally, set `NoFileVariables=1` in `[Settings2]`. The same setting also gates Vim modeline parsing. See the [Configuration Reference](Configuration.md). + +### Mode value matching + +The `mode` value (or Vim's `ft=` value) is resolved in this order: + +1. **Trailing suffix stripped** — common Emacs / Vim suffixes are removed: `-mode`, `-script`, `-major-mode`, `-minor-mode`. So `python-mode` → `python`, `markdown-mode` → `markdown`, `c#-mode` → `c#`. +2. **Extension list** — the (normalized) value is matched against each lexer's filename-extension list. So `mode: cpp`, `mode: c`, `mode: js`, `mode: go`, `mode: r` all resolve via this step. +3. **Display name prefix** — case-insensitive prefix match against the lexer's display name (e.g. `python` matches `"Python Script"`). Minimum length 2, so `mode: c#` resolves to the C# lexer. 
+ +If none of the steps match, the file's filename / extension fallback applies. + +--- + +## Vim modelines + +Vim-style modelines are parsed alongside Emacs file variables (gated by the same `NoFileVariables` setting) and are searched in the same head / tail window. Three marker prefixes are recognized: `vim:`, `vi:`, `ex:`. The marker must be preceded by whitespace or be at the start of a line. Both forms work: + +- **Set form** (terminated by `:`): `vim: set ft=python ts=4 sw=4 noet :` +- **Colon form**: `vim:ft=python:ts=4:sw=4` + +| Option (short / long) | Maps to | Notes | +|-----------------------|---------|-------| +| `ts` / `tabstop` | tab-width | int 1–256 | +| `sw` / `shiftwidth` | indent-width | int 0–256 | +| `et` / `expandtab` | indent-tabs-mode = spaces | flag (no value) | +| `noet` / `noexpandtab` | indent-tabs-mode = tabs | flag | +| `wrap` | word-wrap on | flag | +| `nowrap` | word-wrap off | flag | +| `tw` / `textwidth` | long-line edge marker | int | +| `ft` / `filetype` | mode (lexer hint) | uses the same matcher as Emacs `mode:` | + +Other Vim options are silently ignored. + +**Examples:** + +```python +# vim: set ft=python ts=4 sw=4 et : +``` + +```c +// vim: ft=c ts=8 sw=8 noet : +``` + +```yaml +# vim:ft=yaml:ts=2:sw=2:et: +``` + +--- + +## Encoding tags + +> **Default: off.** Encoding tag parsing is gated by `Settings2.NoEncodingTags`, which defaults to `1` (tags ignored). Set `NoEncodingTags=0` to enable. The default-off was chosen because any innocuous mention of `coding:` or `charset:` in code or comments would otherwise be honored. + +If the file has **no BOM** and parsing is enabled, Notepad3 looks for an encoding declaration and uses it as the load encoding. The first match wins, in this order: + +1. `coding: <name>` +2. `encoding: <name>` +3. `charset: <name>` + +The syntax is the same `name: value` / `name=value` / `name="value"` form used by Emacs file variables, and the same head + tail scan window applies (default 512 bytes, configurable via `Settings2.FileVarScanBytes`).
The encoding name must be one Notepad3 knows (UTF-8, UTF-16, ISO-8859-*, Windows-*, the various code pages — anything visible in the encoding picker). + +**Examples that work:** + +```python +# -*- coding: utf-8 -*- +``` + +```html +<meta charset="utf-8"> +``` + +```ruby +# encoding: Windows-1252 +``` + +In a file with a BOM the encoding tag is ignored — the BOM wins. To disable encoding-tag parsing globally, set `NoEncodingTags=1` in `[Settings2]`. See the [Configuration Reference](Configuration.md). + +--- + +## Shebang lexer hint + +If the first line begins with `#!`, Notepad3 extracts the interpreter token, normalizes it, and selects a matching lexer. Recognized forms: + +- `#!/usr/bin/perl` +- `#!/usr/bin/env python3` — the `env` keyword is recognized; POSIX flags such as `-S` are skipped, and the next token is used. +- `#!/usr/local/bin/python3.11` — any path prefix is stripped (only the basename matters). The interpreter token ends at the first space or tab, so an interpreter path that contains spaces is not resolved. + +**Normalization** applied to the basename before matching: + +1. Strip leading path (everything before the last `/` or `\`). +2. Strip trailing version suffix — digits, dots, and dashes — so `python3.11` → `python`, `Rscript-4.3` → `Rscript`. +3. Compare case-insensitively against the table below; an exact (token-equal) match wins. + +| Interpreter | Lexer | +|-------------|-------| +| `python` | Python | +| `perl` | Perl | +| `ruby` | Ruby | +| `php` | Web (HTML / PHP) | +| `node`, `nodejs`, `deno`, `bun` | JavaScript | +| `tclsh`, `wish`, `tcl` | Tcl | +| `sh`, `bash`, `zsh`, `ksh`, `dash`, `ash`, `fish`, `csh`, `tcsh` | Shell (Bash) | +| `lua`, `luajit` | Lua | +| `awk`, `gawk`, `mawk`, `nawk` | Awk | +| `Rscript`, `littler` | R | +| `pwsh`, `powershell` | PowerShell | +| `julia` | Julia | +| `dart` | Dart | +| `nim`, `nimrod` | Nim | + +Bare `R` is intentionally **not** matched (too ambiguous as a single character — use `Rscript`, `littler`, or a `mode: r` file variable instead).
+ +There is no INI key to disable shebang sniffing; rename or change the `#!` line to suppress it. + +--- + +## How Notepad3 reads these markers + +- **`.LOG`** is checked at offset 0 only; bytes 1–4 must be `.LOG`, byte 5 must be `\r`, `\n`, or EOF. +- **Emacs file variables**, **Vim modelines**, and **encoding tags** are searched in two passes: first the leading `Settings2.FileVarScanBytes` bytes, then — only if nothing was found in the head — the trailing window. Default 512, max 2048. +- **Shebang** is checked on line 1 only. + +**Precedence for encoding** (most specific first): + +1. Command-line switch (`/utf8`, `/ansi`, …) or `/e ` +2. Byte Order Mark (BOM) at the start of the file +3. Encoding tag (`coding:` / `encoding:` / `charset:`) +4. uchardet auto-detection +5. `Settings.DefaultEncoding` + +**Precedence for lexer** (most specific first): + +1. Command-line `/d`, `/h`, `/x`, `/s` +2. Filename / extension match +3. `mode:` file variable / `ft=` Vim modeline +4. Shebang sniff +5. Default lexer + +--- + +## Related automatic detections (not file-content flags) + +For completeness — these aren't markers you write, but Notepad3 reads file content to decide them: + +- **BOM detection** — UTF-8 (`EF BB BF`), UTF-16 LE (`FF FE`), UTF-16 BE (`FE FF`), and UTF-32 BOMs are detected at file start and select the corresponding encoding. A BOM overrides any encoding tag. +- **Line-ending detection** — Notepad3 counts `\r\n`, `\r`, and `\n` occurrences and picks the most common as the document's EOL mode. If multiple kinds are mixed, you'll see the inconsistent-EOL warning (when enabled). +- **UTF-8 validity** — files whose non-ASCII bytes form valid UTF-8 sequences are loaded as UTF-8 (instead of the ANSI code page) when `LoadASCIIasUTF8` is on. Pure 7-bit ASCII files follow the same setting. 
+
+See [`readme/uchardet/EncodingDetection.md`](../uchardet/EncodingDetection.md) for the full encoding-detection pipeline and [Configuration Reference](Configuration.md) for the related `[Settings2]` keys (`NoFileVariables`, `NoEncodingTags`, `FileVarScanBytes`, `LoadASCIIasUTF8`, `LoadNFOasOEM`, `DateTimeFormat`, `DateTimeLongFormat`).
diff --git a/src/Config/Config.cpp b/src/Config/Config.cpp
index 3b6b8fccd..1795d232e 100644
--- a/src/Config/Config.cpp
+++ b/src/Config/Config.cpp
@@ -1392,6 +1392,7 @@ void LoadSettings()
     Settings2.UchardetLanguageFilter = clampi(IniSectionGetInt(IniSecSettings2, L"UchardetLanguageFilter", 0x1F), 0, 0x1F);
 
     Settings2.FileLoadWarningMB = clampi(IniSectionGetInt(IniSecSettings2, L"FileLoadWarningMB", 4), 0, 2048);
+    Settings2.FileVarScanBytes = clampi(IniSectionGetInt(IniSecSettings2, L"FileVarScanBytes", LARGE_BUFFER), MIDSZ_BUFFER, XHUGE_BUFFER);
 
     Settings2.OpacityLevel = clampi(IniSectionGetInt(IniSecSettings2, L"OpacityLevel", 75), 10, 100);
 
diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp
index 5609222fa..ba7eeb916 100644
--- a/src/EncodingDetection.cpp
+++ b/src/EncodingDetection.cpp
@@ -568,6 +568,136 @@ static void _SetEncodingTitleInfo(const ENC_DET_T* pEncDetInfo)
 
 
 
+//=============================================================================
+//
+//  _ParseVimModeline()
+//
+//  Recognizes the Vim modeline forms:
+//    [text] vim: option=value option=value :     (also vi: / ex:)
+//    [text] vim:set option=value option=value:   (with optional "set" keyword)
+//    [text] vim:option=value:option=value        (colon-separated form)
+//  The marker must be preceded by a blank or start a line (any line of the buffer).
+//  Honored options (an option value always ends at ':' or at a blank):
+//    ts/tabstop, sw/shiftwidth, et/expandtab, noet/noexpandtab,
+//    wrap/nowrap, tw/textwidth, ft/filetype   -- all other options are ignored
+//  Results are written to *lpfv; the matching FV_* bit is set in lpfv->mask.
+static void _ParseVimModeline(const char* buffer, LPFILEVARS lpfv)
+{
+    static const char* const markers[] = { "vim:", "ex:", "vi:" };
+    const char* mlStart = NULL; // first byte after the accepted marker
+    for (int m = 0; m < (int)COUNTOF(markers) && !mlStart; ++m) {
+        size_t const mLen = StringCchLenA(markers[m], 0);
+        const char* p = StrStrIA(buffer, markers[m]);
+        while (p) {
+            if (p == buffer || p[-1] == ' ' || p[-1] == '\t' || p[-1] == '\r' || p[-1] == '\n') {
+                mlStart = p + mLen;
+                break;
+            }
+            p = StrStrIA(p + 1, markers[m]); // hit inside a word (e.g. "index:"): try the next one
+        }
+    }
+    if (!mlStart) {
+        return;
+    }
+
+    const char* mlEnd = mlStart; // the modeline ends with the line that carries the marker
+    while (*mlEnd && *mlEnd != '\r' && *mlEnd != '\n') {
+        ++mlEnd;
+    }
+
+    const char* p = mlStart;
+    while (p < mlEnd && (*p == ' ' || *p == '\t')) {
+        ++p;
+    }
+
+    // An optional "set " / "se " keyword may follow the marker. Options are tokenized
+    // the same way with or without it (Vim separates them by blanks or ':' in every
+    // form), so the keyword is simply skipped.
+    if ((p + 4 <= mlEnd) && (_strnicmp(p, "set ", 4) == 0)) {
+        p += 4;
+    } else if ((p + 3 <= mlEnd) && (_strnicmp(p, "se ", 3) == 0)) {
+        p += 3;
+    }
+
+    char nameBuf[32];
+    char valBuf[64];
+    while (p < mlEnd) {
+        while (p < mlEnd && (*p == ' ' || *p == '\t' || *p == ':')) {
+            ++p;
+        }
+        if (p >= mlEnd) {
+            break;
+        }
+
+        const char* nameStart = p;
+        while (p < mlEnd && *p != '=' && *p != ':' && *p != ' ' && *p != '\t') {
+            ++p;
+        }
+        size_t nameLen = (size_t)(p - nameStart);
+        if (nameLen == 0) {
+            break; // stray '=' without an option name: stop parsing
+        }
+        if (nameLen >= COUNTOF(nameBuf)) {
+            nameLen = COUNTOF(nameBuf) - 1; // over-long names are truncated (and then match nothing)
+        }
+        memcpy(nameBuf, nameStart, nameLen);
+        nameBuf[nameLen] = '\0';
+
+        valBuf[0] = '\0';
+        if (p < mlEnd && *p == '=') {
+            ++p;
+            const char* valStart = p;
+            while (p < mlEnd && *p != ':' && *p != ' ' && *p != '\t') {
+                ++p;
+            }
+            size_t valLen = (size_t)(p - valStart);
+            if (valLen >= COUNTOF(valBuf)) {
+                valLen = COUNTOF(valBuf) - 1;
+            }
+            memcpy(valBuf, valStart, valLen);
+            valBuf[valLen] = '\0';
+        }
+
+        if ((_stricmp(nameBuf, "ts") == 0) || (_stricmp(nameBuf, "tabstop") == 0)) {
+            int const i = atoi(valBuf);
+            if (i > 0) {
+                lpfv->iTabWidth = clampi(i, 1, 256);
+                lpfv->mask |= FV_TABWIDTH;
+            }
+        } else if ((_stricmp(nameBuf, "sw") == 0) || (_stricmp(nameBuf, "shiftwidth") == 0)) {
+            int const i = atoi(valBuf);
+            lpfv->iIndentWidth = clampi(i, 0, 256);
+            lpfv->mask |= FV_INDENTWIDTH;
+        } else if ((_stricmp(nameBuf, "et") == 0) || (_stricmp(nameBuf, "expandtab") == 0)) {
+            lpfv->bTabsAsSpaces = true;
+            lpfv->mask |= FV_TABSASSPACES;
+        } else if ((_stricmp(nameBuf, "noet") == 0) || (_stricmp(nameBuf, "noexpandtab") == 0)) {
+            lpfv->bTabsAsSpaces = false;
+            lpfv->mask |= FV_TABSASSPACES;
+        } else if (_stricmp(nameBuf, "wrap") == 0) {
+            lpfv->bWordWrap = true;
+            lpfv->mask |= FV_WORDWRAP;
+        } else if (_stricmp(nameBuf, "nowrap") == 0) {
+            lpfv->bWordWrap = false;
+            lpfv->mask |= FV_WORDWRAP;
+        } else if ((_stricmp(nameBuf, "tw") == 0) || (_stricmp(nameBuf, "textwidth") == 0)) {
+            int const i = atoi(valBuf);
+            if (i > 0) {
+                WCHAR wbuf[16];
+                StringCchPrintf(wbuf, COUNTOF(wbuf), L"%d", i);
+                StringCchCopy(lpfv->wchMultiEdgeLines, COUNTOF(lpfv->wchMultiEdgeLines), wbuf);
+                lpfv->mask |= FV_LONGLINESLIMIT;
+            }
+        } else if ((_stricmp(nameBuf, "ft") == 0) || (_stricmp(nameBuf, "filetype") == 0)) {
+            if (valBuf[0] && !(lpfv->mask & FV_MODE)) { // a mode that is already set is kept
+                StringCchCopyA(lpfv->chMode, COUNTOF(lpfv->chMode), valBuf);
+                lpfv->mask |= FV_MODE;
+            }
+        }
+    }
+}
+
+
 //=============================================================================
 //
 //  _SetFileVars()
@@ -617,6 +747,9 @@ static void _SetFileVars(char* buffer, size_t cch, LPFILEVARS lpfv)
         if (FileVars_ParseStr(buffer, "mode", lpfv->chMode, COUNTOF(lpfv->chMode))) {
             lpfv->mask |= FV_MODE;
         }
+
+        // Vim modelines (`vim:` / `vi:` / `ex:`) — parsed alongside Emacs file variables.
+        _ParseVimModeline(buffer, lpfv);
     }
 }
 
@@ -662,15 +795,16 @@ extern "C" bool FileVars_GetFromData(const char* lpData, size_t cbData, LPFILEVA
         return true;
     }
 
-    char tmpbuf[LARGE_BUFFER];
-    size_t const cch = min_s(cbData + 1, COUNTOF(tmpbuf));
+    size_t const scanBytes = (size_t)clampi(Settings2.FileVarScanBytes, MIDSZ_BUFFER, XHUGE_BUFFER);
+    char tmpbuf[XHUGE_BUFFER];
+    size_t const cch = min_s(cbData + 1, scanBytes);
 
-    StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData, cch);
+    StringCchCopyNA(tmpbuf, scanBytes, lpData, cch);
     _SetFileVars(tmpbuf, cch, lpfv);
 
     // if no file vars found, look at EOF
-    if ((lpfv->mask == 0) && (cbData > COUNTOF(tmpbuf))) {
-        StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData + cbData - COUNTOF(tmpbuf) + 1, COUNTOF(tmpbuf));
+    if ((lpfv->mask == 0) && (cbData > scanBytes)) {
+        StringCchCopyNA(tmpbuf, scanBytes, lpData + cbData - scanBytes + 1, scanBytes);
         _SetFileVars(tmpbuf, cch, lpfv);
     }
 
diff --git a/src/Notepad3.c b/src/Notepad3.c
index 81c60d893..93e214e5a 100644
--- a/src/Notepad3.c
+++ b/src/Notepad3.c
@@ -10992,6 +10992,20 @@ void EndUndoActionSelection(const LONG token)
 }
 
 
+// True when the document starts with ".LOG" followed by a line break or the end of the text
+static inline bool IsFileVarLogFile()
+{
+    char head[6] = { '\0' }; // room for the 5 leading bytes + terminator
+    DocPos const docLen = SciCall_GetTextLength();
+    if (docLen >= 4) {
+        SciCall_GetText(COUNTOF(head) - 1, head);
+        if (memcmp(head, ".LOG", 4) == 0) {
+            return (docLen == 4) || (head[4] == '\r') || (head[4] == '\n');
+        }
+    }
+    return false;
+}
+
 //=============================================================================
 //
 //  FileIO()
@@ -11009,9 +11023,10 @@ bool FileIO(bool fLoad, const HPATHL hfile_pth, EditFileIOStatus* status,
     else {
         int idx = 0;
         if (MRU_FindPath(Globals.pFileMRU, hfile_pth, &idx)) {
+            bool const bSkipCaretMRU = !Settings.PreserveCaretPos || IsFileVarLogFile();
             Globals.pFileMRU->iEncoding[idx] = status->iEncoding;
-            Globals.pFileMRU->iCaretPos[idx] = (Settings.PreserveCaretPos ?
SciCall_GetCurrentPos() : -1); - Globals.pFileMRU->iSelAnchPos[idx] = (Settings.PreserveCaretPos ? SciCall_GetAnchor() : -1); + Globals.pFileMRU->iCaretPos[idx] = bSkipCaretMRU ? -1 : SciCall_GetCurrentPos(); + Globals.pFileMRU->iSelAnchPos[idx] = bSkipCaretMRU ? -1 : SciCall_GetAnchor(); WCHAR wchBookMarks[MRU_BMRK_SIZE] = { L'\0' }; EditGetBookmarkList(Globals.hwndEdit, wchBookMarks, COUNTOF(wchBookMarks)); @@ -11067,16 +11082,6 @@ bool ConsistentIndentationCheck(EditFileIOStatus* status) // // -static inline bool IsFileVarLogFile() -{ - if (SciCall_GetTextLength() >= 4) { - char tch[5] = { '\0', '\0', '\0', '\0', '\0' }; - SciCall_GetText(COUNTOF(tch) - 1, tch); - return (StrCmpA(tch, ".LOG") == 0); - } - return false; -} - static inline void _ResetFileWatchingMode() { if (FileWatching.MonitoringLog) { KillTimer(Globals.hwndMain, ID_LOGROTATETIMER); @@ -11316,8 +11321,8 @@ bool FileLoad(const HPATHL hfile_pth, const FileLoadFlags fLoadFlags, const DocP // consistent settings file handling (if loaded in editor) Flags.bSettingsFileSoftLocked = (Path_StrgComparePath(Paths.CurrentFile, Paths.IniFile, Paths.WorkingDirectory, true) == 0); - // the .LOG feature ... - if (IsFileVarLogFile()) { + // the .LOG feature ... 
(initial open only) + if (!bReloadFile && IsFileVarLogFile()) { Sci_SetCaretScrollDocEnd(); UndoTransActionBegin(); SciCall_NewLine(); @@ -11331,8 +11336,8 @@ bool FileLoad(const HPATHL hfile_pth, const FileLoadFlags fLoadFlags, const DocP UpdateSaveSettingsCmds(); } - // set historic caret/selection pos - if (!FileWatching.MonitoringLog && (s_flagChangeNotify != FWM_AUTORELOAD)) { + // set historic caret/selection pos (suppressed for .LOG files - datetime placement wins) + if (!IsFileVarLogFile() && !FileWatching.MonitoringLog && (s_flagChangeNotify != FWM_AUTORELOAD)) { if ((iCaretPos >= 0) && (iAnchorPos >= 0)) { Sci_SetStreamSelection(iAnchorPos, iCaretPos, true); Sci_ScrollSelectionToView(); @@ -11584,9 +11589,10 @@ static void _MRU_UpdateSession() { int idx = 0; if (MRU_FindPath(Globals.pFileMRU, Paths.CurrentFile, &idx)) { + bool const bSkipCaretMRU = !Settings.PreserveCaretPos || IsFileVarLogFile(); Globals.pFileMRU->iEncoding[idx] = Encoding_GetCurrent(); - Globals.pFileMRU->iCaretPos[idx] = (Settings.PreserveCaretPos) ? SciCall_GetCurrentPos() : -1; - Globals.pFileMRU->iSelAnchPos[idx] = (Settings.PreserveCaretPos) ? (Sci_IsMultiOrRectangleSelection() ? -1 : SciCall_GetAnchor()) : -1; + Globals.pFileMRU->iCaretPos[idx] = bSkipCaretMRU ? -1 : SciCall_GetCurrentPos(); + Globals.pFileMRU->iSelAnchPos[idx] = bSkipCaretMRU ? -1 : (Sci_IsMultiOrRectangleSelection() ? -1 : SciCall_GetAnchor()); WCHAR wchBookMarks[MRU_BMRK_SIZE] = { L'\0' }; EditGetBookmarkList(Globals.hwndEdit, wchBookMarks, COUNTOF(wchBookMarks)); if (Globals.pFileMRU->pszBookMarks[idx]) { @@ -11599,9 +11605,10 @@ static void _MRU_UpdateSession() static void _MRU_AddSession() { + bool const bIsLogFile = IsFileVarLogFile(); cpi_enc_t iCurrEnc = Encoding_GetCurrent(); - const DocPos iCaretPos = SciCall_GetCurrentPos(); - const DocPos iAnchorPos = Sci_IsMultiOrRectangleSelection() ? -1 : SciCall_GetAnchor(); + const DocPos iCaretPos = bIsLogFile ? 
-1 : SciCall_GetCurrentPos();
+    const DocPos iAnchorPos = bIsLogFile ? -1 : (Sci_IsMultiOrRectangleSelection() ? -1 : SciCall_GetAnchor());
     WCHAR wchBookMarks[MRU_BMRK_SIZE] = { L'\0' };
     EditGetBookmarkList(Globals.hwndEdit, wchBookMarks, COUNTOF(wchBookMarks));
     MRU_AddPath(Globals.pFileMRU, Paths.CurrentFile, Flags.RelativeFileMRU, Flags.PortableMyDocs, iCurrEnc, iCaretPos, iAnchorPos, wchBookMarks);
diff --git a/src/Styles.c b/src/Styles.c
index b00df708c..e515b3f0a 100644
--- a/src/Styles.c
+++ b/src/Styles.c
@@ -2453,48 +2453,124 @@ void Style_SetMargin(HWND hwnd, LPCWSTR lpszStyle) /// iStyle == STYLE_LINENUMBE
 //
 PEDITLEXER Style_SniffShebang(char* pchText)
 {
+    // Maps a leading "#!" interpreter line to a lexer.
+    // pchText: NUL-terminated text; only a "#!" at its very start is inspected.
+    // Returns the lexer whose table name equals the interpreter basename (path and
+    // trailing version suffix removed, compared case-insensitively), or NULL when
+    // there is no shebang or the interpreter is not in the table.
+    // A space or tab ends the interpreter token, so a path containing blanks is not resolved.
-    if (StrCmpNA(pchText,"#!",2) == 0) {
-        char *pch = pchText + 2;
-        while (*pch == ' ' || *pch == '\t') {
-            pch++;
+    if (StrCmpNA(pchText, "#!", 2) != 0) {
+        return NULL;
+    }
+    char* p = pchText + 2;
+
+    // Skip whitespace after "#!"
+    while (*p == ' ' || *p == '\t') {
+        ++p;
+    }
+
+    // Read first token (interpreter path); it ends at a blank, a line break or the NUL
+    char* tokStart = p;
+    while (*p && *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n') {
+        ++p;
+    }
+    char* tokEnd = p;
+
+    // Find basename of first token (after last '/' or '\')
+    char* bn = tokStart;
+    for (char* s = tokStart; s < tokEnd; ++s) {
+        if (*s == '/' || *s == '\\') {
+            bn = s + 1;
         }
-        while (*pch && *pch != ' ' && *pch != '\t' && *pch != '\r' && *pch != '\n') {
-            pch++;
+    }
+
+    // If basename is "env", skip optional flags (e.g. "-S") and re-read the interpreter token
+    int const bnLen = (int)(tokEnd - bn);
+    if (bnLen == 3 && StrCmpNIA(bn, "env", 3) == 0) {
+        while (*p == ' ' || *p == '\t') {
+            ++p;
         }
-        if ((pch - pchText) >= 3 && StrCmpNA(pch-3,"env",3) == 0) {
-            while (*pch == ' ') {
-                pch++;
+        // Skip POSIX flags like "-S", "-vS": every token that starts with '-' is dropped
+        while (*p == '-') {
+            while (*p && *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n') {
+                ++p;
             }
-            while (*pch && *pch != ' ' && *pch != '\t' && *pch != '\r' && *pch != '\n') {
-                pch++;
+            while (*p == ' ' || *p == '\t') {
+                ++p;
             }
         }
-        if ((pch - pchText) >= 3 && StrCmpNIA(pch - 3, "php", 3) == 0) {
-            return(&lexHTML);
+        // Read interpreter token after env [flags]
+        tokStart = p;
+        while (*p && *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n') {
+            ++p;
         }
-        if ((pch - pchText) >= 4 && StrCmpNIA(pch - 4, "perl", 4) == 0) {
-            return(&lexPL);
+        tokEnd = p;
+        bn = tokStart; // same basename search as above, now on the token that follows env
+        for (char* s = tokStart; s < tokEnd; ++s) {
+            if (*s == '/' || *s == '\\') {
+                bn = s + 1;
+            }
         }
-        if ((pch - pchText) >= 6 && StrCmpNIA(pch - 6, "python", 6) == 0) {
-            return(&lexPY);
+    }
+
+    // Strip trailing version suffix: digits, dots, dashes (e.g. python3.11 -> python, Rscript-4.3 -> Rscript)
+    char* bnEnd = tokEnd;
+    while (bnEnd > bn) {
+        char const c = *(bnEnd - 1);
+        if ((c >= '0' && c <= '9') || c == '.' || c == '-') {
+            --bnEnd;
+        } else {
+            break;
         }
-        if ((pch - pchText) >= 3 && StrCmpNA(pch - 3, "tcl", 3) == 0) {
-            return(&lexTCL);
-        }
-        if ((pch - pchText) >= 4 && StrCmpNA(pch - 4, "wish", 4) == 0) {
-            return(&lexTCL);
-        }
-        if ((pch - pchText) >= 5 && StrCmpNA(pch - 5, "tclsh", 5) == 0) {
-            return(&lexTCL);
-        }
-        if ((pch - pchText) >= 2 && StrCmpNA(pch - 2, "sh", 2) == 0) {
-            return(&lexBASH);
-        }
-        if ((pch - pchText) >= 4 && StrCmpNA(pch - 4, "ruby", 4) == 0) {
-            return(&lexRUBY);
-        }
-        if ((pch - pchText) >= 4 && StrCmpNA(pch - 4, "node", 4) == 0) {
-            return(&lexJS);
+    }
+    int const nameLen = (int)(bnEnd - bn);
+    if (nameLen <= 0) { // nothing left, e.g. a bare "#!" or an all-digit token
+        return NULL;
+    }
+
+    static const struct { // interpreter basename -> lexer; entry order is irrelevant (whole-name match)
+        const char* name;
+        PEDITLEXER lexer;
+    } interps[] = {
+        { "python", &lexPY },
+        { "perl", &lexPL },
+        { "ruby", &lexRUBY },
+        { "php", &lexHTML },
+        { "node", &lexJS },
+        { "nodejs", &lexJS },
+        { "deno", &lexJS },
+        { "bun", &lexJS },
+        { "tclsh", &lexTCL },
+        { "wish", &lexTCL },
+        { "tcl", &lexTCL },
+        { "bash", &lexBASH },
+        { "zsh", &lexBASH },
+        { "ksh", &lexBASH },
+        { "dash", &lexBASH },
+        { "ash", &lexBASH },
+        { "fish", &lexBASH },
+        { "tcsh", &lexBASH },
+        { "csh", &lexBASH },
+        { "sh", &lexBASH },
+        { "luajit", &lexLUA },
+        { "lua", &lexLUA },
+        { "gawk", &lexAwk },
+        { "mawk", &lexAwk },
+        { "nawk", &lexAwk },
+        { "awk", &lexAwk },
+        { "rscript", &lexR },
+        { "littler", &lexR },
+        { "powershell", &lexPS },
+        { "pwsh", &lexPS },
+        { "julia", &lexJulia },
+        { "dart", &lexDart },
+        { "nimrod", &lexNim },
+        { "nim", &lexNim },
+    };
+    for (int i = 0; i < COUNTOF(interps); ++i) {
+        int const ilen = (int)StringCchLenA(interps[i].name, 0);
+        if (nameLen == ilen && StrCmpNIA(bn, interps[i].name, ilen) == 0) { // equal length + case-insensitive compare
+            return interps[i].lexer;
         }
     }
     return NULL;
@@ -2507,18 +2583,37 @@ PEDITLEXER Style_SniffShebang(char* pchText)
 //
 PEDITLEXER Style_MatchLexer(LPCWSTR lpszMatch, bool bCheckNames)
 {
+    // Normalize: strip trailing Emacs/Vim mode suffixes ("-mode", "-script",
"-major-mode", "-minor-mode") + // so values like "python-mode", "c++-mode", "markdown-mode" reduce to "python", "c++", "markdown". + WCHAR wchNorm[MICRO_BUFFER]; + StringCchCopy(wchNorm, COUNTOF(wchNorm), lpszMatch); + static const WCHAR* const stripSuffixes[] = { + L"-major-mode", L"-minor-mode", L"-script", L"-mode" + }; + for (int i = 0; i < COUNTOF(stripSuffixes); ++i) { + size_t const sfxLen = StringCchLen(stripSuffixes[i], 0); + size_t const matchLen = StringCchLen(wchNorm, 0); + if (matchLen > sfxLen) { + LPCWSTR const tail = wchNorm + matchLen - sfxLen; + if (StrCmpI(tail, stripSuffixes[i]) == 0) { + wchNorm[matchLen - sfxLen] = L'\0'; + break; + } + } + } + if (bCheckNames) { - int const cch = (int)StringCchLen(lpszMatch, 0); - if (cch >= 3) { + int const cch = (int)StringCchLen(wchNorm, 0); + if (cch >= 2) { for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) { - if (StrCmpNI(g_pLexArray[iLex]->pszName, lpszMatch, cch) == 0) { + if (StrCmpNI(g_pLexArray[iLex]->pszName, wchNorm, cch) == 0) { return (g_pLexArray[iLex]); } } } - } else if (StrIsNotEmpty(lpszMatch)) { + } else if (StrIsNotEmpty(wchNorm)) { for (int iLex = 0; iLex < COUNTOF(g_pLexArray); ++iLex) { - if (Style_StrHasAttribute(g_pLexArray[iLex]->szExtensions, lpszMatch)) { + if (Style_StrHasAttribute(g_pLexArray[iLex]->szExtensions, wchNorm)) { return g_pLexArray[iLex]; } } diff --git a/src/TypeDefs.h b/src/TypeDefs.h index a81715e87..eb9e8babd 100644 --- a/src/TypeDefs.h +++ b/src/TypeDefs.h @@ -803,6 +803,7 @@ extern FLAGS_T DefaultFlags; typedef struct SETTINGS2_T { int FileLoadWarningMB; + int FileVarScanBytes; int OpacityLevel; int FindReplaceOpacityLevel; LONG64 FileCheckInterval;