From 8b92003032ecade31d0bbd1da7075e8065b37a1e Mon Sep 17 00:00:00 2001 From: "METANEOCORTEX\\Kotti" Date: Thu, 14 May 2026 12:23:09 +0200 Subject: [PATCH] fix: lexer AHK_L minor issues --- Build/Themes/Dark.ini | 1 + Build/Themes/Obsidian.ini | 1 + CLAUDE.md | 1 + lexilla/lexers_x/LexAHK.cxx | 128 ++++++++++++++++-- lexilla/lexers_x/SciX.iface | 1 + lexilla/lexers_x/SciXLexer.h | 1 + lexilla/lexers_x/StringUtils.h | 11 ++ res/StdDarkModeScheme.ini | 1 + src/StyleLexers/styleLexAHK.c | 1 + .../styleLexAHKL/lexer_ahk_v1_smoke.ahk | 53 +++++++- 10 files changed, 185 insertions(+), 14 deletions(-) diff --git a/Build/Themes/Dark.ini b/Build/Themes/Dark.ini index 758c4771c..615bdc6f4 100644 --- a/Build/Themes/Dark.ini +++ b/Build/Themes/Dark.ini @@ -96,6 +96,7 @@ Register=fore:#75715E FileNameExtensions=asm; s; sx; inc; a51 [AutoHotkey Script] Comment=fore:#008000 +Doc Comment=italic; fore:#3F7F5F Escape=italic; fore:#660000 Syntax Operator=fore:#7F200F Expression Operator=fore:#FF4F00 diff --git a/Build/Themes/Obsidian.ini b/Build/Themes/Obsidian.ini index 96a0467eb..03b6380dd 100644 --- a/Build/Themes/Obsidian.ini +++ b/Build/Themes/Obsidian.ini @@ -96,6 +96,7 @@ Register=fore:#75715E FileNameExtensions=asm; s; sx; inc; a51 [AutoHotkey Script] Comment=fore:#008000 +Doc Comment=italic; fore:#3F7F5F Escape=italic; fore:#660000 Syntax Operator=fore:#7F200F Expression Operator=fore:#FF4F00 diff --git a/CLAUDE.md b/CLAUDE.md index f0ab0cd08..fc707e19a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -84,6 +84,7 @@ Easy-to-miss touchpoints — derivable but only if you know to look: - **Theme INI sections live under `pszName` (4th `EDITLEXER` field), not the lexer name string.** Each new lexer needs a `[<pszName>]` block in every theme INI: `Build\Notepad3.ini`, `Build\Themes\*.ini`, `res\StdDarkModeScheme.ini`, locale variants `Build\Notepad3_<locale>.ini`. Renaming `pszName` orphans existing user style customizations. 
- **New style rows need theme INI entries too** — same rule as new lexers, just per-row. Each `EDITLEXER` row's label string (e.g. `L"User Literal"`) needs a matching `User Literal=` line in every theme INI's `[<pszName>]` block. Without it, the EDITLEXER inline default applies and the row is invisible to theme switching. `lexilla/wscite/*.properties` (one per language) are useful colour references. - **Homebrew lexers in `lexilla/lexers_x/`** (5 files): `LexAHK`, `LexCSV`, `LexJSON5`, `LexKotlin`, `LexVerilog` (`SCLEX_VERILOG` + `SCLEX_SYSVERILOG`). Their `SCE_*_*` enums live in `lexilla/lexers_x/SciXLexer.h`, not the stock `lexilla/include/SciLexer.h` — `#include "lexers_x/SciXLexer.h"` if you need the homebrew constants. `SCLEX_HTML` / `SCLEX_XML` use the upstream `lexilla/lexers/LexHTML.cxx`. +- **Test fixture per lexer**: `test\test_files\StyleLexers\styleLex<Name>\` holds the visual smoke-test scripts for each lexer. Any change that affects tokenization (new style, state-machine branch, keyword list, escape handling, fold rule, etc.) MUST either extend the existing fixture in that folder or add a new file there. The fixture exists to be opened in Notepad3 after build and visually verified — automated lexer assertions are not in scope. Folders are named after the EDITLEXER source file (`styleLexAHK.c` → `styleLexAHKL\` is the historical AHK folder; reuse it for AHK changes). When a touchpoint above (new SCE enum, theme row, comment-toggle arm, property arm) goes in, drop a corresponding line/section into the fixture so the next reviewer can see it light up. 
## Localization diff --git a/lexilla/lexers_x/LexAHK.cxx b/lexilla/lexers_x/LexAHK.cxx index a14a65eef..5f65f0eb7 100644 --- a/lexilla/lexers_x/LexAHK.cxx +++ b/lexilla/lexers_x/LexAHK.cxx @@ -10,6 +10,7 @@ #include +#include // _stricmp, _strnicmp (MSVC CRT, case-insensitive ASCII) #include #include // @@ -26,6 +27,7 @@ #include "WordList.h" // #include "CharSetX.h" +#include "StringUtils.h" #include "SciXLexer.h" @@ -202,7 +204,10 @@ Sci_Position SCI_METHOD LexerAHK::WordListSet(int n, const char *wl) int firstModification = -1; if (wordListN) { - if (wordListN->Set(wl)) { + // AHK is fully case-insensitive for keywords/commands/directives/etc. + // Load each wordlist lowercased; callers must lowercase currentWord + // before InList() (see Lexilla::ToLowerAscii in StringUtils.h). + if (wordListN->Set(wl, true)) { firstModification = 0; } } @@ -212,6 +217,9 @@ Sci_Position SCI_METHOD LexerAHK::WordListSet(int n, const char *wl) void LexerAHK::HighlightKeyword(char currentWord[], StyleContext& sc) { + // Wordlists are stored lowercase (see WordListSet) — canonicalize lookup. + ToLowerAscii(currentWord); + if (controlFlow.InList(currentWord)) { sc.ChangeState(SCE_AHK_WORD_CF); } @@ -265,6 +273,8 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in */ // True if in a continuation section bool bContinuationSection = (initStyle == SCE_AHK_STRING); + // True if the active continuation section is expression-mode (Join opt etc.) 
+ bool bExprContinuation = false; // Indicate if the lexer has seen only spaces since the start of the line bool bOnlySpaces = (!bContinuationSection); // Indicate if since the start of the line, lexer met only legal label chars @@ -278,6 +288,15 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in bool bInExprString = false; // To accept A-F chars in a number bool bInHexNumber = false; + // To accept digits after e/E in scientific notation + bool bInExponent = false; + // Inside a Send / SendInput / SendRaw / ControlSend* argument run + // (line-scoped; resets at line start) + bool bInSendArgs = false; + // True if the current identifier started at line start (no preceding non-space). + // Captured at IDENTIFIER entry; used at IDENTIFIER termination to disambiguate + // command-call Send from expression-operand Send. + bool bIdentAtLineStart = false; for (; sc.More(); sc.Forward()) { @@ -306,11 +325,15 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in bIsLabel = false; bInExpression = false; // I don't manage multiline expressions yet! bInHexNumber = false; + bInExponent = false; + bInSendArgs = false; + bIdentAtLineStart = false; } // Manage cases occuring in (almost) all states (not in comments) if (sc.state != SCE_AHK_COMMENTLINE && sc.state != SCE_AHK_COMMENTBLOCK && + sc.state != SCE_AHK_COMMENTDOC && !IsASpace(sc.ch)) { if (sc.ch == '`') { // Backtick, escape sequence @@ -377,6 +400,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in bIsHotkey = true; // Check if it is a known key sc.GetCurrent(currentWord, sizeof(currentWord)); + ToLowerAscii(currentWord); if (keysButtons.InList(currentWord)) { sc.ChangeState(SCE_AHK_WORD_KB); } @@ -398,7 +422,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in } // Determine if the current state should terminate. 
- if (sc.state == SCE_AHK_COMMENTLINE) { + if (sc.state == SCE_AHK_COMMENTLINE || sc.state == SCE_AHK_COMMENTDOC) { if (sc.atLineEnd) { sc.SetState(SCE_AHK_DEFAULT); } @@ -416,9 +440,9 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in } } else if (sc.state == SCE_AHK_STRING) { - if (bContinuationSection) { + if (bContinuationSection && !bExprContinuation) { if (bOnlySpaces && sc.ch == ')') { - // End of continuation section + // End of literal-string continuation section bContinuationSection = false; sc.SetState(SCE_AHK_SYNOPERATOR); } @@ -452,6 +476,20 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in sc.SetState(SCE_AHK_DEFAULT); } } + else if (bInExponent) { + if (!IsADigit(sc.ch)) { + bInExponent = false; + sc.SetState(SCE_AHK_DEFAULT); + } + } + else if ((sc.ch == 'e' || sc.ch == 'E') && + (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) { + // Scientific notation: enter exponent sub-state, optionally consume sign + bInExponent = true; + if (sc.chNext == '+' || sc.chNext == '-') { + sc.Forward(); + } + } else if (!(IsADigit(sc.ch) || sc.ch == '.')) { sc.SetState(SCE_AHK_DEFAULT); } @@ -460,7 +498,27 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in if (!WordChar.Contains(sc.ch)) { sc.GetCurrent(currentWord, sizeof(currentWord)); HighlightKeyword(currentWord, sc); - if (strcmp(currentWord, "if") == 0) { + // AHK_L user-function call: unknown identifier immediately followed by `(` + if (sc.state == SCE_AHK_DEFAULT && sc.ch == '(') { + sc.ChangeState(SCE_AHK_WORD_UD); + } + // Send/SendInput/SendRaw/SendPlay/SendEvent/SendUnicode/ControlSend[Raw] + // — flag the rest of the line so `{`/`}` style as WORD_KB instead of + // SYNOPERATOR (which keeps Fold() from oscillating on key braces). + // Gated on bIdentAtLineStart to avoid false positives like `if (Send)` + // or `MyFunc(Send, ...)` where Send is a variable, not a command call. 
+ if (sc.state == SCE_AHK_WORD_CMD && bIdentAtLineStart) { + if (_strnicmp(currentWord, "Send", 4) == 0 || + _strnicmp(currentWord, "ControlSend", 11) == 0) { + bInSendArgs = true; + } + } + // AHK keywords are case-insensitive; flow-of-control `if` and the + // `#If ` directive both put the rest of the line in expression + // mode. `#IfWin*` variants take a WinTitle, NOT an expression — they + // are excluded by the exact match on "#If". + if (_stricmp(currentWord, "if") == 0 || + _stricmp(currentWord, "#If") == 0) { bInExpression = true; } sc.SetState(SCE_AHK_DEFAULT); @@ -470,6 +528,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in if (sc.ch == '%') { // End of variable reference sc.GetCurrent(currentWord, sizeof(currentWord)); + ToLowerAscii(currentWord); if (variables.InList(currentWord)) { sc.ChangeState(SCE_AHK_VARREFKW); } @@ -506,8 +565,13 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in if (sc.state == SCE_AHK_DEFAULT) { if (sc.ch == ';' && (bOnlySpaces || IsASpace(sc.chPrev))) { - // Line comments are alone on the line or are preceded by a space - sc.SetState(SCE_AHK_COMMENTLINE); + // Line comments are alone on the line or are preceded by a space. + // AHK_L doc-comment convention: ";@" prefix (e.g. ;@param, ;@returns) + if (sc.chNext == '@') { + sc.SetState(SCE_AHK_COMMENTDOC); + } else { + sc.SetState(SCE_AHK_COMMENTLINE); + } } else if (bOnlySpaces && sc.Match('/', '*')) { // Comment at start of line (skipping white space) @@ -515,14 +579,52 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in sc.Forward(); } else if (sc.ch == '{' || sc.ch == '}') { - // Code block or special key {Enter} + // Inside Send-args: style as WORD_KB so Fold() (gated on SYNOPERATOR) + // doesn't oscillate on key-sequence braces — `Send {Tab}{Enter 3}`, + // `Send {{}` (literal `{`), etc. 
Outside Send-args these are code-block + // braces and need to stay SYNOPERATOR for folding to work. + if (bInSendArgs) { + sc.SetState(SCE_AHK_WORD_KB); + nextState = SCE_AHK_DEFAULT; + } else { + sc.SetState(SCE_AHK_SYNOPERATOR); + } + } + else if (bExprContinuation && bContinuationSection && bOnlySpaces && sc.ch == ')') { + // End of expression-mode continuation section (parallel to STRING-mode + // end-detection at the SCE_AHK_STRING handler above). + bContinuationSection = false; + bExprContinuation = false; sc.SetState(SCE_AHK_SYNOPERATOR); + nextState = SCE_AHK_DEFAULT; } else if (bOnlySpaces && sc.ch == '(') { - // Continuation section + // Continuation section. AHK_L: with explicit `Join` option the body is + // an expression continuation; otherwise it's a multi-line string literal. bContinuationSection = true; + bExprContinuation = false; + { + // Scan rest of the `(` line (case-insensitive) for whole-word "Join". + Sci_PositionU const docEnd = startPos + lengthDoc; + for (Sci_PositionU p = sc.currentPos + 1; p < docEnd; ++p) { + const char c0 = styler.SafeGetCharAt(p); + if (c0 == '\n' || c0 == '\r') break; + if (((c0 & 0xDF) == 'J') && + ((styler.SafeGetCharAt(p+1) & 0xDF) == 'O') && + ((styler.SafeGetCharAt(p+2) & 0xDF) == 'I') && + ((styler.SafeGetCharAt(p+3) & 0xDF) == 'N')) { + // Require whole-word boundary on both sides + const char cPrev = (p == 0) ? ' ' : styler.SafeGetCharAt(p - 1); + const char cNext = styler.SafeGetCharAt(p + 4); + if (!IsAlphaNumeric(cPrev) && !IsAlphaNumeric(cNext)) { + bExprContinuation = true; + break; + } + } + } + } sc.SetState(SCE_AHK_SYNOPERATOR); - nextState = SCE_AHK_STRING; // !!! Can be an expression! + nextState = bExprContinuation ? SCE_AHK_DEFAULT : SCE_AHK_STRING; } else if (sc.Match(':', '=') || sc.Match('+', '=') || @@ -535,6 +637,11 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in sc.Forward(); nextState = SCE_AHK_DEFAULT; } + else if (sc.ch == '.' 
&& WordChar.Contains(sc.chPrev) && WordChar.Contains(sc.chNext)) { + // AHK_L member access: ident.ident (distinct from concat/expr operator) + sc.SetState(SCE_AHK_SYNOPERATOR); + nextState = SCE_AHK_DEFAULT; + } else if (ExpOperator.Contains(sc.ch)) { sc.SetState(SCE_AHK_EXPOPERATOR); } @@ -552,6 +659,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in sc.SetState(SCE_AHK_NUMBER); } else if (WordChar.Contains(sc.ch)) { + bIdentAtLineStart = bOnlySpaces; sc.SetState(SCE_AHK_IDENTIFIER); } else if (sc.ch == ',') { diff --git a/lexilla/lexers_x/SciX.iface b/lexilla/lexers_x/SciX.iface index b5b72504c..539866d79 100644 --- a/lexilla/lexers_x/SciX.iface +++ b/lexilla/lexers_x/SciX.iface @@ -34,6 +34,7 @@ val SCE_AHK_WORD_UD=18 val SCE_AHK_VARREFKW=19 val SCE_AHK_ERROR=20 val SCE_AHK_HOTSTRINGOPT=21 +val SCE_AHK_COMMENTDOC=22 # Lexical states for SCLEX_KOTLIN diff --git a/lexilla/lexers_x/SciXLexer.h b/lexilla/lexers_x/SciXLexer.h index dd50e5ab6..42081424d 100644 --- a/lexilla/lexers_x/SciXLexer.h +++ b/lexilla/lexers_x/SciXLexer.h @@ -35,6 +35,7 @@ #define SCE_AHK_VARREFKW 19 #define SCE_AHK_ERROR 20 #define SCE_AHK_HOTSTRINGOPT 21 +#define SCE_AHK_COMMENTDOC 22 #define SCE_CSV_DEFAULT 0 diff --git a/lexilla/lexers_x/StringUtils.h b/lexilla/lexers_x/StringUtils.h index 94d186720..7cc42c90b 100644 --- a/lexilla/lexers_x/StringUtils.h +++ b/lexilla/lexers_x/StringUtils.h @@ -34,6 +34,17 @@ inline bool StrHasSuffix(const char *s, size_t length, const char (&suffix)[N]) return length >= N - 1 && strcmp(s + (length + 1 - N), suffix) == 0; } +// In-place ASCII lowercase. Intended for canonicalizing identifiers before +// case-insensitive WordList::InList lookups in case-insensitive languages +// (AHK, etc.). Non-ASCII bytes are left untouched. 
+inline void ToLowerAscii(char *s) noexcept { + for (; *s; ++s) { + if (*s >= 'A' && *s <= 'Z') { + *s = static_cast(*s + 32); + } + } +} + #if defined(__clang__) || defined(__GNUC__) || !defined(_MSC_BUILD)// || (_MSC_VER >= 1920) template diff --git a/res/StdDarkModeScheme.ini b/res/StdDarkModeScheme.ini index b05061718..e194bddfc 100644 --- a/res/StdDarkModeScheme.ini +++ b/res/StdDarkModeScheme.ini @@ -100,6 +100,7 @@ Directive Operand=fore:#B4E1F5 Register=fore:#F27E0D [AutoHotkey Script] Comment=fore:#B7B7B7 +Doc Comment=italic; fore:#A0E5A0 Escape=italic; fore:#F98488 Syntax Operator=fore:#B02D15 Expression Operator=fore:#FF4F00 diff --git a/src/StyleLexers/styleLexAHK.c b/src/StyleLexers/styleLexAHK.c index f38e0e0bc..3f6e1df0a 100644 --- a/src/StyleLexers/styleLexAHK.c +++ b/src/StyleLexers/styleLexAHK.c @@ -160,6 +160,7 @@ EDITLEXER lexAHK = { { {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" }, //{ SCE_AHK_DEFAULT, IDS_LEX_STR_Default, L"Default", L"", L"" }, { {MULTI_STYLE(SCE_AHK_COMMENTLINE,SCE_AHK_COMMENTBLOCK,0,0)}, IDS_LEX_STR_Comment, L"Comment", L"fore:#008000", L"" }, + { {SCE_AHK_COMMENTDOC}, IDS_LEX_STR_DocCmt, L"Doc Comment", L"italic; fore:#3F7F5F", L"" }, { {SCE_AHK_ESCAPE}, IDS_LEX_STR_Escape, L"Escape", L"fore:#FF8000", L"" }, { {SCE_AHK_SYNOPERATOR}, IDS_LEX_STR_SyntaxOp, L"Syntax Operator", L"fore:#7F200F", L"" }, { {SCE_AHK_EXPOPERATOR}, IDS_LEX_STR_63308, L"Expression Operator", L"fore:#FF4F00", L"" }, diff --git a/test/test_files/StyleLexers/styleLexAHKL/lexer_ahk_v1_smoke.ahk b/test/test_files/StyleLexers/styleLexAHKL/lexer_ahk_v1_smoke.ahk index 2b697683c..62f810747 100644 --- a/test/test_files/StyleLexers/styleLexAHKL/lexer_ahk_v1_smoke.ahk +++ b/test/test_files/StyleLexers/styleLexAHKL/lexer_ahk_v1_smoke.ahk @@ -13,7 +13,8 @@ #NoEnv #SingleInstance Force #Include %A_ScriptDir%\lib.ahk ; var deref inside a directive arg -#IfWinActive ahk_class Notepad ; conditional-directive context +#IfWinActive ahk_class Notepad ; 
conditional-directive context (WinTitle, NOT expression) +#If WinActive("Notepad") && A_Hour > 9 ; #If expression context (plan §6 D2) ; --- escape character + special chars in strings ------------------------- str := "hello`tworld`n""quoted`""" ; backtick escapes, doubled quote @@ -24,8 +25,9 @@ ints := 42 hex := 0xFF flt := 3.14 neg := -7 -; lexer currently lumps `1e5` as identifier; see plan §3 (scientific notation) -;sci := 1e5 +sci1 := 1e5 ; scientific notation (plan §6 L4) +sci2 := 1.5e-3 +sci3 := 0.5E+10 ; --- expression assignment + multi-char operators ------------------------ result := 3 + 4 * (2 ** 8) @@ -46,10 +48,20 @@ MsgBox, Hello, %name%! ; legacy command + %var% deref ; --- hotkeys ------------------------------------------------------------ ^!a:: ; Ctrl+Alt+a - Send {Tab}{Enter 3} ; send-key braces (plan §2.7 / §4) + Send {Tab}{Enter 3} ; send-key braces -> WORD_KB, no fold (plan §6 B3) + Send {{}MyVar{}} ; literal `{` / `}` send (unbalanced count without B3) + SendInput {F5}{F10} ; SendInput recognised + ControlSend, , {Enter}, ahk_class Notepad Run notepad.exe return +; B3 false-positive guard: Send used as a variable/identifier, NOT a command call. +; The `{` after `if (Send)` must stay SYNOPERATOR so the code block folds correctly. +if (Send) { + MsgBox, var-mode Send +} +result := MyFunc(Send, otherVar) ; Send as function arg, not command + ~LButton & RButton:: ; custom combo with modifiers Tooltip, Combo! return @@ -68,6 +80,39 @@ try { MsgBox, % "caught: " e } +; --- continuation sections (plan §6 D1) --------------------------------- +; literal-string continuation (default — body styles as STRING) +msg := +( +Line one of the message. +Line two with `tcontrol chars. +Last line. 
+) + +; expression-mode continuation via Join option — body styles as expression +total := 0 + +(Join + 1 + 2 + + 3 * 4 + + StrLen("hello") +) + +; --- doc-comment style ;@ (AHK_L convention, plan §6 L3) ---------------- +;@function MakePoint +;@param x - the x coordinate +;@param y - the y coordinate +;@returns Point object +MakePoint(x, y) { + return {x: x, y: y} +} + +; --- reference operator &var (plan §6 L5) ------------------------------- +GetSize(&outW, &outH) { ; ByRef parameters + outW := 800 + outH := 600 +} +GetSize(&w, &h) + ; --- class / extends / method / static (v1.1+) ------------------------- class Animal { static count := 0