fix: lexer AHK_L minor issues

This commit is contained in:
METANEOCORTEX\Kotti 2026-05-14 12:23:09 +02:00
parent abad6b2d92
commit 8b92003032
10 changed files with 185 additions and 14 deletions

View File

@ -96,6 +96,7 @@ Register=fore:#75715E
FileNameExtensions=asm; s; sx; inc; a51
[AutoHotkey Script]
Comment=fore:#008000
Doc Comment=italic; fore:#3F7F5F
Escape=italic; fore:#660000
Syntax Operator=fore:#7F200F
Expression Operator=fore:#FF4F00

View File

@ -96,6 +96,7 @@ Register=fore:#75715E
FileNameExtensions=asm; s; sx; inc; a51
[AutoHotkey Script]
Comment=fore:#008000
Doc Comment=italic; fore:#3F7F5F
Escape=italic; fore:#660000
Syntax Operator=fore:#7F200F
Expression Operator=fore:#FF4F00

View File

@ -84,6 +84,7 @@ Easy-to-miss touchpoints — derivable but only if you know to look:
- **Theme INI sections live under `pszName` (4th `EDITLEXER` field), not the lexer name string.** Each new lexer needs a `[<pszName>]` block in every theme INI: `Build\Notepad3.ini`, `Build\Themes\*.ini`, `res\StdDarkModeScheme.ini`, locale variants `Build\Notepad3_<locale>.ini`. Renaming `pszName` orphans existing user style customizations.
- **New style rows need theme INI entries too** — same rule as new lexers, just per-row. Each `EDITLEXER` row's label string (e.g. `L"User Literal"`) needs a matching `User Literal=<spec>` line in every theme INI's `[<pszName>]` block. Without it, the EDITLEXER inline default applies and the row is invisible to theme switching. `lexilla/wscite/*.properties` (one per language) are useful colour references.
- **Homebrew lexers in `lexilla/lexers_x/`** (5 files): `LexAHK`, `LexCSV`, `LexJSON5`, `LexKotlin`, `LexVerilog` (`SCLEX_VERILOG` + `SCLEX_SYSVERILOG`). Their `SCE_*_*` enums live in `lexilla/lexers_x/SciXLexer.h`, not the stock `lexilla/include/SciLexer.h` — `#include "lexers_x/SciXLexer.h"` if you need the homebrew constants. `SCLEX_HTML` / `SCLEX_XML` use the upstream `lexilla/lexers/LexHTML.cxx`.
- **Test fixture per lexer**: `test\test_files\StyleLexers\styleLex<NAME>\` holds the visual smoke-test scripts for each lexer. Any change that affects tokenization (new style, state-machine branch, keyword list, escape handling, fold rule, etc.) MUST either extend the existing fixture in that folder or add a new file there. The fixture exists to be opened in Notepad3 after build and visually verified — automated lexer assertions are not in scope. Folders are named after the EDITLEXER source file (`styleLexAHK.c` → `styleLexAHKL\` is the historical AHK folder; reuse it for AHK changes). When a touchpoint above (new SCE enum, theme row, comment-toggle arm, property arm) goes in, drop a corresponding line/section into the fixture so the next reviewer can see it light up.
## Localization

View File

@ -10,6 +10,7 @@
#include <string>
#include <string.h> // _stricmp, _strnicmp (MSVC CRT, case-insensitive ASCII)
#include <assert.h>
#include <map>
//
@ -26,6 +27,7 @@
#include "WordList.h"
//
#include "CharSetX.h"
#include "StringUtils.h"
#include "SciXLexer.h"
@ -202,7 +204,10 @@ Sci_Position SCI_METHOD LexerAHK::WordListSet(int n, const char *wl)
int firstModification = -1;
if (wordListN) {
if (wordListN->Set(wl)) {
// AHK is fully case-insensitive for keywords/commands/directives/etc.
// Load each wordlist lowercased; callers must lowercase currentWord
// before InList() (see Lexilla::ToLowerAscii in StringUtils.h).
if (wordListN->Set(wl, true)) {
firstModification = 0;
}
}
@ -212,6 +217,9 @@ Sci_Position SCI_METHOD LexerAHK::WordListSet(int n, const char *wl)
void LexerAHK::HighlightKeyword(char currentWord[], StyleContext& sc) {
// Wordlists are stored lowercase (see WordListSet) — canonicalize lookup.
ToLowerAscii(currentWord);
if (controlFlow.InList(currentWord)) {
sc.ChangeState(SCE_AHK_WORD_CF);
}
@ -265,6 +273,8 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
*/
// True if in a continuation section
bool bContinuationSection = (initStyle == SCE_AHK_STRING);
// True if the active continuation section is expression-mode (Join opt etc.)
bool bExprContinuation = false;
// Indicate if the lexer has seen only spaces since the start of the line
bool bOnlySpaces = (!bContinuationSection);
// Indicate if since the start of the line, lexer met only legal label chars
@ -278,6 +288,15 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
bool bInExprString = false;
// To accept A-F chars in a number
bool bInHexNumber = false;
// To accept digits after e/E in scientific notation
bool bInExponent = false;
// Inside a Send / SendInput / SendRaw / ControlSend* argument run
// (line-scoped; resets at line start)
bool bInSendArgs = false;
// True if the current identifier started at line start (no preceding non-space).
// Captured at IDENTIFIER entry; used at IDENTIFIER termination to disambiguate
// command-call Send from expression-operand Send.
bool bIdentAtLineStart = false;
for (; sc.More(); sc.Forward()) {
@ -306,11 +325,15 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
bIsLabel = false;
bInExpression = false; // I don't manage multiline expressions yet!
bInHexNumber = false;
bInExponent = false;
bInSendArgs = false;
bIdentAtLineStart = false;
}
// Manage cases occuring in (almost) all states (not in comments)
if (sc.state != SCE_AHK_COMMENTLINE &&
sc.state != SCE_AHK_COMMENTBLOCK &&
sc.state != SCE_AHK_COMMENTDOC &&
!IsASpace(sc.ch)) {
if (sc.ch == '`') {
// Backtick, escape sequence
@ -377,6 +400,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
bIsHotkey = true;
// Check if it is a known key
sc.GetCurrent(currentWord, sizeof(currentWord));
ToLowerAscii(currentWord);
if (keysButtons.InList(currentWord)) {
sc.ChangeState(SCE_AHK_WORD_KB);
}
@ -398,7 +422,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
}
// Determine if the current state should terminate.
if (sc.state == SCE_AHK_COMMENTLINE) {
if (sc.state == SCE_AHK_COMMENTLINE || sc.state == SCE_AHK_COMMENTDOC) {
if (sc.atLineEnd) {
sc.SetState(SCE_AHK_DEFAULT);
}
@ -416,9 +440,9 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
}
}
else if (sc.state == SCE_AHK_STRING) {
if (bContinuationSection) {
if (bContinuationSection && !bExprContinuation) {
if (bOnlySpaces && sc.ch == ')') {
// End of continuation section
// End of literal-string continuation section
bContinuationSection = false;
sc.SetState(SCE_AHK_SYNOPERATOR);
}
@ -452,6 +476,20 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
sc.SetState(SCE_AHK_DEFAULT);
}
}
else if (bInExponent) {
if (!IsADigit(sc.ch)) {
bInExponent = false;
sc.SetState(SCE_AHK_DEFAULT);
}
}
else if ((sc.ch == 'e' || sc.ch == 'E') &&
(IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
// Scientific notation: enter exponent sub-state, optionally consume sign
bInExponent = true;
if (sc.chNext == '+' || sc.chNext == '-') {
sc.Forward();
}
}
else if (!(IsADigit(sc.ch) || sc.ch == '.')) {
sc.SetState(SCE_AHK_DEFAULT);
}
@ -460,7 +498,27 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
if (!WordChar.Contains(sc.ch)) {
sc.GetCurrent(currentWord, sizeof(currentWord));
HighlightKeyword(currentWord, sc);
if (strcmp(currentWord, "if") == 0) {
// AHK_L user-function call: unknown identifier immediately followed by `(`
if (sc.state == SCE_AHK_DEFAULT && sc.ch == '(') {
sc.ChangeState(SCE_AHK_WORD_UD);
}
// Send/SendInput/SendRaw/SendPlay/SendEvent/SendUnicode/ControlSend[Raw]
// — flag the rest of the line so `{`/`}` style as WORD_KB instead of
// SYNOPERATOR (which keeps Fold() from oscillating on key braces).
// Gated on bIdentAtLineStart to avoid false positives like `if (Send)`
// or `MyFunc(Send, ...)` where Send is a variable, not a command call.
if (sc.state == SCE_AHK_WORD_CMD && bIdentAtLineStart) {
if (_strnicmp(currentWord, "Send", 4) == 0 ||
_strnicmp(currentWord, "ControlSend", 11) == 0) {
bInSendArgs = true;
}
}
// AHK keywords are case-insensitive; flow-of-control `if` and the
// `#If <expr>` directive both put the rest of the line in expression
// mode. `#IfWin*` variants take a WinTitle, NOT an expression — they
// are excluded by the exact match on "#If".
if (_stricmp(currentWord, "if") == 0 ||
_stricmp(currentWord, "#If") == 0) {
bInExpression = true;
}
sc.SetState(SCE_AHK_DEFAULT);
@ -470,6 +528,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
if (sc.ch == '%') {
// End of variable reference
sc.GetCurrent(currentWord, sizeof(currentWord));
ToLowerAscii(currentWord);
if (variables.InList(currentWord)) {
sc.ChangeState(SCE_AHK_VARREFKW);
}
@ -506,8 +565,13 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
if (sc.state == SCE_AHK_DEFAULT) {
if (sc.ch == ';' &&
(bOnlySpaces || IsASpace(sc.chPrev))) {
// Line comments are alone on the line or are preceded by a space
sc.SetState(SCE_AHK_COMMENTLINE);
// Line comments are alone on the line or are preceded by a space.
// AHK_L doc-comment convention: ";@" prefix (e.g. ;@param, ;@returns)
if (sc.chNext == '@') {
sc.SetState(SCE_AHK_COMMENTDOC);
} else {
sc.SetState(SCE_AHK_COMMENTLINE);
}
}
else if (bOnlySpaces && sc.Match('/', '*')) {
// Comment at start of line (skipping white space)
@ -515,14 +579,52 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
sc.Forward();
}
else if (sc.ch == '{' || sc.ch == '}') {
// Code block or special key {Enter}
// Inside Send-args: style as WORD_KB so Fold() (gated on SYNOPERATOR)
// doesn't oscillate on key-sequence braces — `Send {Tab}{Enter 3}`,
// `Send {{}` (literal `{`), etc. Outside Send-args these are code-block
// braces and need to stay SYNOPERATOR for folding to work.
if (bInSendArgs) {
sc.SetState(SCE_AHK_WORD_KB);
nextState = SCE_AHK_DEFAULT;
} else {
sc.SetState(SCE_AHK_SYNOPERATOR);
}
}
else if (bExprContinuation && bContinuationSection && bOnlySpaces && sc.ch == ')') {
// End of expression-mode continuation section (parallel to STRING-mode
// end-detection at the SCE_AHK_STRING handler above).
bContinuationSection = false;
bExprContinuation = false;
sc.SetState(SCE_AHK_SYNOPERATOR);
nextState = SCE_AHK_DEFAULT;
}
else if (bOnlySpaces && sc.ch == '(') {
// Continuation section
// Continuation section. AHK_L: with explicit `Join` option the body is
// an expression continuation; otherwise it's a multi-line string literal.
bContinuationSection = true;
bExprContinuation = false;
{
// Scan rest of the `(` line (case-insensitive) for whole-word "Join".
Sci_PositionU const docEnd = startPos + lengthDoc;
for (Sci_PositionU p = sc.currentPos + 1; p < docEnd; ++p) {
const char c0 = styler.SafeGetCharAt(p);
if (c0 == '\n' || c0 == '\r') break;
if (((c0 & 0xDF) == 'J') &&
((styler.SafeGetCharAt(p+1) & 0xDF) == 'O') &&
((styler.SafeGetCharAt(p+2) & 0xDF) == 'I') &&
((styler.SafeGetCharAt(p+3) & 0xDF) == 'N')) {
// Require whole-word boundary on both sides
const char cPrev = (p == 0) ? ' ' : styler.SafeGetCharAt(p - 1);
const char cNext = styler.SafeGetCharAt(p + 4);
if (!IsAlphaNumeric(cPrev) && !IsAlphaNumeric(cNext)) {
bExprContinuation = true;
break;
}
}
}
}
sc.SetState(SCE_AHK_SYNOPERATOR);
nextState = SCE_AHK_STRING; // !!! Can be an expression!
nextState = bExprContinuation ? SCE_AHK_DEFAULT : SCE_AHK_STRING;
}
else if (sc.Match(':', '=') ||
sc.Match('+', '=') ||
@ -535,6 +637,11 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
sc.Forward();
nextState = SCE_AHK_DEFAULT;
}
else if (sc.ch == '.' && WordChar.Contains(sc.chPrev) && WordChar.Contains(sc.chNext)) {
// AHK_L member access: ident.ident (distinct from concat/expr operator)
sc.SetState(SCE_AHK_SYNOPERATOR);
nextState = SCE_AHK_DEFAULT;
}
else if (ExpOperator.Contains(sc.ch)) {
sc.SetState(SCE_AHK_EXPOPERATOR);
}
@ -552,6 +659,7 @@ void SCI_METHOD LexerAHK::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, in
sc.SetState(SCE_AHK_NUMBER);
}
else if (WordChar.Contains(sc.ch)) {
bIdentAtLineStart = bOnlySpaces;
sc.SetState(SCE_AHK_IDENTIFIER);
}
else if (sc.ch == ',') {

View File

@ -34,6 +34,7 @@ val SCE_AHK_WORD_UD=18
val SCE_AHK_VARREFKW=19
val SCE_AHK_ERROR=20
val SCE_AHK_HOTSTRINGOPT=21
val SCE_AHK_COMMENTDOC=22
# Lexical states for SCLEX_KOTLIN

View File

@ -35,6 +35,7 @@
#define SCE_AHK_VARREFKW 19
#define SCE_AHK_ERROR 20
#define SCE_AHK_HOTSTRINGOPT 21
#define SCE_AHK_COMMENTDOC 22
#define SCE_CSV_DEFAULT 0

View File

@ -34,6 +34,17 @@ inline bool StrHasSuffix(const char *s, size_t length, const char (&suffix)[N])
return length >= N - 1 && strcmp(s + (length + 1 - N), suffix) == 0;
}
// Lowercases the NUL-terminated buffer `s` in place, rewriting only the ASCII
// letters 'A'..'Z'; every other byte (digits, punctuation, non-ASCII bytes)
// is left exactly as found. Meant for canonicalizing an identifier before a
// WordList::InList lookup in case-insensitive languages such as AHK.
// `s` must point to a valid NUL-terminated string.
inline void ToLowerAscii(char *s) noexcept {
	// Distance between an uppercase ASCII letter and its lowercase twin.
	constexpr char caseOffset = 'a' - 'A';
	while (*s != '\0') {
		const char ch = *s;
		if ('A' <= ch && ch <= 'Z') {
			*s = static_cast<char>(ch + caseOffset);
		}
		++s;
	}
}
#if defined(__clang__) || defined(__GNUC__) || !defined(_MSC_BUILD)// || (_MSC_VER >= 1920)
template <size_t N>

View File

@ -100,6 +100,7 @@ Directive Operand=fore:#B4E1F5
Register=fore:#F27E0D
[AutoHotkey Script]
Comment=fore:#B7B7B7
Doc Comment=italic; fore:#A0E5A0
Escape=italic; fore:#F98488
Syntax Operator=fore:#B02D15
Expression Operator=fore:#FF4F00

View File

@ -160,6 +160,7 @@ EDITLEXER lexAHK = {
{ {STYLE_DEFAULT}, IDS_LEX_STR_Default, L"Default", L"", L"" },
//{ SCE_AHK_DEFAULT, IDS_LEX_STR_Default, L"Default", L"", L"" },
{ {MULTI_STYLE(SCE_AHK_COMMENTLINE,SCE_AHK_COMMENTBLOCK,0,0)}, IDS_LEX_STR_Comment, L"Comment", L"fore:#008000", L"" },
{ {SCE_AHK_COMMENTDOC}, IDS_LEX_STR_DocCmt, L"Doc Comment", L"italic; fore:#3F7F5F", L"" },
{ {SCE_AHK_ESCAPE}, IDS_LEX_STR_Escape, L"Escape", L"fore:#FF8000", L"" },
{ {SCE_AHK_SYNOPERATOR}, IDS_LEX_STR_SyntaxOp, L"Syntax Operator", L"fore:#7F200F", L"" },
{ {SCE_AHK_EXPOPERATOR}, IDS_LEX_STR_63308, L"Expression Operator", L"fore:#FF4F00", L"" },

View File

@ -13,7 +13,8 @@
#NoEnv
#SingleInstance Force
#Include %A_ScriptDir%\lib.ahk ; var deref inside a directive arg
#IfWinActive ahk_class Notepad ; conditional-directive context
#IfWinActive ahk_class Notepad ; conditional-directive context (WinTitle, NOT expression)
#If WinActive("Notepad") && A_Hour > 9 ; #If expression context (plan §6 D2)
; --- escape character + special chars in strings -------------------------
str := "hello`tworld`n""quoted`""" ; backtick escapes, doubled quote
@ -24,8 +25,9 @@ ints := 42
hex := 0xFF
flt := 3.14
neg := -7
; lexer currently lumps `1e5` as identifier; see plan §3 (scientific notation)
;sci := 1e5
sci1 := 1e5 ; scientific notation (plan §6 L4)
sci2 := 1.5e-3
sci3 := 0.5E+10
; --- expression assignment + multi-char operators ------------------------
result := 3 + 4 * (2 ** 8)
@ -46,10 +48,20 @@ MsgBox, Hello, %name%! ; legacy command + %var% deref
; --- hotkeys ------------------------------------------------------------
^!a:: ; Ctrl+Alt+a
Send {Tab}{Enter 3} ; send-key braces (plan §2.7 / §4)
Send {Tab}{Enter 3} ; send-key braces -> WORD_KB, no fold (plan §6 B3)
Send {{}MyVar{}} ; literal `{` / `}` send (unbalanced count without B3)
SendInput {F5}{F10} ; SendInput recognised
ControlSend, , {Enter}, ahk_class Notepad
Run notepad.exe
return
; B3 false-positive guard: Send used as a variable/identifier, NOT a command call.
; The `{` after `if (Send)` must stay SYNOPERATOR so the code block folds correctly.
if (Send) {
MsgBox, var-mode Send
}
result := MyFunc(Send, otherVar) ; Send as function arg, not command
~LButton & RButton:: ; custom combo with modifiers
Tooltip, Combo!
return
@ -68,6 +80,39 @@ try {
MsgBox, % "caught: " e
}
; --- continuation sections (plan §6 D1) ---------------------------------
; literal-string continuation (default — body styles as STRING)
msg :=
(
Line one of the message.
Line two with `tcontrol chars.
Last line.
)
; expression-mode continuation via Join option — body styles as expression
total := 0 +
(Join
1 + 2
+ 3 * 4
+ StrLen("hello")
)
; --- doc-comment style ;@ (AHK_L convention, plan §6 L3) ----------------
;@function MakePoint
;@param x - the x coordinate
;@param y - the y coordinate
;@returns Point object
MakePoint(x, y) {
return {x: x, y: y}
}
; --- reference operator &var (plan §6 L5) -------------------------------
GetSize(&outW, &outH) { ; ByRef parameters
outW := 800
outH := 600
}
GetSize(&w, &h)
; --- class / extends / method / static (v1.1+) -------------------------
class Animal {
static count := 0