Notepad3/test/test_files/StyleLexers/styleLexJSON/Sample_JSON5_extended.json
2026-05-11 14:23:32 +02:00

250 lines
10 KiB
JSON
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// =====================================================================
// Extended JSON5 / JSON-LD test file for the Notepad3 homebrew JSON lexer
// (lexilla/lexers_x/LexJSON.cxx).
//
// Requires both lexer properties enabled to exercise every branch:
// lexer.json.allow.comments = 1
// lexer.json.escape.sequence = 1
//
// Each section is annotated with the lexer feature it targets so a failed
// highlight is easy to attribute to a specific code path.
// =====================================================================
/* ---------------------------------------------------------------
Stream / block comment. Multiple lines, with stray * and / chars
inside (separated, so they cannot form a close marker).
--------------------------------------------------------------- */
{
// ---------- Strings: quoting variants ----------
doubleQuoted: "plain double-quoted string",
singleQuoted: 'plain single-quoted string',
mixedInside1: "contains 'single quotes' inside double",
mixedInside2: 'contains "double quotes" inside single',
emptyDouble: "",
emptySingle: '',
// ---------- Strings: line continuation (backslash + EOL) ----------
lineContLF: "first line\
second line\
third line",
lineContSingle: 'wrap with\
single quotes too',
// ---------- Strings: standard escape sequences ----------
// (lexer.json.escape.sequence = 1 to highlight these)
escSimple: "tab=\t nl=\n cr=\r bs=\b ff=\f vt=\v null=\0 slash=\/ bslash=\\ quote=\" apos=\'",
escUnicode: "\u00e4\u00f6\u00fc \u20AC 껮 꿾",
escSingleStr: 'mix: \t\u00e9\\done',
// ---------- Property names: quoted vs unquoted ----------
unquotedKey: 'JSON5 allows bare property names',
"quotedKey": "classic JSON",
'singleKey': 'single-quoted property name',
$dollar: 1,
_underscore: 2,
mixed123name: 3,
// ---------- Numbers: integers, signs, decimals ----------
zero: 0,
posInt: 42,
negInt: -42,
posSign: +7,
decimal: 3.14159,
leadingDot: .5,
trailingDot: 5. ,
// Tightened dot-part rule (E1): "1 . 0" is no longer a valid number.
// Uncomment to verify it now highlights as ERROR (was NUMBER before E1):
//tightDot: 1 . 0,
// ---------- Numbers: exponent / scientific ----------
exp1: 1e10,
exp2: 1.5E-3,
exp3: 2.5e+12,
expNeg: -6.022e23,
// ---------- Numbers: hexadecimal (homebrew-only) ----------
hexLower: 0xff,
hexMixed: 0xDeCaF,
hexBig: 0xCAFEBABE,
// ---------- Keywords ----------
yes: true,
no: false,
nothing: null,
inf: Infinity,
posInf: +Infinity,
negInf: -Infinity,
notNum: NaN,
// ---------- Trailing commas (objects + arrays) ----------
trailingInObj: { a: 1, b: 2, },
trailingInArr: [ 1, 2, 3, ],
emptyArr: [],
emptyObj: {},
// ---------- URL hotspots inside strings ----------
urlHttps: "https://example.com/path?q=1#frag",
urlHttp: "http://example.com/",
urlFtp: "ftp://files.example.com/dir/",
urlSsh: "ssh://git@example.com/repo.git",
urlGit: "git://example.com/repo.git",
urlSvn: "svn://svn.example.com/trunk",
urlMailto: "mailto:user@example.com",
// ---------- URL with embedded escape (exercises the URI/LDKEYWORD escape fix) ----------
// Without the fix the URI style would terminate at '\' and the rest of
// the string would be mis-styled. With the fix, '\u00e9' is recognized
// as an escape sequence and the URI style resumes afterwards (well, the
// remainder is plain string -- the URI ends at the non-URL char).
urlWithEsc: "https://example.com/caf\u00e9/menu",
urlWithBackslash:"https://example.com/path\\sub",
// ---------- JSON-LD @ keywords ----------
"@context": "https://schema.org/",
"@id": "https://example.org/things/1",
"@type": "Person",
"@language": "en",
"@vocab": "https://example.org/vocab#",
// ---------- Compact IRIs (single colon, alpha/$/_/- around it) ----------
"schema:name": "Compact IRI Example",
"foaf:knows": "another-iri",
"$compact_-Form:value": "edge characters in prefix",
// ---------- Strings that look like compact IRI but aren't (multiple colons -> not highlighted) ----------
"not:a:compact:iri": "more than one colon disqualifies highlighting",
"has space:notIRI": "space disqualifies",
// ---------- Nested structure with everything mixed ----------
nested: {
'array': [
0xAB,
.25,
'item with \'escaped\' single quotes',
"item with \"escaped\" double quotes",
{ inner: +Infinity, hex: 0xFEED, },
],
'urls': [
"http://a.example/",
"https://b.example/path",
"mailto:noreply@example.com",
],
},
// ---------- B1: wrong-quote close in ESCAPESEQUENCE ----------
// Triggering the bug requires (a) lexer.json.escape.sequence = 1 and
// (b) an escape sequence whose terminating char is immediately followed
// by the OPPOSITE quote. Pre-fix, the opposite quote falsely closed
// the string and the rest of the line styled as ERROR. Post-fix, the
// whole string highlights cleanly.
b1DoubleAfterU: "before\u1234'after the false-close point",
b1SingleAfterU: 'before\u5678"after the false-close point',
b1DoubleAfterX: "before\xAB'after",
b1SingleAfterX: 'before\xCD"after',
// ---------- L1: \xHH hex escape (JSON5 §5.5.4) ----------
l1Hex: "low=\x00 high=\xFF tilde=\x7E AT=\x40",
l1HexInSing: 'mix \xAA before \xbb after',
// ---------- L2: identity escape (any non-LineTerminator/digit/x/u char) ----------
// These were ERROR before; JSON5 says \? -> ?, \, -> , etc.
l2Identity: "\? \, \; \! \% \& \= \[ \] \( \) \{ \}",
l2InSingle: 'punct: \? \, \; -- still string',
// Legacy octal (\1..\9) remains forbidden -- uncomment to verify ERROR:
//l2OctalErr: "\1\2\3",
// ---------- L3: bare CR line continuation ----------
// Cannot be exercised in a CRLF/LF file -- requires CR-only line endings
// (Edit > Line Endings > Macintosh CR). When that's set, a string ending
// a line with a single backslash should continue onto the next line just
// like the LF / CRLF case above.
// ---------- L4: U+2028 / U+2029 line continuation ----------
// The two Unicode LineTerminator chars are below as literals. Each line
// contains "\<LS>continued" or "\<PS>continued" -- a single STRING that
// wraps via the JS-style line separator.
l4LS: "before\continued",
l4PS: "before\continued",
// =====================================================================
// BELOW: extensions covering the strict-ID / hardcoded-literal /
// signed-NaN / setURL-typo / block-comment-folding lexer changes.
// =====================================================================
// ---------- Hardcoded JSON5 reserved literals: signed NaN ----------
// Pre-fix the word-list path handled +Infinity / -Infinity but missed
// +NaN / -NaN (those tokens were never in the keyword list). After the
// hardcoded-literal change both style as NUMBER.
posNaN: +NaN,
negNaN: -NaN,
// ---------- Literal-boundary check ----------
// true / false / null / Infinity / NaN are reserved ONLY when the next
// character is NOT an identifier-continue char. These keys start with
// literal text but continue with more identifier characters, so they
// MUST style as PROPERTYNAME (not KEYWORD / NUMBER).
nullable: 'starts with literal "null"',
truer: 'starts with literal "true"',
falsehood: 'starts with literal "false"',
Infinityx: 'starts with literal "Infinity"',
NaNny: 'starts with literal "NaN"',
// ---------- Strict Unicode IdentifierName: accepted (UAX #31 ID_Start) ----------
// Pre-strict accepted any byte >= 0x80 indiscriminately; post-strict
// requires the actual code point to be a Unicode Letter / $ / _.
café: 'Latin-1 Supplement (precomposed)',
naïve: 'Latin-1 Supplement with diaeresis',
Σmega: 'Greek capital sigma + ASCII',
λambda: 'Greek small lambda + ASCII',
имя: 'Cyrillic letters (Russian "name")',
名前: 'CJK Han ideographs (Japanese "name")',
名字: 'CJK Han ideographs (Chinese)',
// ---------- IdentifierPart: ZWNJ / ZWJ inside identifier (invisible chars) ----------
// ZWNJ (U+200C) and ZWJ (U+200D) are valid IdentifierPart per ECMA-262.
// The bytes are present in the source even though they don't render --
// inspect with a hex viewer; the unquoted keys below must lex as
// PROPERTYNAME, not break at the joiner.
xy_zwnj: 'unquoted key with ZWNJ between x and y',
xy_zwj: 'unquoted key with ZWJ between x and y',
// ---------- Strict Unicode rejection: should style as PARSING ERROR ----------
// Each leading character is NOT a Unicode Letter, so not ID_Start.
// Pre-strict accepted these (over-accepting all bytes >= 0x80);
// post-strict flags them as ERROR.
// Emoji 😀 (U+1F600, category So) -- ERROR:
😀: 'leading emoji must be ERROR',
// Math symbol ∞ (U+221E, category Sm) -- ERROR:
∞: 'leading math symbol must be ERROR',
// Currency € (U+20AC, category Sc) -- JSON5 allows ONLY ASCII '$':
€: 'leading currency symbol must be ERROR',
// ---------- URI charset typo fix: ';' inside URL ----------
// Pre-fix setURL was "...,)," (duplicated ')' with ';' missing), so the
// URI style terminated at the first ';'. Post-fix ';' is part of the
// RFC-3986 sub-delim set and the whole URL highlights as URI through
// the matrix parameters.
urlSemicolon: "http://example.com/path;jsessionid=abc;v=2",
// ---------- Folding: multi-line block comment ----------
// Fold() now treats a multi-line /* ... */ as a fold region (HEADERFLAG
// on the opening line). The block below should be foldable; the
// single-line /* ... */ further down should NOT produce a fold (its
// enter+exit cancel within the same line).
/* This block comment
spans three lines
and should produce a fold marker on the opening line. */
foldDemo: 'after the foldable block comment',
/* single-line comment -- should NOT produce a fold marker */
noFold: 'after the non-foldable comment',
// ---------- Trailing item to verify the closing brace styling ----------
last: 'end-of-test',
}