Notepad3/test/test_files/StyleLexers/styleLexJSON/Sample_JSON5_extended.json

// =====================================================================
// Extended JSON5 / JSON-LD test file for the Notepad3 homebrew JSON lexer
// (lexilla/lexers_x/LexJSON.cxx).
//
// Requires both lexer properties enabled to exercise every branch:
//   lexer.json.allow.comments  = 1
//   lexer.json.escape.sequence = 1
//
// Each section is annotated with the lexer feature it targets so a failed
// highlight is easy to attribute to a specific code path.
// =====================================================================

/* ---------------------------------------------------------------
   Stream / block comment.  Multiple lines, with stray * and / chars
   inside (separated, so they cannot form a close marker).
   --------------------------------------------------------------- */

{
  // ---------- Strings: quoting variants ----------
  doubleQuoted:   "plain double-quoted string",
  singleQuoted:   'plain single-quoted string',
  mixedInside1:   "contains 'single quotes' inside double",
  mixedInside2:   'contains "double quotes" inside single',
  emptyDouble:    "",
  emptySingle:    '',

  // ---------- Strings: line continuation (backslash + EOL) ----------
  lineContLF:     "first line\
second line\
third line",
  lineContSingle: 'wrap with\
single quotes too',

  // ---------- Strings: standard escape sequences ----------
  // (lexer.json.escape.sequence = 1 to highlight these)
  escSimple:      "tab=\t  nl=\n  cr=\r  bs=\b  ff=\f  vt=\v  null=\0  slash=\/  bslash=\\  quote=\"  apos=\'",
  escUnicode:     "\u00E4\u00F6\u00FC \u20AC \uAEEE \uAFFE",
  escSingleStr:   'mix: \t\u00e9\\done',

  // ---------- Property names: quoted vs unquoted ----------
  unquotedKey:    'JSON5 allows bare property names',
  "quotedKey":    "classic JSON",
  'singleKey':    'single-quoted property name',
  $dollar:        1,
  _underscore:    2,
  mixed123name:   3,

  // ---------- Numbers: integers, signs, decimals ----------
  zero:           0,
  posInt:         42,
  negInt:         -42,
  posSign:        +7,
  decimal:        3.14159,
  leadingDot:      .5,
  trailingDot:    5. ,
  // Tightened dot-part rule (E1): "1 . 0" is no longer a valid number.
  // Uncomment to verify it now highlights as ERROR (was NUMBER before E1):
  //tightDot:     1 . 0,

  // ---------- Numbers: exponent / scientific ----------
  exp1:           1e10,
  exp2:           1.5E-3,
  exp3:           2.5e+12,
  expNeg:         -6.022e23,

  // ---------- Numbers: hexadecimal (homebrew-only) ----------
  hexLower:       0xff,
  hexMixed:       0xDeCaF,
  hexBig:         0xCAFEBABE,

  // ---------- Keywords ----------
  yes:            true,
  no:             false,
  nothing:        null,
  inf:            Infinity,
  posInf:         +Infinity,
  negInf:         -Infinity,
  notNum:         NaN,

  // ---------- Trailing commas (objects + arrays) ----------
  trailingInObj:  { a: 1, b: 2, },
  trailingInArr:  [ 1, 2, 3, ],
  emptyArr:       [],
  emptyObj:       {},

  // ---------- URL hotspots inside strings ----------
  urlHttps:       "https://example.com/path?q=1#frag",
  urlHttp:        "http://example.com/",
  urlFtp:         "ftp://files.example.com/dir/",
  urlSsh:         "ssh://git@example.com/repo.git",
  urlGit:         "git://example.com/repo.git",
  urlSvn:         "svn://svn.example.com/trunk",
  urlMailto:      "mailto:user@example.com",

  // ---------- URL with embedded escape (exercises the URI/LDKEYWORD escape fix) ----------
  // Without the fix the URI style would terminate at '\' and the rest of
  // the string would be mis-styled.  With the fix, '\u00e9' is recognized
  // as an escape sequence and styling resumes afterwards (the remainder is
  // plain string, not URI -- the URI ends at the first non-URL char).
  urlWithEsc:     "https://example.com/caf\u00e9/menu",
  urlWithBackslash:"https://example.com/path\\sub",

  // ---------- JSON-LD @ keywords ----------
  "@context":     "https://schema.org/",
  "@id":          "https://example.org/things/1",
  "@type":        "Person",
  "@language":    "en",
  "@vocab":       "https://example.org/vocab#",

  // ---------- Compact IRIs (single colon, alpha/$/_/- around it) ----------
  "schema:name":          "Compact IRI Example",
  "foaf:knows":           "another-iri",
  "$compact_-Form:value": "edge characters in prefix",

  // ---------- Strings that look like compact IRI but aren't (multiple colons -> not highlighted) ----------
  "not:a:compact:iri":    "more than one colon disqualifies highlighting",
  "has space:notIRI":     "space disqualifies",

  // ---------- Nested structure with everything mixed ----------
  nested: {
    'array': [
      0xAB,
      .25,
      'item with \'escaped\' single quotes',
      "item with \"escaped\" double quotes",
      { inner: +Infinity, hex: 0xFEED, },
    ],
    'urls': [
      "http://a.example/",
      "https://b.example/path",
      "mailto:noreply@example.com",
    ],
  },

  // ---------- B1: wrong-quote close in ESCAPESEQUENCE ----------
  // Triggering the bug requires (a) lexer.json.escape.sequence = 1 and
  // (b) an escape sequence whose terminating char is immediately followed
  // by the OPPOSITE quote.  Pre-fix, the opposite quote falsely closed
  // the string and the rest of the line styled as ERROR.  Post-fix, the
  // whole string highlights cleanly.
  b1DoubleAfterU: "before\u1234'after the false-close point",
  b1SingleAfterU: 'before\u5678"after the false-close point',
  b1DoubleAfterX: "before\xAB'after",
  b1SingleAfterX: 'before\xCD"after',

  // ---------- L1: \xHH hex escape (JSON5 §5.5.4) ----------
  l1Hex:        "low=\x00  high=\xFF  tilde=\x7E  AT=\x40",
  l1HexInSing:  'mix \xAA before \xbb after',

  // ---------- L2: identity escape (any non-LineTerminator/digit/x/u char) ----------
  // These were ERROR before; JSON5 says \? -> ?, \, -> , etc.
  l2Identity:   "\? \, \; \! \% \& \= \[ \] \( \) \{ \}",
  l2InSingle:   'punct: \? \, \; -- still string',
  // Legacy octal (\1..\9) remains forbidden -- uncomment to verify ERROR:
  //l2OctalErr: "\1\2\3",

  // ---------- L3: bare CR line continuation ----------
  // Cannot be exercised in a CRLF/LF file -- requires CR-only line endings
  // (Edit > Line Endings > Macintosh CR).  When that's set, a string ending
  // a line with a single backslash should continue onto the next line just
  // like the LF / CRLF case above.

  // ---------- L4: U+2028 / U+2029 line continuation ----------
  // The two Unicode LineTerminator chars are below as literals.  Each line
  // contains "\<LS>continued" or "\<PS>continued" -- a single STRING that
  // wraps via the JS-style line separator.
  l4LS:         "before\ continued",
  l4PS:         "before\ continued",

  // =====================================================================
  // BELOW: extensions covering the strict-ID / hardcoded-literal /
  // signed-NaN / setURL-typo / block-comment-folding lexer changes.
  // =====================================================================

  // ---------- Hardcoded JSON5 reserved literals: signed NaN ----------
  // Pre-fix the word-list path handled +Infinity / -Infinity but missed
  // +NaN / -NaN (those tokens were never in the keyword list).  After the
  // hardcoded-literal change both style as NUMBER.
  posNaN:         +NaN,
  negNaN:         -NaN,

  // ---------- Literal-boundary check ----------
  // true / false / null / Infinity / NaN are reserved ONLY when the next
  // character is NOT an identifier-continue char.  These keys start with
  // literal text but continue with more identifier characters, so they
  // MUST style as PROPERTYNAME (not KEYWORD / NUMBER).
  nullable:       'starts with literal "null"',
  truer:          'starts with literal "true"',
  falsehood:      'starts with literal "false"',
  Infinityx:      'starts with literal "Infinity"',
  NaNny:          'starts with literal "NaN"',

  // ---------- Strict Unicode IdentifierName: accepted (UAX #31 ID_Start) ----------
  // Pre-strict accepted any byte >= 0x80 indiscriminately; post-strict
  // requires the actual code point to be a Unicode Letter / $ / _.
  café:           'Latin-1 Supplement (precomposed)',
  naïve:          'Latin-1 Supplement with diaeresis',
  Σmega:          'Greek capital sigma + ASCII',
  λambda:         'Greek small lambda + ASCII',
  имя:            'Cyrillic letters (Russian "name")',
  名前:           'CJK Han ideographs (Japanese "name")',
  中文:           'CJK Han ideographs (Chinese)',

  // ---------- IdentifierPart: ZWNJ / ZWJ inside identifier (invisible chars) ----------
  // ZWNJ (U+200C) and ZWJ (U+200D) are valid IdentifierPart per ECMA-262.
  // The bytes are present in the source even though they don't render --
  // inspect with a hex viewer; the unquoted keys below must lex as
  // PROPERTYNAME, not break at the joiner.
  x‌y_zwnj:        'unquoted key with ZWNJ between x and y',
  x‍y_zwj:         'unquoted key with ZWJ between x and y',

  // ---------- Strict Unicode rejection: should style as PARSING ERROR ----------
  // Each leading character is NOT a Unicode Letter, so not ID_Start.
  // Pre-strict accepted these (over-accepting all bytes >= 0x80);
  // post-strict flags them as ERROR.

  // Emoji 😀 (U+1F600, category So) -- ERROR:
  😀: 'leading emoji must be ERROR',

  // Math symbol ∞ (U+221E, category Sm) -- ERROR:
  ∞: 'leading math symbol must be ERROR',

  // Currency € (U+20AC, category Sc) -- JSON5 allows ONLY ASCII '$':
  €: 'leading currency symbol must be ERROR',

  // ---------- URI charset typo fix: ';' inside URL ----------
  // Pre-fix setURL was "...,)," (duplicated ')' with ';' missing), so the
  // URI style terminated at the first ';'.  Post-fix ';' is part of the
  // RFC-3986 sub-delim set and the whole URL highlights as URI through
  // the matrix parameters.
  urlSemicolon:   "http://example.com/path;jsessionid=abc;v=2",

  // ---------- Folding: multi-line block comment ----------
  // Fold() now treats a multi-line /* ... */ as a fold region (HEADERFLAG
  // on the opening line).  The block below should be foldable; the
  // single-line /* ... */ further down should NOT produce a fold (its
  // enter+exit cancel within the same line).

  /* This block comment
     spans three lines
     and should produce a fold marker on the opening line. */
  foldDemo:       'after the foldable block comment',

  /* single-line comment -- should NOT produce a fold marker */
  noFold:         'after the non-foldable comment',

  // ---------- Trailing item to verify the closing brace styling ----------
  last: 'end-of-test',
}