Merge pull request #3104 from RaiKoHoff/Dev_Release

JSON5 enhancements to Scintilla's original JSON-Lexer
This commit is contained in:
Pairi Daiza 2021-02-13 14:11:04 +01:00 committed by GitHub
commit f6d4e110fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 144 additions and 58 deletions

View File

@ -106,10 +106,10 @@ struct OptionsJSON {
bool allowComments;
bool escapeSequence;
OptionsJSON() {
foldCompact = false;
fold = false;
allowComments = false;
escapeSequence = false;
foldCompact = true;
fold = true;
allowComments = true;
escapeSequence = true;
}
};
@ -139,10 +139,9 @@ class LexerJSON : public DefaultLexer {
CharacterSet setKeywordJSON;
CompactIRI compactIRI;
static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
Sci_Position i = 0;
while (i < 50) {
i++;
static bool IsNextNonWhitespace(LexAccessor &styler, Sci_PositionU start, char ch) {
Sci_PositionU i = 0;
while (++i < 60) {
char curr = styler.SafeGetCharAt(start+i, '\0');
char next = styler.SafeGetCharAt(start+i+1, '\0');
bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
@ -158,24 +157,31 @@ class LexerJSON : public DefaultLexer {
/**
* Looks for the colon following the end quote
*
* Assumes property names of lengths no longer than a 100 characters.
* Assumes property names of lengths no longer than a 120 characters.
* The colon is also expected to be less than 50 spaces after the end
* quote for the string to be considered a property name
*/
static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
Sci_Position i = 0;
static constexpr bool IsPropChar(int ch) noexcept {
return IsAlphaNumeric(ch); // || ch == '_';
}
static bool AtPropertyName(LexAccessor &styler, const Sci_PositionU start, bool bQuoted) {
Sci_PositionU i = 0;
bool escaped = false;
while (i < 100) {
i++;
while (++i < 120) {
char curr = styler.SafeGetCharAt(start+i, '\0');
if (escaped) {
escaped = false;
continue;
}
escaped = curr == '\\';
if (curr == '"') {
return IsNextNonWhitespace(styler, start+i, ':');
} else if (!curr) {
escaped = (curr == '\\');
if (curr == ':' && !bQuoted) {
return true;
} else if ((curr == '"' || curr == '\'') && bQuoted) {
return IsNextNonWhitespace(styler, start + i, ':');
} else if (isspacechar(curr) && !bQuoted) {
return IsNextNonWhitespace(styler, start + i, ':');
} if (!curr || (!bQuoted && !IsPropChar(curr))) {
return false;
}
}
@ -183,7 +189,7 @@ class LexerJSON : public DefaultLexer {
}
static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
StyleContext &context, LexAccessor &styler) {
const StyleContext& context, LexAccessor& styler) {
char word[51];
Sci_Position currPos = (Sci_Position) context.currentPos;
int i = 0;
@ -199,6 +205,53 @@ class LexerJSON : public DefaultLexer {
return keywordList.InList(word);
}
static bool IsJSONNumber(const StyleContext& context, bool bChPrevIsOp) {
bool numberStart =
IsADigit(context.ch) && (context.chPrev == '+' ||
context.chPrev == '-' ||
context.atLineStart ||
IsASpace(context.chPrev) ||
bChPrevIsOp);
bool hexaStart =
tolower(context.ch) == 'x' &&
context.chPrev == '0' &&
IsADigit(context.chNext, 16);
bool hexaPart =
IsADigit(context.ch, 16) &&
(IsADigit(context.chPrev, 16) || tolower(context.chPrev) == 'x');
bool exponentPart =
tolower(context.ch) == 'e' &&
IsADigit(context.chPrev) &&
(IsADigit(context.chNext) ||
context.chNext == '+' ||
context.chNext == '-');
bool signPart =
(context.ch == '-' || context.ch == '+') &&
((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
((IsASpace(context.chPrev) || bChPrevIsOp)
&& IsADigit(context.chNext)));
bool adjacentDigit =
IsADigit(context.ch) && IsADigit(context.chPrev);
bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
bool dotPart = context.ch == '.' &&
(IsADigit(context.chPrev) || IsASpace(context.chPrev)) &&
(IsADigit(context.chNext) || IsASpace(context.chNext) || context.chNext == ',');
bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
return (
numberStart ||
hexaStart || hexaPart ||
exponentPart ||
signPart ||
adjacentDigit ||
dotPart ||
afterExponent ||
afterDot
);
}
public:
LexerJSON() :
DefaultLexer("json", SCLEX_JSON),
@ -279,7 +332,14 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
LexAccessor styler(pAccess);
StyleContext context(startPos, length, initStyle, styler);
int stringStyleBefore = SCE_JSON_STRING;
bool doubleQuotCntx = false;
bool singleQuotCntx = false;
bool lineContinuation = false;
while (context.More()) {
if (context.atLineStart) {
doubleQuotCntx = lineContinuation ? doubleQuotCntx : false;
singleQuotCntx = lineContinuation ? singleQuotCntx : false;
}
switch (context.state) {
case SCE_JSON_BLOCKCOMMENT:
if (context.Match("*/")) {
@ -305,9 +365,9 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
}
break;
}
if (context.ch == '"') {
if (context.ch == '"' || context.ch == '\'') {
context.SetState(stringStyleBefore);
context.ForwardSetState(SCE_C_DEFAULT);
context.ForwardSetState(SCE_JSON_DEFAULT);
} else if (context.ch == '\\') {
if (!escapeSeq.newSequence(context.chNext)) {
context.SetState(SCE_JSON_ERROR);
@ -322,7 +382,9 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
break;
case SCE_JSON_PROPERTYNAME:
case SCE_JSON_STRING:
if (context.ch == '"') {
if (context.ch == ':' && !(doubleQuotCntx || singleQuotCntx)) {
context.SetState(SCE_JSON_OPERATOR);
} else if (context.ch == '"' && doubleQuotCntx) {
if (compactIRI.shouldHighlight()) {
context.ChangeState(SCE_JSON_COMPACTIRI);
context.ForwardSetState(SCE_JSON_DEFAULT);
@ -330,6 +392,16 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
} else {
context.ForwardSetState(SCE_JSON_DEFAULT);
}
doubleQuotCntx = false;
} else if (context.ch == '\'' && singleQuotCntx) {
if (compactIRI.shouldHighlight()) {
context.ChangeState(SCE_JSON_COMPACTIRI);
context.ForwardSetState(SCE_JSON_DEFAULT);
compactIRI.resetState();
} else {
context.ForwardSetState(SCE_JSON_DEFAULT);
}
singleQuotCntx = false;
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
} else if (context.ch == '\\') {
@ -357,7 +429,15 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
stringStyleBefore = context.state;
context.SetState(SCE_JSON_LDKEYWORD);
}
} else {
}
else if (IsPropChar(context.ch)) {
if (!AtPropertyName(styler, context.currentPos, (doubleQuotCntx || singleQuotCntx))) {
if (context.state == SCE_JSON_PROPERTYNAME) {
context.SetState(SCE_JSON_ERROR);
}
}
}
else {
compactIRI.checkChar(context.ch);
}
break;
@ -368,7 +448,7 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
(!setURL.Contains(context.ch))) {
context.SetState(stringStyleBefore);
}
if (context.ch == '"') {
if (context.ch == '"' || context.ch == '\'') {
context.ForwardSetState(SCE_JSON_DEFAULT);
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
@ -389,14 +469,24 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
}
break;
}
if (context.state == SCE_JSON_DEFAULT) {
if (context.ch == '"') {
compactIRI.resetState();
context.SetState(SCE_JSON_STRING);
Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
if (AtPropertyName(styler, currPos)) {
doubleQuotCntx = !singleQuotCntx;
if (AtPropertyName(styler, context.currentPos, true)) {
context.SetState(SCE_JSON_PROPERTYNAME);
}
} else if (context.ch == '\'') {
compactIRI.resetState();
context.SetState(SCE_JSON_STRING);
singleQuotCntx = !doubleQuotCntx;
if (AtPropertyName(styler, context.currentPos, true)) {
context.SetState(SCE_JSON_PROPERTYNAME);
}
//} else if (context.ch == '\\') {
// lineContinuation = context.atLineEnd;
} else if (setOperators.Contains(context.ch)) {
context.SetState(SCE_JSON_OPERATOR);
} else if (options.allowComments && context.Match("/*")) {
@ -409,40 +499,19 @@ void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
context.SetState(SCE_JSON_KEYWORD);
}
}
bool numberStart =
IsADigit(context.ch) && (context.chPrev == '+'||
context.chPrev == '-' ||
context.atLineStart ||
IsASpace(context.chPrev) ||
setOperators.Contains(context.chPrev));
bool exponentPart =
tolower(context.ch) == 'e' &&
IsADigit(context.chPrev) &&
(IsADigit(context.chNext) ||
context.chNext == '+' ||
context.chNext == '-');
bool signPart =
(context.ch == '-' || context.ch == '+') &&
((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
&& IsADigit(context.chNext)));
bool adjacentDigit =
IsADigit(context.ch) && IsADigit(context.chPrev);
bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
bool dotPart = context.ch == '.' &&
IsADigit(context.chPrev) &&
IsADigit(context.chNext);
bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
if (numberStart ||
exponentPart ||
signPart ||
adjacentDigit ||
dotPart ||
afterExponent ||
afterDot) {
if (IsJSONNumber(context, setOperators.Contains(context.chPrev))) {
context.SetState(SCE_JSON_NUMBER);
} else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
else if (context.state == SCE_JSON_DEFAULT) {
if (IsPropChar(context.ch)) {
if (AtPropertyName(styler, context.currentPos, (doubleQuotCntx || singleQuotCntx))) {
context.SetState(SCE_JSON_PROPERTYNAME);
}
}
else if (!IsASpace(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
}
}
context.Forward();

View File

@ -1,4 +1,21 @@
// ############ allow line comments #############
/*
=========== allow stream commemnts
*/
{
/* JSON5: */
// comments
unquoted: 'and you can quote me on that',
singleQuotes: 'I can use "double quotes" here',
lineBreaks: "Look, Mom! \
No \\n's!",
hexadecimal: 0xdecaf,
leadingDecimalPoint: 1234567, .8675309, andTrailing: 8675309.,
positiveSign: +1,
trailingComma: 'in objects', andIn: ['arrays',],
escSeq: "\uAFFE",
"backwardsCompatible": "with JSON",
"$id": "0",
"AutoRefresh": true,
"AutoRefreshRemotes": true,