+ fix: TOML-Lexer issues

This commit is contained in:
Rainer Kottenhoff 2019-07-02 17:34:07 +02:00
parent 035cd3d3c4
commit 10e059e9b4
6 changed files with 195 additions and 155 deletions

View File

@ -1 +1 @@
2381
2382

View File

@ -3,8 +3,8 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.19.630.2381"
version="5.19.702.2382"
type="win32"
/>
<description>Notepad3 </description>
<description>Notepad3 Oniguruma</description>
</assembly>

View File

@ -36,7 +36,7 @@ constexpr bool IsALetter(const int ch) noexcept {
}
constexpr bool IsLineBreak(const int ch) noexcept {
return ((ch == '\n') || (ch == '\r'));
return ((ch == '\n') || (ch == '\r') || (ch == '\0'));
}
inline int IsNumber(const Scintilla::StyleContext& sc) {

View File

@ -202,6 +202,9 @@ Sci_Position SCI_METHOD LexerTOML::WordListSet(int n, const char* wl)
}
// ----------------------------------------------------------------------------
constexpr int abs_i(const int i) noexcept { return ((i < 0) ? (0 - i) : (0 + i)); }
// ----------------------------------------------------------------------------
constexpr bool IsCommentChar(const int ch) noexcept {
//return (ch == '#') || (ch == ':');
@ -225,6 +228,17 @@ inline bool IsAKeywordChar(const int ch) {
}
// ----------------------------------------------------------------------------
inline void SetStateParsingError(StyleContext& sc) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
// ----------------------------------------------------------------------------
inline void ForwardSetStateParsingError(StyleContext& sc) {
sc.ForwardSetState(SCE_TOML_PARSINGERROR);
}
// ----------------------------------------------------------------------------
static bool IsDateTimeStr(StyleContext& sc, const CharacterSet& validCh, const CharacterSet& valEnd)
{
@ -317,85 +331,74 @@ static bool IsLookAheadInList(StyleContext& sc, const CharacterSet& validCh, con
static bool _inComment(StyleContext& sc, Sci_Position& pos)
{
bool isInComment = false;
auto const posCurrent = static_cast<Sci_Position>(sc.currentPos);
Sci_Position i = pos - posCurrent;
while (((pos + i) >= 0))
Sci_Position p = pos;
while (p >= 0)
{
int const ch = sc.GetRelative(i--);
Sci_Position const d = p - posCurrent;
int const ch = sc.GetRelative(d);
if (IsLineBreak(ch)) {
break;
}
else if (IsCommentChar(ch)) {
isInComment = true;
pos += i;
pos = p - 1;
break;
}
--p;
}
return isInComment;
}
constexpr bool _isQuoted(const int q1, const int q2) noexcept {
return ((q1 >= 0) || (q2 >= 0));
}
constexpr bool _isQuoted(const bool q1, const bool q2) noexcept { return (q1 || q2); }
static int GetSquareBracketLevel(StyleContext& sc, const bool stopAtLnBreak)
{
auto const posCurrent = static_cast<Sci_Position>(sc.currentPos);
int iBracketLevel = 0;
int inSQStrg = -1;
int inDQStrg = -1;
int inInlTable = 0;
bool inSQStrg = false;
bool inDQStrg = false;
Sci_Position i = 0;
Sci_Position pos = posCurrent;
while (pos >= 0)
Sci_Position pos = posCurrent - 1;
while ((pos >= 0) && (iBracketLevel <= 0))
{
int const ch = sc.GetRelative(i);
Sci_Position const diff = pos - posCurrent;
int const ch = sc.GetRelative(diff);
if (stopAtLnBreak && IsLineBreak(ch)) {
break;
}
int const ch_p = sc.GetRelative(diff - 1);
if (!_inComment(sc, pos))
{
if (ch == '"') {
if (inDQStrg > 0) {
inDQStrg = 0 - i;
if (ch_p != '\\') // not ESCaped
{
if (ch == '"')
{
if (inDQStrg) {
inDQStrg = false;
}
else {
inDQStrg = !inSQStrg;
}
}
else if (inSQStrg < 0) {
inDQStrg = i;
else if (ch == '\'')
{
if (inSQStrg) {
inSQStrg = false;
}
else {
inSQStrg = !inDQStrg;
}
}
}
else if (ch == '\'') {
if (inSQStrg > 0) {
inSQStrg = 0 - i;
}
else if (inSQStrg < 0) {
inSQStrg = i;
}
}
else if (ch == '\\') { // ESCaped ?
if (inDQStrg == (0 - i + 1)) {
inDQStrg = i + 1; // revert
}
else if (inSQStrg == (0 - i + 1)) {
inSQStrg = i + 1; // revert
}
}
else if ((ch == '}') && !_isQuoted(inDQStrg, inSQStrg)) {
++inInlTable;
}
else if ((ch == '{') && !_isQuoted(inDQStrg, inSQStrg)) {
--inInlTable;
}
if (!_isQuoted(inDQStrg, inSQStrg) && (inInlTable == 0)) {
if (!_isQuoted(inDQStrg, inSQStrg)) {
if (ch == ']') {
--iBracketLevel;
@ -403,14 +406,10 @@ static int GetSquareBracketLevel(StyleContext& sc, const bool stopAtLnBreak)
else if (ch == '[') {
++iBracketLevel;
}
else if (IsAssignChar(ch)) {
break; // must be the assignment begin
}
}
--pos;
}
i = pos - posCurrent;
--pos;
}
return iBracketLevel;
}
@ -425,12 +424,13 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
bool inSQuotedKey = false;
bool inDQuotedKey = false;
bool inInnerQKey = false;
bool inSectionDef = false;
bool isSectKeyBeg = false;
bool isSectKeyEnd = false;
bool inMultiLnString = (sc.state == SCE_TOML_STR_BASIC) || (sc.state == SCE_TOML_STR_LITERAL);
bool inMultiLnArrayDef = false;
bool inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0);
bool inHex = false;
bool inBin = false;
@ -441,43 +441,56 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
for (; sc.More(); sc.Forward())
{
// --------------------------------------------------
// check if in the middle of a line continuation ...
// --------------------------------------------------
// reset context infos
if (sc.atLineStart) {
if (sc.atLineStart)
{
inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0);
inSQuotedKey = inDQuotedKey = inInnerQKey = false;
inSQuotedKey = inDQuotedKey = false;
bPossibleKeyword = true;
switch (sc.state)
{
case SCE_TOML_STR_BASIC:
case SCE_TOML_STR_LITERAL:
if (!inMultiLnString) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
break;
case SCE_TOML_KEY:
case SCE_TOML_ASSIGNMENT:
sc.SetState(SCE_TOML_PARSINGERROR);
break;
case SCE_TOML_COMMENT:
if (inMultiLnArrayDef) {
if (inMultiLnArrayDef) {
switch (sc.state)
{
case SCE_TOML_COMMENT:
sc.SetState(SCE_TOML_VALUE);
}
break;
case SCE_TOML_PARSINGERROR:
if (!inMultiLnArrayDef) {
sc.SetState(SCE_TOML_DEFAULT);
}
break;
default:
if (!inMultiLnArrayDef) {
break;
default:
// no state change
break;
}
}
else {
// NOT in inMultiLnArrayDef
switch (sc.state)
{
case SCE_TOML_STR_BASIC:
case SCE_TOML_STR_LITERAL:
if (!inMultiLnString) {
SetStateParsingError(sc);
}
break;
case SCE_TOML_KEY:
case SCE_TOML_ASSIGNMENT:
SetStateParsingError(sc);
break;
case SCE_TOML_COMMENT:
sc.SetState(SCE_TOML_DEFAULT); // reset
break;
case SCE_TOML_PARSINGERROR:
sc.SetState(SCE_TOML_DEFAULT); // reset
}
break;
break;
case SCE_TOML_VALUE:
default:
sc.SetState(SCE_TOML_DEFAULT);
break;
}
}
}
@ -486,7 +499,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
// -------------------------
if (IsLineBreak(sc.ch)) {
if (bInInlBracket) {
sc.ForwardSetState(SCE_TOML_PARSINGERROR);
ForwardSetStateParsingError(sc);
}
else {
continue; // eat line breaks
@ -497,7 +510,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
{
if (IsCommentChar(sc.ch)) {
if (inSectionDef) {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
else if ((sc.state == SCE_TOML_STR_BASIC) ||
(sc.state == SCE_TOML_STR_LITERAL))
@ -517,6 +530,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
switch (sc.state)
{
case SCE_TOML_DEFAULT:
{
if (IsASpaceOrTab(sc.ch)) {
// eat
}
@ -524,8 +538,8 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
sc.SetState(SCE_TOML_COMMENT);
}
else if (sc.ch == '[') {
sc.SetState(SCE_TOML_SECTION);
inSectionDef = true;
sc.SetState(SCE_TOML_SECTION);
}
else if (validKey.Contains(sc.ch)) {
sc.SetState(SCE_TOML_KEY);
@ -540,20 +554,29 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
sc.SetState(SCE_TOML_KEY);
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
if (!inMultiLnArrayDef) {
SetStateParsingError(sc);
}
}
}
break;
}
break;
case SCE_TOML_COMMENT:
{
// eat - rest of line is comment
break;
}
break;
case SCE_TOML_SECTION:
{
if (sc.ch == ']') {
if (GetSquareBracketLevel(sc, true) == 0) {
if (isSectKeyBeg) {
isSectKeyEnd = true;
}
if (GetSquareBracketLevel(sc, true) == 1) {
inSectionDef = false;
}
}
@ -562,73 +585,68 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
sc.SetState(SCE_TOML_COMMENT);
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
}
break;
else if (!IsASpaceOrTab(sc.ch)) {
if (!validKey.Contains(sc.ch)) {
SetStateParsingError(sc);
}
// else EAT
}
}
break;
case SCE_TOML_KEY:
{
if (sc.atLineEnd) {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
break;
}
else if ((sc.ch == '"') && inDQuotedKey) {
if (inInnerQKey) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
else {
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
}
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
inDQuotedKey = false;
break;
}
else if ((sc.ch == '\'') && inSQuotedKey) {
if (inInnerQKey) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
else {
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
}
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
inSQuotedKey = false;
break;
}
else if (IsASpaceOrTab(sc.ch)) {
if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) {
if (!(inSQuotedKey || inDQuotedKey)) {
sc.SetState(SCE_TOML_ASSIGNMENT); // end of key
}
break; // else eat
}
else if (IsAssignChar(sc.ch)) {
if ((inSQuotedKey || inDQuotedKey || inInnerQKey)) {
break;
if ((inSQuotedKey || inDQuotedKey)) {
break; // eat
}
sc.SetState(SCE_TOML_ASSIGNMENT); // end of key
// === fall through === case SCE_TOML_ASSIGNMENT:
}
else if (validKey.Contains(sc.ch)) {
break; // eat
break; // eat
}
else {
if ((sc.ch == '"') && inSQuotedKey) {
inInnerQKey = !inInnerQKey; //toggle
if (!(inSQuotedKey || inDQuotedKey)) {
SetStateParsingError(sc);
}
else if ((sc.ch == '\'') && inDQuotedKey) {
inInnerQKey = !inInnerQKey; //toggle
}
else if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
break; // else eat
break; // no fall through
}
// === fall through ===
}
// === fall through ===
case SCE_TOML_ASSIGNMENT:
if (sc.atLineEnd) {
sc.SetState(SCE_TOML_PARSINGERROR);
{
if (sc.atLineEnd || (inSQuotedKey || inDQuotedKey)) {
SetStateParsingError(sc);
break;
}
else if (IsAssignChar(sc.ch)) {
if (IsLookAheadLineEmpty(sc)) {
sc.ForwardSetState(SCE_TOML_PARSINGERROR);
ForwardSetStateParsingError(sc);
break;
}
else {
@ -640,12 +658,14 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
break; // OK
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
break;
}
// === fall through ===
}
// === fall through ===
case SCE_TOML_VALUE:
{
if (bPossibleKeyword && IsLookAheadInList(sc, validKeyWord, keywords)) {
sc.SetState(SCE_TOML_KEYWORD);
break;
@ -654,31 +674,29 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
bPossibleKeyword = false;
}
if (sc.ch == '[') {
inMultiLnArrayDef = true;
inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0);
}
else if (sc.ch == ']') {
int const level = GetSquareBracketLevel(sc, false);
if (level == 0) {
if (level == 1) {
inMultiLnArrayDef = false;
}
else if (level < 0) {
sc.SetState(SCE_TOML_PARSINGERROR);
inMultiLnArrayDef = false;
else if (level <= 0) {
SetStateParsingError(sc);
}
}
else if (sc.ch == '}') {
if (bInInlBracket) {
bInInlBracket = false;
sc.SetState(SCE_TOML_VALUE);
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
}
else if (sc.ch == '{') {
if (bInInlBracket)
{
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
else {
bInInlBracket = true;
@ -708,31 +726,41 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
}
}
}
else if (sc.ch == '"') {
else if ((sc.ch == '"') && (sc.chPrev != '\\')) {
sc.SetState(SCE_TOML_STR_BASIC);
if (sc.Match(R"(""")")) {
inMultiLnString = true;
sc.Forward(2);
}
else {
inMultiLnString = false;
}
}
else if (sc.ch == '\'') {
else if ((sc.ch == '\'') && (sc.chPrev != '\\')) {
sc.SetState(SCE_TOML_STR_LITERAL);
if (sc.Match(R"(''')")) {
inMultiLnString = true;
sc.Forward(2);
}
else {
inMultiLnString = false;
}
}
break;
}
break;
case SCE_TOML_KEYWORD:
{
if (!(IsASpaceX(sc.ch) || validKeyWord.Contains(sc.ch))) {
sc.SetState(SCE_TOML_VALUE);
}
break;
}
break;
case SCE_TOML_NUMBER:
{
if (sc.ch == '_') {
// eat // TODO: only once
}
@ -745,10 +773,10 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
}
else {
if ((inHex && !IsADigit(sc.ch, 16)) ||
(inBin && !IsADigit(sc.ch, 2)) ||
(inOct && !IsADigit(sc.ch, 8)))
(inBin && !IsADigit(sc.ch, 2)) ||
(inOct && !IsADigit(sc.ch, 8)))
{
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
}
}
@ -775,26 +803,30 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
inOct = false;
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
}
break;
}
break;
case SCE_TOML_DATETIME:
{
if (!IsADigit(sc.ch) && !chDateTime.Contains(sc.ch) && (sc.ch != '.')) {
if (validNumberEnd.Contains(sc.ch)) {
sc.SetState(SCE_TOML_VALUE);
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
SetStateParsingError(sc);
}
}
break;
}
break;
case SCE_TOML_STR_BASIC:
case SCE_TOML_STR_LITERAL:
{
if (sc.ch == '"') {
if (sc.state == SCE_TOML_STR_BASIC) {
if (sc.chPrev != '\\') {
@ -827,17 +859,22 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
}
}
}
break;
}
break;
case SCE_TOML_PARSINGERROR:
{
// keep parsing error until new line
break;
}
break;
default:
sc.SetState(SCE_TOML_PARSINGERROR); // unknown
break;
{
SetStateParsingError(sc); // unknown
}
break;
}
//~if (sc.atLineEnd) {

View File

@ -7,8 +7,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 19
#define VERSION_REV 630
#define VERSION_BUILD 2381
#define SCINTILLA_VER 416
#define ONIGMO_REGEX_VER 6.2.0
#define VERSION_PATCH
#define VERSION_REV 702
#define VERSION_BUILD 2382
#define SCINTILLA_VER 417
#define ONIGURUMA_REGEX_VER 6.9.2
#define VERSION_PATCH Oniguruma

View File

@ -41,10 +41,10 @@ data = [ ["gamma", "delta"], [1, 2] ]
key = "value"
key = # INVALID
key = "value"
bare_key = "value"
bare-key = "value"
1234 = "value"
= "string" # is invalid
"127.0.0.1" = "value"
"character encoding" = "value"
@ -52,6 +52,9 @@ bare-key = "value"
'key2' = "value"
'quoted "value"' = "value"
"~!@$^&*()_+-` 1234567890 []|/?><.,;:'" = 1
'~!@$^&*()_+-` 1234567890 []|/?><.,;:"' = 2
# boolean
bool1 = true
bool2 = false