diff --git a/Versions/build.txt b/Versions/build.txt index 8c5581cf3..76bfd63d0 100644 --- a/Versions/build.txt +++ b/Versions/build.txt @@ -1 +1 @@ -2381 +2382 diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf index 85db89fbc..325beede9 100644 --- a/res/Notepad3.exe.manifest.conf +++ b/res/Notepad3.exe.manifest.conf @@ -3,8 +3,8 @@ - Notepad3 + Notepad3 Oniguruma diff --git a/sciXlexers/CharSetX.h b/sciXlexers/CharSetX.h index 978845d05..ad554f5a7 100644 --- a/sciXlexers/CharSetX.h +++ b/sciXlexers/CharSetX.h @@ -36,7 +36,7 @@ constexpr bool IsALetter(const int ch) noexcept { } constexpr bool IsLineBreak(const int ch) noexcept { - return ((ch == '\n') || (ch == '\r')); + return ((ch == '\n') || (ch == '\r') || (ch == '\0')); } inline int IsNumber(const Scintilla::StyleContext& sc) { diff --git a/sciXlexers/LexTOML.cxx b/sciXlexers/LexTOML.cxx index 690f84388..a691dc12f 100644 --- a/sciXlexers/LexTOML.cxx +++ b/sciXlexers/LexTOML.cxx @@ -202,6 +202,9 @@ Sci_Position SCI_METHOD LexerTOML::WordListSet(int n, const char* wl) } // ---------------------------------------------------------------------------- +constexpr int abs_i(const int i) noexcept { return ((i < 0) ? (0 - i) : (0 + i)); } + +// ---------------------------------------------------------------------------- constexpr bool IsCommentChar(const int ch) noexcept { //return (ch == '#') || (ch == ':'); @@ -225,6 +228,17 @@ inline bool IsAKeywordChar(const int ch) { } // ---------------------------------------------------------------------------- +inline void SetStateParsingError(StyleContext& sc) { + sc.SetState(SCE_TOML_PARSINGERROR); +} +// ---------------------------------------------------------------------------- + +inline void ForwardSetStateParsingError(StyleContext& sc) { + sc.ForwardSetState(SCE_TOML_PARSINGERROR); +} +// ---------------------------------------------------------------------------- + + static bool IsDateTimeStr(StyleContext& sc, const CharacterSet& validCh, const CharacterSet& valEnd) { @@ -317,85 +331,74 @@ static bool IsLookAheadInList(StyleContext& sc, const CharacterSet& validCh, con static bool _inComment(StyleContext& sc, Sci_Position& pos) { bool isInComment = false; - auto const posCurrent = static_cast(sc.currentPos); - Sci_Position i = pos - posCurrent; - while (((pos + i) >= 0)) + Sci_Position p = pos; + while (p >= 0) { - int const ch = sc.GetRelative(i--); + Sci_Position const d = p - posCurrent; + int const ch = sc.GetRelative(d); if (IsLineBreak(ch)) { break; } else if (IsCommentChar(ch)) { isInComment = true; - pos += i; + pos = p - 1; break; } - + --p; } return isInComment; } -constexpr bool _isQuoted(const int q1, const int q2) noexcept { - return ((q1 >= 0) || (q2 >= 0)); -} +constexpr bool _isQuoted(const bool q1, const bool q2) noexcept { return (q1 || q2); } static int GetSquareBracketLevel(StyleContext& sc, const bool stopAtLnBreak) { auto const posCurrent = static_cast(sc.currentPos); int iBracketLevel = 0; - int inSQStrg = -1; - int inDQStrg = -1; - int inInlTable = 0; + bool inSQStrg = false; + bool inDQStrg = false; - Sci_Position i = 0; - Sci_Position pos = posCurrent; - while (pos >= 0) + Sci_Position pos = posCurrent - 1; + + while ((pos >= 0) && (iBracketLevel <= 0)) { - int const ch = sc.GetRelative(i); + Sci_Position const diff = pos - posCurrent; + int const ch = sc.GetRelative(diff); if (stopAtLnBreak && IsLineBreak(ch)) { break; } + int const ch_p = sc.GetRelative(diff - 1); if (!_inComment(sc, pos)) { - if (ch == '"') { - if (inDQStrg > 0) { - inDQStrg = 0 - i; + if (ch_p != '\\') // not ESCaped + { + if (ch == '"') + { + if (inDQStrg) { + inDQStrg = false; + } + else { + inDQStrg = !inSQStrg; + } } - else if (inSQStrg < 0) { - inDQStrg = i; + else if (ch == '\'') + { + if (inSQStrg) { + inSQStrg = false; + } + else { + inSQStrg = !inDQStrg; + } } } - else if (ch == '\'') { - if (inSQStrg > 0) { - inSQStrg = 0 - i; - } - else if (inSQStrg < 0) { - inSQStrg = i; - } - } - else if (ch == '\\') { // ESCaped ? - if (inDQStrg == (0 - i + 1)) { - inDQStrg = i + 1; // revert - } - else if (inSQStrg == (0 - i + 1)) { - inSQStrg = i + 1; // revert - } - } - else if ((ch == '}') && !_isQuoted(inDQStrg, inSQStrg)) { - ++inInlTable; - } - else if ((ch == '{') && !_isQuoted(inDQStrg, inSQStrg)) { - --inInlTable; - } - - if (!_isQuoted(inDQStrg, inSQStrg) && (inInlTable == 0)) { + if (!_isQuoted(inDQStrg, inSQStrg)) { if (ch == ']') { --iBracketLevel; @@ -403,14 +406,10 @@ static int GetSquareBracketLevel(StyleContext& sc, const bool stopAtLnBreak) else if (ch == '[') { ++iBracketLevel; } - else if (IsAssignChar(ch)) { - break; // must be the assignment begin - } } - --pos; } - i = pos - posCurrent; + --pos; } return iBracketLevel; } @@ -425,12 +424,13 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int bool inSQuotedKey = false; bool inDQuotedKey = false; - bool inInnerQKey = false; bool inSectionDef = false; + bool isSectKeyBeg = false; + bool isSectKeyEnd = false; bool inMultiLnString = (sc.state == SCE_TOML_STR_BASIC) || (sc.state == SCE_TOML_STR_LITERAL); - bool inMultiLnArrayDef = false; + bool inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); bool inHex = false; bool inBin = false; @@ -441,43 +441,56 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int for (; sc.More(); sc.Forward()) { - // -------------------------------------------------- // check if in the middle of a line continuation ... // -------------------------------------------------- // reset context infos - if (sc.atLineStart) { + if (sc.atLineStart) + { inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); - inSQuotedKey = inDQuotedKey = inInnerQKey = false; + inSQuotedKey = inDQuotedKey = false; bPossibleKeyword = true; - switch (sc.state) - { - case SCE_TOML_STR_BASIC: - case SCE_TOML_STR_LITERAL: - if (!inMultiLnString) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - break; - case SCE_TOML_KEY: - case SCE_TOML_ASSIGNMENT: - sc.SetState(SCE_TOML_PARSINGERROR); - break; - case SCE_TOML_COMMENT: - if (inMultiLnArrayDef) { + if (inMultiLnArrayDef) { + switch (sc.state) + { + case SCE_TOML_COMMENT: sc.SetState(SCE_TOML_VALUE); - } - break; - case SCE_TOML_PARSINGERROR: - if (!inMultiLnArrayDef) { - sc.SetState(SCE_TOML_DEFAULT); - } - break; - default: - if (!inMultiLnArrayDef) { + break; + default: + // no state change + break; + } + } + else { + // NOT in inMultiLnArrayDef + switch (sc.state) + { + case SCE_TOML_STR_BASIC: + case SCE_TOML_STR_LITERAL: + if (!inMultiLnString) { + SetStateParsingError(sc); + } + break; + + case SCE_TOML_KEY: + case SCE_TOML_ASSIGNMENT: + SetStateParsingError(sc); + break; + + case SCE_TOML_COMMENT: + sc.SetState(SCE_TOML_DEFAULT); // reset + break; + + case SCE_TOML_PARSINGERROR: sc.SetState(SCE_TOML_DEFAULT); // reset - } - break; + break; + + case SCE_TOML_VALUE: + default: + sc.SetState(SCE_TOML_DEFAULT); + break; + } } } @@ -486,7 +499,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int // ------------------------- if (IsLineBreak(sc.ch)) { if (bInInlBracket) { - sc.ForwardSetState(SCE_TOML_PARSINGERROR); + ForwardSetStateParsingError(sc); } else { continue; // eat line breaks @@ -497,7 +510,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int { if (IsCommentChar(sc.ch)) { if (inSectionDef) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } else if ((sc.state == SCE_TOML_STR_BASIC) || (sc.state == SCE_TOML_STR_LITERAL)) @@ -517,6 +530,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int switch (sc.state) { case SCE_TOML_DEFAULT: + { if (IsASpaceOrTab(sc.ch)) { // eat } @@ -524,8 +538,8 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_COMMENT); } else if (sc.ch == '[') { - sc.SetState(SCE_TOML_SECTION); inSectionDef = true; + sc.SetState(SCE_TOML_SECTION); } else if (validKey.Contains(sc.ch)) { sc.SetState(SCE_TOML_KEY); @@ -540,20 +554,29 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_KEY); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + if (!inMultiLnArrayDef) { + SetStateParsingError(sc); + } } } - break; + } + break; case SCE_TOML_COMMENT: + { // eat - rest of line is comment - break; + } + break; case SCE_TOML_SECTION: + { if (sc.ch == ']') { - if (GetSquareBracketLevel(sc, true) == 0) { + if (isSectKeyBeg) { + isSectKeyEnd = true; + } + if (GetSquareBracketLevel(sc, true) == 1) { inSectionDef = false; } } @@ -562,73 +585,68 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_COMMENT); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } - break; - + else if (!IsASpaceOrTab(sc.ch)) { + if (!validKey.Contains(sc.ch)) { + SetStateParsingError(sc); + } + // else EAT + } + } + break; case SCE_TOML_KEY: + { if (sc.atLineEnd) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); break; } else if ((sc.ch == '"') && inDQuotedKey) { - if (inInnerQKey) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - else { - sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key - } + sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key + inDQuotedKey = false; break; } else if ((sc.ch == '\'') && inSQuotedKey) { - if (inInnerQKey) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - else { - sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key - } + sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key + inSQuotedKey = false; break; } else if (IsASpaceOrTab(sc.ch)) { - if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) { + if (!(inSQuotedKey || inDQuotedKey)) { sc.SetState(SCE_TOML_ASSIGNMENT); // end of key } break; // else eat } else if (IsAssignChar(sc.ch)) { - if ((inSQuotedKey || inDQuotedKey || inInnerQKey)) { - break; + if ((inSQuotedKey || inDQuotedKey)) { + break; // eat } sc.SetState(SCE_TOML_ASSIGNMENT); // end of key // === fall through === case SCE_TOML_ASSIGNMENT: } else if (validKey.Contains(sc.ch)) { - break; // eat + break; // eat } else { - if ((sc.ch == '"') && inSQuotedKey) { - inInnerQKey = !inInnerQKey; //toggle + if (!(inSQuotedKey || inDQuotedKey)) { + SetStateParsingError(sc); } - else if ((sc.ch == '\'') && inDQuotedKey) { - inInnerQKey = !inInnerQKey; //toggle - } - else if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - break; // else eat + break; // no fall through } - // === fall through === + } + // === fall through === case SCE_TOML_ASSIGNMENT: - if (sc.atLineEnd) { - sc.SetState(SCE_TOML_PARSINGERROR); + { + if (sc.atLineEnd || (inSQuotedKey || inDQuotedKey)) { + SetStateParsingError(sc); break; } else if (IsAssignChar(sc.ch)) { if (IsLookAheadLineEmpty(sc)) { - sc.ForwardSetState(SCE_TOML_PARSINGERROR); + ForwardSetStateParsingError(sc); break; } else { @@ -640,12 +658,14 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int break; // OK } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); break; } - // === fall through === + } + // === fall through === case SCE_TOML_VALUE: + { if (bPossibleKeyword && IsLookAheadInList(sc, validKeyWord, keywords)) { sc.SetState(SCE_TOML_KEYWORD); break; @@ -654,31 +674,29 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int bPossibleKeyword = false; } if (sc.ch == '[') { - inMultiLnArrayDef = true; + inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); } else if (sc.ch == ']') { int const level = GetSquareBracketLevel(sc, false); - if (level == 0) { + if (level == 1) { inMultiLnArrayDef = false; } - else if (level < 0) { - sc.SetState(SCE_TOML_PARSINGERROR); - inMultiLnArrayDef = false; + else if (level <= 0) { + SetStateParsingError(sc); } } else if (sc.ch == '}') { if (bInInlBracket) { bInInlBracket = false; - sc.SetState(SCE_TOML_VALUE); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } else if (sc.ch == '{') { if (bInInlBracket) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } else { bInInlBracket = true; @@ -708,31 +726,41 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } } } - else if (sc.ch == '"') { + else if ((sc.ch == '"') && (sc.chPrev != '\\')) { sc.SetState(SCE_TOML_STR_BASIC); if (sc.Match(R"(""")")) { inMultiLnString = true; sc.Forward(2); } + else { + inMultiLnString = false; + } } - else if (sc.ch == '\'') { + else if ((sc.ch == '\'') && (sc.chPrev != '\\')) { sc.SetState(SCE_TOML_STR_LITERAL); if (sc.Match(R"(''')")) { inMultiLnString = true; sc.Forward(2); } + else { + inMultiLnString = false; + } } - break; + } + break; case SCE_TOML_KEYWORD: + { if (!(IsASpaceX(sc.ch) || validKeyWord.Contains(sc.ch))) { sc.SetState(SCE_TOML_VALUE); } - break; + } + break; case SCE_TOML_NUMBER: + { if (sc.ch == '_') { // eat // TODO: only once } @@ -745,10 +773,10 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } else { if ((inHex && !IsADigit(sc.ch, 16)) || - (inBin && !IsADigit(sc.ch, 2)) || - (inOct && !IsADigit(sc.ch, 8))) + (inBin && !IsADigit(sc.ch, 2)) || + (inOct && !IsADigit(sc.ch, 8))) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } } @@ -775,26 +803,30 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int inOct = false; } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } - break; + } + break; case SCE_TOML_DATETIME: + { if (!IsADigit(sc.ch) && !chDateTime.Contains(sc.ch) && (sc.ch != '.')) { if (validNumberEnd.Contains(sc.ch)) { sc.SetState(SCE_TOML_VALUE); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } - break; + } + break; case SCE_TOML_STR_BASIC: case SCE_TOML_STR_LITERAL: + { if (sc.ch == '"') { if (sc.state == SCE_TOML_STR_BASIC) { if (sc.chPrev != '\\') { @@ -827,17 +859,22 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } } } - break; + } + break; case SCE_TOML_PARSINGERROR: + { // keep parsing error until new line - break; + } + break; default: - sc.SetState(SCE_TOML_PARSINGERROR); // unknown - break; + { + SetStateParsingError(sc); // unknown + } + break; } //~if (sc.atLineEnd) { diff --git a/src/VersionEx.h b/src/VersionEx.h index ef0293ed8..b43ee6e27 100644 --- a/src/VersionEx.h +++ b/src/VersionEx.h @@ -7,8 +7,8 @@ #define SAPPNAME "Notepad3" #define VERSION_MAJOR 5 #define VERSION_MINOR 19 -#define VERSION_REV 630 -#define VERSION_BUILD 2381 -#define SCINTILLA_VER 416 -#define ONIGMO_REGEX_VER 6.2.0 -#define VERSION_PATCH +#define VERSION_REV 702 +#define VERSION_BUILD 2382 +#define SCINTILLA_VER 417 +#define ONIGURUMA_REGEX_VER 6.9.2 +#define VERSION_PATCH Oniguruma diff --git a/test/txtfiles/TOML.toml b/test/txtfiles/TOML.toml index 5803e35bf..473cc1a02 100644 --- a/test/txtfiles/TOML.toml +++ b/test/txtfiles/TOML.toml @@ -41,10 +41,10 @@ data = [ ["gamma", "delta"], [1, 2] ] key = "value" key = # INVALID -key = "value" bare_key = "value" bare-key = "value" 1234 = "value" += "string" # is invalid "127.0.0.1" = "value" "character encoding" = "value" @@ -52,6 +52,9 @@ bare-key = "value" 'key2' = "value" 'quoted "value"' = "value" +"~!@$^&*()_+-` 1234567890 []|/?><.,;:'" = 1 +'~!@$^&*()_+-` 1234567890 []|/?><.,;:"' = 2 + # boolean bool1 = true bool2 = false