diff --git a/Versions/build.txt b/Versions/build.txt index 8c5581cf3..76bfd63d0 100644 --- a/Versions/build.txt +++ b/Versions/build.txt @@ -1 +1 @@ -2381 +2382 diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf index 85db89fbc..325beede9 100644 --- a/res/Notepad3.exe.manifest.conf +++ b/res/Notepad3.exe.manifest.conf @@ -3,8 +3,8 @@ - Notepad3 + Notepad3 Oniguruma diff --git a/sciXlexers/CharSetX.h b/sciXlexers/CharSetX.h index 978845d05..ad554f5a7 100644 --- a/sciXlexers/CharSetX.h +++ b/sciXlexers/CharSetX.h @@ -36,7 +36,7 @@ constexpr bool IsALetter(const int ch) noexcept { } constexpr bool IsLineBreak(const int ch) noexcept { - return ((ch == '\n') || (ch == '\r')); + return ((ch == '\n') || (ch == '\r') || (ch == '\0')); } inline int IsNumber(const Scintilla::StyleContext& sc) { diff --git a/sciXlexers/LexTOML.cxx b/sciXlexers/LexTOML.cxx index 3b7c30ff4..696b938fb 100644 --- a/sciXlexers/LexTOML.cxx +++ b/sciXlexers/LexTOML.cxx @@ -114,7 +114,7 @@ public: , validKey(CharacterSet::setAlphaNum, R"(-_.)", 0x80, false) , validKeyWord(CharacterSet::setAlphaNum, "_+-", 0x80, false) , validNumberEnd(CharacterSet::setNone, " \t\n\v\f\r#,)}]", 0x80, false) - , chDateTime(CharacterSet::setNone, "-:TZ", 0x80, false) + , chDateTime(CharacterSet::setNone, "-+.:TZ", 0x80, false) { } virtual ~LexerTOML() { } @@ -202,6 +202,9 @@ Sci_Position SCI_METHOD LexerTOML::WordListSet(int n, const char* wl) } // ---------------------------------------------------------------------------- +constexpr int abs_i(const int i) noexcept { return ((i < 0) ? (0 - i) : (0 + i)); } + +// ---------------------------------------------------------------------------- constexpr bool IsCommentChar(const int ch) noexcept { //return (ch == '#') || (ch == ':'); @@ -225,44 +228,18 @@ inline bool IsAKeywordChar(const int ch) { } // ---------------------------------------------------------------------------- - -static int GetBracketLevel(StyleContext& sc, const bool stopAtLnBreak = false) -{ - auto const posCurrent = static_cast(sc.currentPos); - - int iBracketLevel = -1; - int inInlTbl = 0; - - Sci_Position i = 0; - while (((--i + posCurrent) >= 0)) - { - int const ch = sc.GetRelative(i); - - if (stopAtLnBreak && IsLineBreak(ch)) { - break; - } - - if (ch == '}') { - ++inInlTbl; - } - else if (ch == '{') { - --inInlTbl; - } - - if (IsAssignChar(ch) && (inInlTbl == 0)) { - break; // must be the assignment begin - } - else if (ch == ']') { - --iBracketLevel; - } - else if (ch == '[') { - ++iBracketLevel; - } - } - return iBracketLevel; +inline void SetStateParsingError(StyleContext& sc) { + sc.SetState(SCE_TOML_PARSINGERROR); } // ---------------------------------------------------------------------------- +inline void ForwardSetStateParsingError(StyleContext& sc) { + sc.ForwardSetState(SCE_TOML_PARSINGERROR); +} +// ---------------------------------------------------------------------------- + + + static bool IsDateTimeStr(StyleContext& sc, const CharacterSet& validCh, const CharacterSet& valEnd) { auto const posCurrent = static_cast(sc.currentPos); @@ -351,6 +328,92 @@ static bool IsLookAheadInList(StyleContext& sc, const CharacterSet& validCh, con // ---------------------------------------------------------------------------- +static bool _inComment(StyleContext& sc, Sci_Position& pos) +{ + bool isInComment = false; + auto const posCurrent = static_cast(sc.currentPos); + + Sci_Position p = pos; + while (p >= 0) + { + Sci_Position const d = p - posCurrent; + int const ch = sc.GetRelative(d); + + if (IsLineBreak(ch)) { + break; + } + else if (IsCommentChar(ch)) { + isInComment = true; + pos = p - 1; + break; + } + --p; + } + return isInComment; +} + + +constexpr bool _isQuoted(const bool q1, const bool q2) noexcept { return (q1 || q2); } + +static int GetSquareBracketLevel(StyleContext& sc, const bool stopAtLnBreak) +{ + auto const posCurrent = static_cast(sc.currentPos); + + int iBracketLevel = 0; + bool inSQStrg = false; + bool inDQStrg = false; + + Sci_Position pos = posCurrent - 1; + + while ((pos >= 0) && (iBracketLevel <= 0)) + { + Sci_Position const diff = pos - posCurrent; + int const ch = sc.GetRelative(diff); + + if (stopAtLnBreak && IsLineBreak(ch)) { + break; + } + int const ch_p = sc.GetRelative(diff - 1); + + if (!_inComment(sc, pos)) + { + if (ch_p != '\\') // not ESCaped + { + if (ch == '"') + { + if (inDQStrg) { + inDQStrg = false; + } + else { + inDQStrg = !inSQStrg; + } + } + else if (ch == '\'') + { + if (inSQStrg) { + inSQStrg = false; + } + else { + inSQStrg = !inDQStrg; + } + } + } + if (!_isQuoted(inDQStrg, inSQStrg)) { + + if (ch == ']') { + --iBracketLevel; + } + else if (ch == '[') { + ++iBracketLevel; + } + + } + } + --pos; + } + return iBracketLevel; +} +// ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- @@ -361,53 +424,74 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int bool inSQuotedKey = false; bool inDQuotedKey = false; - bool inInnerQKey = false; bool inSectionDef = false; + bool isSectKeyBeg = false; + bool isSectKeyEnd = false; bool inMultiLnString = (sc.state == SCE_TOML_STR_BASIC) || (sc.state == SCE_TOML_STR_LITERAL); - bool inMultiLnArrayDef = false; + bool inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); bool inHex = false; bool inBin = false; bool inOct = false; bool bPossibleKeyword = true; + bool bInInlBracket = false; for (; sc.More(); sc.Forward()) { - // -------------------------------------------------- // check if in the middle of a line continuation ... // -------------------------------------------------- // reset context infos - if (sc.atLineStart) { - inMultiLnArrayDef = (GetBracketLevel(sc) >= 0); - inSQuotedKey = inDQuotedKey = inInnerQKey = false; + if (sc.atLineStart) + { + inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); + inSQuotedKey = inDQuotedKey = false; + isSectKeyBeg = isSectKeyEnd = false; bPossibleKeyword = true; - switch (sc.state) - { - case SCE_TOML_STR_BASIC: - case SCE_TOML_STR_LITERAL: - if (!inMultiLnString) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - break; - case SCE_TOML_KEY: - case SCE_TOML_ASSIGNMENT: - sc.SetState(SCE_TOML_PARSINGERROR); - break; - case SCE_TOML_PARSINGERROR: - if (!inMultiLnArrayDef) { - sc.SetState(SCE_TOML_DEFAULT); - } - break; - default: - if (!inMultiLnArrayDef) { + if (inMultiLnArrayDef) { + switch (sc.state) + { + case SCE_TOML_COMMENT: + sc.SetState(SCE_TOML_VALUE); + break; + default: + // no state change + break; + } + } + else { + // NOT in inMultiLnArrayDef + switch (sc.state) + { + case SCE_TOML_STR_BASIC: + case SCE_TOML_STR_LITERAL: + if (!inMultiLnString) { + SetStateParsingError(sc); + } + break; + + case SCE_TOML_KEY: + case SCE_TOML_ASSIGNMENT: + SetStateParsingError(sc); + break; + + case SCE_TOML_COMMENT: + sc.SetState(SCE_TOML_DEFAULT); // reset + break; + + case SCE_TOML_PARSINGERROR: sc.SetState(SCE_TOML_DEFAULT); // reset - } - break; + break; + + case SCE_TOML_VALUE: + default: + sc.SetState(SCE_TOML_DEFAULT); + break; + } } } @@ -415,18 +499,22 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int // current state independent // ------------------------- if (IsLineBreak(sc.ch)) { - continue; // eat line breaks + if (bInInlBracket) { + ForwardSetStateParsingError(sc); + } + else { + continue; // eat line breaks + } } if (sc.state != SCE_TOML_PARSINGERROR) { if (IsCommentChar(sc.ch)) { if (inSectionDef) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } else if ((sc.state == SCE_TOML_STR_BASIC) || - (sc.state == SCE_TOML_STR_LITERAL) || - inMultiLnArrayDef) + (sc.state == SCE_TOML_STR_LITERAL)) { sc.ForwardSetState(sc.state); // ignore } @@ -434,7 +522,6 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_COMMENT); } } - } // SCE_TOML_PARSINGERROR @@ -444,6 +531,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int switch (sc.state) { case SCE_TOML_DEFAULT: + { if (IsASpaceOrTab(sc.ch)) { // eat } @@ -451,8 +539,8 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_COMMENT); } else if (sc.ch == '[') { - sc.SetState(SCE_TOML_SECTION); inSectionDef = true; + sc.SetState(SCE_TOML_SECTION); } else if (validKey.Contains(sc.ch)) { sc.SetState(SCE_TOML_KEY); @@ -467,95 +555,131 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int sc.SetState(SCE_TOML_KEY); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + if (!inMultiLnArrayDef) { + SetStateParsingError(sc); + } } } - break; + } + break; case SCE_TOML_COMMENT: + { // eat - rest of line is comment - break; + } + break; case SCE_TOML_SECTION: - if (sc.ch == ']') { - if (GetBracketLevel(sc, true) == 0) { - inSectionDef = false; + { + if (sc.ch == '"') { + if (!inSQuotedKey) { + inDQuotedKey = !inDQuotedKey; } } - else if (IsCommentChar(sc.ch)) { - if (!inSectionDef) { - sc.SetState(SCE_TOML_COMMENT); + else if (sc.ch == '\'') { + if (!inDQuotedKey) { + inSQuotedKey = !inSQuotedKey; + } + } + else if (!(inDQuotedKey || inSQuotedKey)) { + if (sc.ch == '[') { + if (isSectKeyBeg) { + SetStateParsingError(sc); + } + // Array of Tables - eat + } + else if (sc.ch == ']') { + int const level = GetSquareBracketLevel(sc, true); + if (isSectKeyBeg) { + isSectKeyEnd = true; + } + if (level == 1) { + inSectionDef = false; + } + else if (level < 1) { + SetStateParsingError(sc); + } + } + else if (IsCommentChar(sc.ch)) { + if (!inSectionDef) { + sc.SetState(SCE_TOML_COMMENT); + } + else { + SetStateParsingError(sc); + } + } + else if (IsASpaceOrTab(sc.ch)) { + // eat } else { - sc.SetState(SCE_TOML_PARSINGERROR); + if (validKey.Contains(sc.ch)) { + if (isSectKeyEnd) { + SetStateParsingError(sc); + } + else { + isSectKeyBeg = true; + } + } + else { + SetStateParsingError(sc); + } } } - break; - + } + break; case SCE_TOML_KEY: + { if (sc.atLineEnd) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); break; } else if ((sc.ch == '"') && inDQuotedKey) { - if (inInnerQKey) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - else { - sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key - } + sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key + inDQuotedKey = false; break; } else if ((sc.ch == '\'') && inSQuotedKey) { - if (inInnerQKey) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - else { - sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key - } + sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key + inSQuotedKey = false; break; } else if (IsASpaceOrTab(sc.ch)) { - if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) { + if (!(inSQuotedKey || inDQuotedKey)) { sc.SetState(SCE_TOML_ASSIGNMENT); // end of key } break; // else eat } else if (IsAssignChar(sc.ch)) { - if ((inSQuotedKey || inDQuotedKey || inInnerQKey)) { - break; + if ((inSQuotedKey || inDQuotedKey)) { + break; // eat } sc.SetState(SCE_TOML_ASSIGNMENT); // end of key // === fall through === case SCE_TOML_ASSIGNMENT: } else if (validKey.Contains(sc.ch)) { - break; // eat + break; // eat } else { - if ((sc.ch == '"') && inSQuotedKey) { - inInnerQKey = !inInnerQKey; //toggle + if (!(inSQuotedKey || inDQuotedKey)) { + SetStateParsingError(sc); } - else if ((sc.ch == '\'') && inDQuotedKey) { - inInnerQKey = !inInnerQKey; //toggle - } - else if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) { - sc.SetState(SCE_TOML_PARSINGERROR); - } - break; // else eat + break; // no fall through } - // === fall through === + } + // === fall through === case SCE_TOML_ASSIGNMENT: - if (sc.atLineEnd) { - sc.SetState(SCE_TOML_PARSINGERROR); + { + if (sc.atLineEnd || (inSQuotedKey || inDQuotedKey)) { + SetStateParsingError(sc); break; } else if (IsAssignChar(sc.ch)) { if (IsLookAheadLineEmpty(sc)) { - sc.ForwardSetState(SCE_TOML_PARSINGERROR); + ForwardSetStateParsingError(sc); break; } else { @@ -567,12 +691,14 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int break; // OK } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); break; } - // === fall through === + } + // === fall through === case SCE_TOML_VALUE: + { if (bPossibleKeyword && IsLookAheadInList(sc, validKeyWord, keywords)) { sc.SetState(SCE_TOML_KEYWORD); break; @@ -581,16 +707,33 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int bPossibleKeyword = false; } if (sc.ch == '[') { - inMultiLnArrayDef = true; + inMultiLnArrayDef = (GetSquareBracketLevel(sc, false) > 0); } else if (sc.ch == ']') { - int const level = GetBracketLevel(sc); - if (level == 0) { + int const level = GetSquareBracketLevel(sc, false); + if (level == 1) { inMultiLnArrayDef = false; } - else if (level < 0) { - sc.SetState(SCE_TOML_PARSINGERROR); - inMultiLnArrayDef = false; + else if (level <= 0) { + SetStateParsingError(sc); + } + } + else if (sc.ch == '}') { + if (bInInlBracket) { + bInInlBracket = false; + } + else { + SetStateParsingError(sc); + } + } + else if (sc.ch == '{') { + if (bInInlBracket) + { + SetStateParsingError(sc); + } + else { + bInInlBracket = true; + sc.SetState(SCE_TOML_VALUE); } } else if (IsNumber(sc)) { @@ -616,31 +759,41 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } } } - else if (sc.ch == '"') { + else if ((sc.ch == '"') && (sc.chPrev != '\\')) { sc.SetState(SCE_TOML_STR_BASIC); if (sc.Match(R"(""")")) { inMultiLnString = true; sc.Forward(2); } + else { + inMultiLnString = false; + } } - else if (sc.ch == '\'') { + else if ((sc.ch == '\'') && (sc.chPrev != '\\')) { sc.SetState(SCE_TOML_STR_LITERAL); if (sc.Match(R"(''')")) { inMultiLnString = true; sc.Forward(2); } + else { + inMultiLnString = false; + } } - break; + } + break; case SCE_TOML_KEYWORD: + { if (!(IsASpaceX(sc.ch) || validKeyWord.Contains(sc.ch))) { sc.SetState(SCE_TOML_VALUE); } - break; + } + break; case SCE_TOML_NUMBER: + { if (sc.ch == '_') { // eat // TODO: only once } @@ -653,10 +806,10 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } else { if ((inHex && !IsADigit(sc.ch, 16)) || - (inBin && !IsADigit(sc.ch, 2)) || - (inOct && !IsADigit(sc.ch, 8))) + (inBin && !IsADigit(sc.ch, 2)) || + (inOct && !IsADigit(sc.ch, 8))) { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } } @@ -672,6 +825,9 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int else if (IsADigit(sc.ch)) { // eat } + else if (chDateTime.Contains(sc.ch)) { + sc.SetState(SCE_TOML_DATETIME); + } else { if (validNumberEnd.Contains(sc.ch)) { sc.SetState(SCE_TOML_VALUE); @@ -680,26 +836,30 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int inOct = false; } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } - break; + } + break; case SCE_TOML_DATETIME: + { if (!IsADigit(sc.ch) && !chDateTime.Contains(sc.ch) && (sc.ch != '.')) { if (validNumberEnd.Contains(sc.ch)) { sc.SetState(SCE_TOML_VALUE); } else { - sc.SetState(SCE_TOML_PARSINGERROR); + SetStateParsingError(sc); } } - break; + } + break; case SCE_TOML_STR_BASIC: case SCE_TOML_STR_LITERAL: + { if (sc.ch == '"') { if (sc.state == SCE_TOML_STR_BASIC) { if (sc.chPrev != '\\') { @@ -732,17 +892,22 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int } } } - break; + } + break; case SCE_TOML_PARSINGERROR: + { // keep parsing error until new line - break; + } + break; default: - sc.SetState(SCE_TOML_PARSINGERROR); // unknown - break; + { + SetStateParsingError(sc); // unknown + } + break; } //~if (sc.atLineEnd) { diff --git a/src/VersionEx.h b/src/VersionEx.h index ef0293ed8..b43ee6e27 100644 --- a/src/VersionEx.h +++ b/src/VersionEx.h @@ -7,8 +7,8 @@ #define SAPPNAME "Notepad3" #define VERSION_MAJOR 5 #define VERSION_MINOR 19 -#define VERSION_REV 630 -#define VERSION_BUILD 2381 -#define SCINTILLA_VER 416 -#define ONIGMO_REGEX_VER 6.2.0 -#define VERSION_PATCH +#define VERSION_REV 702 +#define VERSION_BUILD 2382 +#define SCINTILLA_VER 417 +#define ONIGURUMA_REGEX_VER 6.9.2 +#define VERSION_PATCH Oniguruma diff --git a/test/txtfiles/TOML.toml b/test/txtfiles/TOML.toml index 5803e35bf..473cc1a02 100644 --- a/test/txtfiles/TOML.toml +++ b/test/txtfiles/TOML.toml @@ -41,10 +41,10 @@ data = [ ["gamma", "delta"], [1, 2] ] key = "value" key = # INVALID -key = "value" bare_key = "value" bare-key = "value" 1234 = "value" += "string" # is invalid "127.0.0.1" = "value" "character encoding" = "value" @@ -52,6 +52,9 @@ bare-key = "value" 'key2' = "value" 'quoted "value"' = "value" +"~!@$^&*()_+-` 1234567890 []|/?><.,;:'" = 1 +'~!@$^&*()_+-` 1234567890 []|/?><.,;:"' = 2 + # boolean bool1 = true bool2 = false