Merge pull request #1343 from RaiKoHoff/Dev_ONIGURUMA

TOML Lexer: fixing minor problems
This commit is contained in:
Rainer Kottenhoff 2019-06-14 16:07:10 +02:00 committed by GitHub
commit 12db738c6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 140 additions and 43 deletions

View File

@ -132,6 +132,7 @@ SettingsVersion=3
[Shell Script]
[SQL Query]
[Tcl Script]
[TOML Config]
[VBScript]
[VHDL]
[Visual Basic]

View File

@ -1 +1 @@
2252
2255

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.19.613.2252"
version="5.19.614.2255"
type="win32"
/>
<description>Notepad3 Oniguruma</description>

View File

@ -18,6 +18,18 @@
//- IsUpperOrLowerCase(int ch);
//- IsAlphaNumeric(int ch);
// Whitespace predicate: true for the blank (' ') and for every code in the
// range 0x09..0x0D (TAB, LF, VT, FF, CR); false for anything else.
constexpr bool IsASpaceX(const int ch) noexcept {
  return (ch == ' ') ? true : !((ch < 0x09) || (ch > 0x0d));
}
// Horizontal-whitespace predicate: true only for a horizontal tab or a blank.
// Line breaks and other control characters yield false.
constexpr bool IsABlankOrTabX(const int ch) noexcept {
  return (ch == '\t') ? true : (ch == ' ');
}
// Decimal-digit predicate: true exactly for the characters '0' through '9'.
constexpr bool IsADigitX(const int ch) noexcept {
  return !((ch < '0') || (ch > '9'));
}
constexpr bool IsALetter(const int ch) noexcept {
// 97 to 122 || 65 to 90
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
@ -28,7 +40,8 @@ constexpr bool IsLineBreak(const int ch) noexcept {
}
inline int IsNumber(const Scintilla::StyleContext& sc) {
return Scintilla::IsADigit(sc.ch) || (((sc.ch == '+') || (sc.ch == '-')) && Scintilla::IsADigit(sc.chNext));
return Scintilla::IsADigit(sc.ch) ||
(((sc.ch == '+') || (sc.ch == '-')) && Scintilla::IsADigit(sc.chNext));
}
constexpr int IsNumHex(const Scintilla::StyleContext& sc) noexcept {
@ -47,10 +60,16 @@ inline int IsNumExponent(const Scintilla::StyleContext& sc) {
return Scintilla::IsADigit(sc.ch) && ((sc.chNext == 'e') || (sc.chNext == 'E'));
}
inline bool IsAIdentifierChar(const int ch) {
return (Scintilla::IsAlphaNumeric(ch) || ch == '_' || ch == '.');
// Copies the NUL-terminated string `input` to `output`, dropping every
// character for which IsASpaceX() is true (not only leading/trailing ones),
// and NUL-terminates the result.
//   input  - NUL-terminated source string.
//   output - destination; receives at most as many characters as `input`
//            holds, plus the terminator. In-place use (output == input) works
//            because the write cursor never gets ahead of the read cursor.
inline void TrimIdentifier(const char* input, char* output)
{
  char* dst = output;
  for (const char* src = input; *src != '\0'; ++src) {
    if (IsASpaceX(*src)) {
      continue; // whitespace is skipped, not copied
    }
    *dst++ = *src;
  }
  *dst = '\0';
}
#endif //_CHARSETX_H_

View File

@ -64,7 +64,7 @@ namespace {
}
};
static const char* const tomlWordListsDesc[] = {
static const char* const tomlWordLists[] = {
"Keyword",
nullptr
};
@ -75,7 +75,7 @@ namespace {
DefineProperty("fold", &OptionsTOML::fold, "FOLD COMMENT");
DefineProperty("fold.compact", &OptionsTOML::foldCompact, "FOLDCOMPACT COMMENT");
DefineWordListSets(tomlWordListsDesc);
DefineWordListSets(tomlWordLists);
}
};
@ -99,7 +99,7 @@ namespace {
class LexerTOML : public DefaultLexer {
CharacterSet validKey;
//CharacterSet validKeyWord;
CharacterSet validKeyWord;
CharacterSet validNumberEnd;
CharacterSet chDateTime;
@ -112,7 +112,7 @@ public:
LexerTOML()
: DefaultLexer(lexicalClasses, ELEMENTS(lexicalClasses))
, validKey(CharacterSet::setAlphaNum, R"(-_.)", 0x80, false)
//, validKeyWord(CharacterSet::setAlphaNum, "_", 0x80, false)
, validKeyWord(CharacterSet::setAlphaNum, "_+-", 0x80, false)
, validNumberEnd(CharacterSet::setNone, " \t\n\v\f\r#,)}]", 0x80, false)
, chDateTime(CharacterSet::setNone, "-:TZ", 0x80, false)
{ }
@ -215,6 +215,17 @@ constexpr bool IsAssignChar(const int ch) noexcept {
}
// ----------------------------------------------------------------------------
// Identifier-character predicate: accepts '_' and '.' in addition to
// everything IsAlphaNumeric() accepts.
inline bool IsAIdentifierChar(const int ch) {
  if ((ch == '_') || (ch == '.')) {
    return true;
  }
  return IsAlphaNumeric(ch);
}
// ----------------------------------------------------------------------------
// Keyword-character predicate: accepts the sign characters '+' and '-' in
// addition to everything IsAIdentifierChar() accepts.
inline bool IsAKeywordChar(const int ch) {
  switch (ch) {
    case '+':
    case '-':
      return true;
    default:
      return IsAIdentifierChar(ch);
  }
}
// ----------------------------------------------------------------------------
static int GetBracketLevel(StyleContext& sc, const bool stopAtLnBreak = false)
{
Sci_Position const posCurrent = static_cast<Sci_Position>(sc.currentPos);
@ -257,9 +268,9 @@ static bool IsDateTimeStr(StyleContext& sc, const CharacterSet& validCh, const C
Sci_Position const posCurrent = static_cast<Sci_Position>(sc.currentPos);
Sci_Position const posEnd = static_cast<Sci_Position>(sc.lineStartNext);
Sci_Position i = 0;
bool bDateTimeFlag = false;
Sci_Position i = 0;
while ((++i + posCurrent) < posEnd)
{
int const ch = sc.GetRelative(i);
@ -286,9 +297,9 @@ static bool IsLookAheadLineEmpty(StyleContext& sc)
Sci_Position const posCurrent = static_cast<Sci_Position>(sc.currentPos);
Sci_Position const posEnd = static_cast<Sci_Position>(sc.lineStartNext);
Sci_Position i = 0;
bool bLHLineEmpty = true;
Sci_Position i = 0;
while ((++i + posCurrent) < posEnd)
{
int const ch = sc.GetRelative(i);
@ -305,6 +316,40 @@ static bool IsLookAheadLineEmpty(StyleContext& sc)
}
// ----------------------------------------------------------------------------
// Looks ahead from the current position (inclusive) up to the start of the
// next line, collects the first run of characters accepted by `validCh`
// (leading blanks/tabs are skipped) and reports whether that run is contained
// in `keywords`.
//   sc       - style context; only read via currentPos, lineStartNext and
//              GetRelative(), its position is not advanced here.
//   validCh  - set of characters that may form the candidate word.
//   keywords - word list the candidate is looked up in.
// Returns true if a non-empty candidate was collected and is in `keywords`.
static bool IsLookAheadInList(StyleContext& sc, const CharacterSet& validCh, const WordList& keywords)
{
  Sci_Position const posCurrent = static_cast<Sci_Position>(sc.currentPos);
  Sci_Position const posEnd = static_cast<Sci_Position>(sc.lineStartNext);

  // Per-call buffer (deliberately not 'static'): a buffer shared between calls
  // kept stale characters from an earlier lookup whenever the scan ended by
  // reaching posEnd or the length cap instead of hitting an invalid character.
  char identifier[1024];
  constexpr int maxLen = static_cast<int>(sizeof(identifier)) - 1;
  int len = 0;

  for (Sci_Position i = 0; ((posCurrent + i) < posEnd) && (len < maxLen); ++i)
  {
    int const ch = sc.GetRelative(i);
    if ((len == 0) && IsABlankOrTabX(ch)) {
      continue; // skip blanks/tabs in front of the candidate
    }
    if (!validCh.Contains(ch)) {
      break; // first character outside the valid set ends the candidate
    }
    identifier[len++] = static_cast<char>(ch);
  }
  // Always terminate, regardless of which condition ended the scan.
  identifier[len] = '\0';

  if (identifier[0] == '\0') {
    return false; // nothing collected
  }
  TrimIdentifier(identifier, identifier);
  return keywords.InList(identifier);
}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
@ -327,15 +372,20 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
bool inBin = false;
bool inOct = false;
bool bPossibleKeyword = true;
for (; sc.More(); sc.Forward())
{
// --------------------------------------------------
// check if in the middle of a line continuation ...
// --------------------------------------------------
// reset context infos
if (sc.atLineStart) {
inMultiLnArrayDef = (GetBracketLevel(sc) >= 0);
inSQuotedKey = inDQuotedKey = inInnerQKey = false; // clear
inSQuotedKey = inDQuotedKey = inInnerQKey = false;
bPossibleKeyword = true;
switch (sc.state)
{
case SCE_TOML_STR_BASIC:
@ -344,11 +394,14 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
sc.SetState(SCE_TOML_PARSINGERROR);
}
break;
case SCE_TOML_KEY:
case SCE_TOML_ASSIGNMENT:
sc.SetState(SCE_TOML_PARSINGERROR);
break;
case SCE_TOML_PARSINGERROR:
// preserve error
if (!inMultiLnArrayDef) {
sc.SetState(SCE_TOML_DEFAULT);
}
break;
default:
if (!inMultiLnArrayDef) {
@ -365,7 +418,7 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
continue; // eat line breaks
}
if (sc.ch != SCE_TOML_PARSINGERROR)
if (sc.state != SCE_TOML_PARSINGERROR)
{
if (IsCommentChar(sc.ch)) {
if (inSectionDef) {
@ -440,13 +493,18 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
case SCE_TOML_KEY:
if ((sc.ch == '"') && inDQuotedKey) {
if (sc.atLineEnd) {
sc.SetState(SCE_TOML_PARSINGERROR);
break;
}
else if ((sc.ch == '"') && inDQuotedKey) {
if (inInnerQKey) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
else {
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
}
break;
}
else if ((sc.ch == '\'') && inSQuotedKey) {
if (inInnerQKey) {
@ -455,21 +513,23 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
else {
sc.ForwardSetState(SCE_TOML_ASSIGNMENT); // end of key
}
break;
}
else if (IsASpaceOrTab(sc.ch)) {
if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) {
sc.SetState(SCE_TOML_ASSIGNMENT); // end of key
}
// else eat
break; // else eat
}
else if (IsAssignChar(sc.ch)) {
if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) {
sc.SetState(SCE_TOML_ASSIGNMENT);
if ((inSQuotedKey || inDQuotedKey || inInnerQKey)) {
break;
}
// else eat
sc.SetState(SCE_TOML_ASSIGNMENT); // end of key
// === fall through === case SCE_TOML_ASSIGNMENT:
}
else if (validKey.Contains(sc.ch)) {
// eat
break; // eat
}
else {
if ((sc.ch == '"') && inSQuotedKey) {
@ -481,20 +541,24 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
else if (!(inSQuotedKey || inDQuotedKey || inInnerQKey)) {
sc.SetState(SCE_TOML_PARSINGERROR);
}
// else eat
break; // else eat
}
break;
// === fall through ===
case SCE_TOML_ASSIGNMENT:
if (IsAssignChar(sc.ch)) {
if (!IsLookAheadLineEmpty(sc)) {
sc.ForwardSetState(SCE_TOML_VALUE);
if (sc.atLineEnd) {
sc.SetState(SCE_TOML_PARSINGERROR);
break;
}
else if (IsAssignChar(sc.ch)) {
if (IsLookAheadLineEmpty(sc)) {
sc.ForwardSetState(SCE_TOML_PARSINGERROR);
break;
}
else {
sc.SetState(SCE_TOML_PARSINGERROR);
sc.ForwardSetState(SCE_TOML_VALUE);
// === fall through === case SCE_TOML_VALUE:
}
// fall through case SCE_TOML_VALUE:
}
else if (IsASpace(sc.ch)) {
break; // OK
@ -503,9 +567,16 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
sc.SetState(SCE_TOML_PARSINGERROR);
break;
}
// fall through
// === fall through ===
case SCE_TOML_VALUE:
if (bPossibleKeyword && IsLookAheadInList(sc, validKeyWord, keywords)) {
sc.SetState(SCE_TOML_KEYWORD);
break;
}
else {
bPossibleKeyword = false;
}
if (sc.ch == '[') {
inMultiLnArrayDef = true;
}
@ -559,6 +630,13 @@ void SCI_METHOD LexerTOML::Lex(Sci_PositionU startPos, Sci_Position length, int
break;
case SCE_TOML_KEYWORD:
if (!(IsASpaceX(sc.ch) || validKeyWord.Contains(sc.ch))) {
sc.SetState(SCE_TOML_VALUE);
}
break;
case SCE_TOML_NUMBER:
if (sc.ch == '_') {
// eat // TODO: only once
@ -765,7 +843,7 @@ void SCI_METHOD LexerTOML::Fold(Sci_PositionU startPos, Sci_Position length, int
}
// ----------------------------------------------------------------------------
LexerModule lmTOML(SCLEX_TOML, LexerTOML::LexerFactoryTOML, "toml", tomlWordListsDesc);
LexerModule lmTOML(SCLEX_TOML, LexerTOML::LexerFactoryTOML, "toml", tomlWordLists);
// ----------------------------------------------------------------------------

View File

@ -4,7 +4,7 @@
//KEYWORDLIST KeyWords_TOML = EMPTY_KEYWORDLIST;
KEYWORDLIST KeyWords_TOML = {
"false inf nan table true", // Keyword
"+inf -inf +nan -nan inf nan true false", // Keyword
"", "", "", "", "", "", "", "" };
@ -13,7 +13,7 @@ SCLEX_TOML, IDS_LEX_TOML_CFG, L"TOML Config", L"toml", L"",
&KeyWords_TOML,{
{ {STYLE_DEFAULT}, IDS_LEX_STR_63126, L"Default", L"", L"" },
//{ {SCE_TOML_DEFAULT}, IDS_LEX_STR_63126, L"Default", L"", L"" },
{ {SCE_TOML_KEYWORD}, IDS_LEX_STR_63128, L"Keyword", L"fore:#E00000", L"" },
{ {SCE_TOML_KEYWORD}, IDS_LEX_STR_63128, L"Keyword", L"bold; fore:#FF0080", L"" },
{ {SCE_TOML_COMMENT}, IDS_LEX_STR_63127, L"Comment", L"fore:#008000", L"" },
{ {SCE_TOML_SECTION}, IDS_LEX_STR_63232, L"Section", L"bold; fore:#000000; back:#FFF1A8; eolfilled", L"" },
{ {SCE_TOML_KEY}, IDS_LEX_STR_63348, L"Key", L"bold; fore:#5E608F", L"" },
@ -21,6 +21,6 @@ SCLEX_TOML, IDS_LEX_TOML_CFG, L"TOML Config", L"toml", L"",
{ {SCE_TOML_VALUE}, IDS_LEX_STR_63201, L"Value", L"fore:#202020", L"" },
{ {SCE_TOML_NUMBER}, IDS_LEX_STR_63130, L"Number", L"fore:#0000E0", L"" },
{ {SCE_TOML_DATETIME}, IDS_LEX_STR_63356, L"Date-Time", L"fore:#950095", L"" },
{ {MULTI_STYLE(SCE_TOML_STR_BASIC, SCE_TOML_STR_LITERAL,0,0)}, IDS_LEX_STR_63131, L"String", L"italic; fore:#800000", L"" },
{ {SCE_TOML_PARSINGERROR}, IDS_LEX_STR_63252, L"Parsing Error", L"fore:#FFFF00; back:#A00000", L"" },
{ {MULTI_STYLE(SCE_TOML_STR_BASIC, SCE_TOML_STR_LITERAL,0,0)}, IDS_LEX_STR_63131, L"String", L"italic; fore:#606060", L"" },
{ {SCE_TOML_PARSINGERROR}, IDS_LEX_STR_63252, L"Parsing Error", L"fore:#FFFF00; back:#A00000; eolfilled", L"" },
EDITLEXER_SENTINEL } };

View File

@ -1419,8 +1419,8 @@ void Style_SetLexer(HWND hwnd, PEDITLEXER pLexNew)
switch (s_pLexCurrent->lexerID)
{
case SCLEX_PYTHON:
SendMessage(hwnd, SCI_INDICSETSTYLE, 1, INDIC_COMPOSITIONTHIN);
SendMessage(hwnd, SCI_INDICSETFORE, 1, (LPARAM)RGB(0xAF, 0, 0)); // (light red)
SendMessage(hwnd, SCI_INDICSETSTYLE, 1, INDIC_BOX);
SendMessage(hwnd, SCI_INDICSETFORE, 1, (LPARAM)RGB(0xBF, 0, 0)); // (light red)
//SendMessage(hwnd, SCI_INDICSETALPHA, 1, 40);
//SendMessage(hwnd, SCI_INDICSETOUTLINEALPHA, 1, 100);
break;

View File

@ -7,8 +7,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 19
#define VERSION_REV 613
#define VERSION_BUILD 2252
#define VERSION_REV 614
#define VERSION_BUILD 2255
#define SCINTILLA_VER 416
#define ONIGURUMA_REGEX_VER 6.9.2
#define VERSION_PATCH Oniguruma

View File

@ -41,6 +41,9 @@ bare-key = "value"
'key2' = "value"
'quoted "value"' = "value"
# boolean
bool1 = true
bool2 = false
[strings]
str = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."
@ -137,10 +140,6 @@ sf4 = nan # actual sNaN/qNaN encoding is implementation specific
sf5 = +nan # same as `nan`
sf6 = -nan # valid, actual encoding is implementation specific
# boolean
bool1 = true
bool2 = false
[date time]
# Offset Date-Time ( RFC 3339 : http://tools.ietf.org/html/rfc3339 )