From 1d28edf6e5aa86ff2bebad15a1342d62103f10bc Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Thu, 19 Sep 2019 09:26:44 +0200 Subject: [PATCH] + fix: prevent _ctype assertions in Scintilla's lexer --- sciXlexers/LexAHKL.cxx | 4 ++-- scintilla/lexers/LexAU3.cxx | 6 +++--- scintilla/lexers/LexAVS.cxx | 4 ++-- scintilla/lexers/LexAsm.cxx | 6 +++--- scintilla/lexers/LexCPP.cxx | 2 +- scintilla/lexers/LexCSS.cxx | 4 ++-- scintilla/lexers/LexCoffeeScript.cxx | 2 +- scintilla/lexers/LexHTML.cxx | 6 +++--- scintilla/lexers/LexNim.cxx | 4 ++-- scintilla/lexers/LexPowerShell.cxx | 2 +- scintilla/lexers/LexPython.cxx | 4 ++-- scintilla/lexers/LexR.cxx | 4 ++-- scintilla/lexers/LexRuby.cxx | 2 +- scintilla/lexers/LexSQL.cxx | 10 +++++----- scintilla/lexers/LexTCL.cxx | 4 ++-- scintilla/lexers/LexVB.cxx | 6 +++--- scintilla/lexers/LexVHDL.cxx | 4 ++-- 17 files changed, 37 insertions(+), 37 deletions(-) diff --git a/sciXlexers/LexAHKL.cxx b/sciXlexers/LexAHKL.cxx index 291352803..0fd9ae37c 100644 --- a/sciXlexers/LexAHKL.cxx +++ b/sciXlexers/LexAHKL.cxx @@ -409,7 +409,7 @@ void SCI_METHOD LexerAHKL::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, i sc.SetState(SCE_AHKL_NEUTRAL); } else if ((OnlySpaces || isspace(sc.chPrev)) && - ((sc.ch == '@' && isalnum(sc.chNext)) || valDocComment.Contains(sc.ch))) { + ((sc.ch == '@' && isalnum(sc.chNext & 0xFF)) || valDocComment.Contains(sc.ch))) { if (valDocComment.Contains(sc.ch)) inDocComment = true; @@ -442,7 +442,7 @@ void SCI_METHOD LexerAHKL::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, i if (isspace(sc.ch) || SynOperator.Contains(sc.ch)) sc.SetState(SCE_AHKL_NEUTRAL); - else if (!isxdigit(sc.ch)) + else if (!isxdigit(sc.ch & 0xFF)) sc.ChangeState(SCE_AHKL_IDENTIFIER); break; } diff --git a/scintilla/lexers/LexAU3.cxx b/scintilla/lexers/LexAU3.cxx index 417c832e0..e23f671d2 100644 --- a/scintilla/lexers/LexAU3.cxx +++ b/scintilla/lexers/LexAU3.cxx @@ -76,12 +76,12 @@ static inline bool IsTypeCharacter(const int ch) } 
static inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return ((IsASCII(ch) && isalnum(ch)) || ch == '_'); } static inline bool IsAWordStart(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '@' || ch == '#' || ch == '$' || ch == '.'); + return ((IsASCII(ch) && isalnum(ch)) || ch == '_' || ch == '@' || ch == '#' || ch == '$' || ch == '.'); } static inline bool IsAOperator(char ch) { @@ -140,7 +140,7 @@ static int GetSendKey(const char *szLine, char *szKey) // Save second portion into var... szSpecial[nSpecPos++] = cTemp; // check if Second portion is all numbers for repeat fuction - if (isdigit(cTemp) == false) {nSpecNum = 0;} + if (isdigit(cTemp & 0xFF) == false) {nSpecNum = 0;} } } nPos++; // skip to next char diff --git a/scintilla/lexers/LexAVS.cxx b/scintilla/lexers/LexAVS.cxx index 96e32d5ad..138f6b5db 100644 --- a/scintilla/lexers/LexAVS.cxx +++ b/scintilla/lexers/LexAVS.cxx @@ -27,7 +27,7 @@ using namespace Scintilla; static inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '_'); } static inline bool IsAWordStart(int ch) { @@ -37,7 +37,7 @@ static inline bool IsAWordStart(int ch) { static inline bool IsANumberChar(int ch) { // Not exactly following number definition (several dots are seen as OK, etc.) // but probably enough in most cases. - return (ch < 0x80) && (isdigit(ch) || ch == '.' || ch == '-' || ch == '+'); + return IsASCII(ch) && (isdigit(ch) || ch == '.' || ch == '-' || ch == '+'); } static void ColouriseAvsDoc( diff --git a/scintilla/lexers/LexAsm.cxx b/scintilla/lexers/LexAsm.cxx index f34c8b50c..b3dc29259 100644 --- a/scintilla/lexers/LexAsm.cxx +++ b/scintilla/lexers/LexAsm.cxx @@ -35,17 +35,17 @@ using namespace Scintilla; static inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '.' || + return IsASCII(ch) && (isalnum(ch) || ch == '.' 
|| ch == '_' || ch == '?'); } static inline bool IsAWordStart(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' || + return IsASCII(ch) && (isalnum(ch) || ch == '_' || ch == '.' || ch == '%' || ch == '@' || ch == '$' || ch == '?'); } static inline bool IsAsmOperator(const int ch) { - if ((ch < 0x80) && (isalnum(ch))) + if (IsASCII(ch) && (isalnum(ch))) return false; // '.' left out as it is used to make up numbers if (ch == '*' || ch == '/' || ch == '-' || ch == '+' || diff --git a/scintilla/lexers/LexCPP.cxx b/scintilla/lexers/LexCPP.cxx index 3b8bbfa16..b329fa340 100644 --- a/scintilla/lexers/LexCPP.cxx +++ b/scintilla/lexers/LexCPP.cxx @@ -1151,7 +1151,7 @@ void SCI_METHOD LexerCPP::Lex(Sci_PositionU startPos, Sci_Position length, int i sc.SetState(SCE_C_DEFAULT|activitySet); } else if (! inRERange && sc.ch == '/') { sc.Forward(); - while ((sc.ch < 0x80) && islower(sc.ch)) + while (IsASCII(sc.ch) && islower(sc.ch)) sc.Forward(); // gobble regex flags sc.SetState(SCE_C_DEFAULT|activitySet); } else if (sc.ch == '\\' && ((sc.currentPos+1) < lineEndNext)) { diff --git a/scintilla/lexers/LexCSS.cxx b/scintilla/lexers/LexCSS.cxx index c1a86f537..af1e06d9e 100644 --- a/scintilla/lexers/LexCSS.cxx +++ b/scintilla/lexers/LexCSS.cxx @@ -41,11 +41,11 @@ static inline bool IsAWordChar(const unsigned int ch) { * Unfortunately, we are only getting string bytes here, and not full unicode characters. We cannot guarantee * that our byte is between U+0080 - U+00A0 (to return false), so we have to allow all characters U+0080 and higher */ - return ch >= 0x80 || isalnum(ch) || ch == '-' || ch == '_'; + return ch >= 0x80 || isalnum(ch & 0xFF) || ch == '-' || ch == '_'; } inline bool IsCssOperator(const int ch) { - if (!((ch < 0x80) && isalnum(ch)) && + if (!(IsASCII(ch) && isalnum(ch)) && (ch == '{' || ch == '}' || ch == ':' || ch == ',' || ch == ';' || ch == '.' || ch == '#' || ch == '!' 
|| ch == '@' || /* CSS2 */ diff --git a/scintilla/lexers/LexCoffeeScript.cxx b/scintilla/lexers/LexCoffeeScript.cxx index a00162335..8b64c6673 100644 --- a/scintilla/lexers/LexCoffeeScript.cxx +++ b/scintilla/lexers/LexCoffeeScript.cxx @@ -239,7 +239,7 @@ static void ColouriseCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length sc.SetState(SCE_COFFEESCRIPT_DEFAULT); } else if (sc.ch == '/') { sc.Forward(); - while ((sc.ch < 0x80) && islower(sc.ch)) + while (IsASCII(sc.ch) && islower(sc.ch)) sc.Forward(); // gobble regex flags sc.SetState(SCE_COFFEESCRIPT_DEFAULT); } else if (sc.ch == '\\') { diff --git a/scintilla/lexers/LexHTML.cxx b/scintilla/lexers/LexHTML.cxx index 96462235f..5866124cd 100644 --- a/scintilla/lexers/LexHTML.cxx +++ b/scintilla/lexers/LexHTML.cxx @@ -39,11 +39,11 @@ enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScrip enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc }; inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '.' 
|| ch == '_'); } inline bool IsAWordStart(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '_'); } inline bool IsOperator(int ch) { @@ -1676,7 +1676,7 @@ void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int case SCE_H_SGML_SPECIAL: if (!(IsASCII(ch) && isupper(ch))) { styler.ColourTo(i - 1, StateToPrint); - if (isalnum(ch)) { + if (isalnum(ch & 0xFF)) { state = SCE_H_SGML_ERROR; } else { state = SCE_H_SGML_DEFAULT; diff --git a/scintilla/lexers/LexNim.cxx b/scintilla/lexers/LexNim.cxx index da21cdb50..096c9b625 100644 --- a/scintilla/lexers/LexNim.cxx +++ b/scintilla/lexers/LexNim.cxx @@ -60,7 +60,7 @@ constexpr bool IsLetter(const int ch) noexcept { } bool IsAWordChar(const int ch) noexcept { - return ch < 0x80 && (isalnum(ch) || ch == '_' || ch == '.'); + return IsASCII(ch) && (isalnum(ch) || ch == '_' || ch == '.'); } int IsNumHex(const StyleContext &sc) noexcept { @@ -804,4 +804,4 @@ void SCI_METHOD LexerNim::Fold(Sci_PositionU startPos, Sci_Position length, int, } } -LexerModule lmNim(SCLEX_NIM, LexerNim::LexerFactoryNim, "nim", nimWordListDesc); \ No newline at end of file +LexerModule lmNim(SCLEX_NIM, LexerNim::LexerFactoryNim, "nim", nimWordListDesc); diff --git a/scintilla/lexers/LexPowerShell.cxx b/scintilla/lexers/LexPowerShell.cxx index 9969df35d..219efa65f 100644 --- a/scintilla/lexers/LexPowerShell.cxx +++ b/scintilla/lexers/LexPowerShell.cxx @@ -27,7 +27,7 @@ using namespace Scintilla; // Extended to accept accented characters static inline bool IsAWordChar(int ch) { - return ch >= 0x80 || isalnum(ch) || ch == '-' || ch == '_'; + return ch >= 0x80 || isalnum(ch & 0xFF) || ch == '-' || ch == '_'; } static void ColourisePowerShellDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, diff --git a/scintilla/lexers/LexPython.cxx b/scintilla/lexers/LexPython.cxx index f325ca360..ac077db91 100644 --- a/scintilla/lexers/LexPython.cxx +++ 
b/scintilla/lexers/LexPython.cxx @@ -186,7 +186,7 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, } inline bool IsAWordChar(int ch, bool unicodeIdentifiers) { - if (ch < 0x80) + if (IsASCII(ch)) return (isalnum(ch) || ch == '.' || ch == '_'); if (!unicodeIdentifiers) @@ -197,7 +197,7 @@ inline bool IsAWordChar(int ch, bool unicodeIdentifiers) { } inline bool IsAWordStart(int ch, bool unicodeIdentifiers) { - if (ch < 0x80) + if (IsASCII(ch)) return (isalpha(ch) || ch == '_'); if (!unicodeIdentifiers) diff --git a/scintilla/lexers/LexR.cxx b/scintilla/lexers/LexR.cxx index ead0dba18..09f2e448f 100644 --- a/scintilla/lexers/LexR.cxx +++ b/scintilla/lexers/LexR.cxx @@ -27,11 +27,11 @@ using namespace Scintilla; static inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '.' || ch == '_'); } static inline bool IsAWordStart(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '_'); } static inline bool IsAnOperator(const int ch) { diff --git a/scintilla/lexers/LexRuby.cxx b/scintilla/lexers/LexRuby.cxx index 2affffe65..4f83b81cc 100644 --- a/scintilla/lexers/LexRuby.cxx +++ b/scintilla/lexers/LexRuby.cxx @@ -54,7 +54,7 @@ static inline bool isSafeWordcharOrHigh(char ch) { // Error: scintilla's KeyWords.h includes '.' as a word-char // we want to separate things that can take methods from the // methods. 
- return isHighBitChar(ch) || isalnum(ch) || ch == '_'; + return isHighBitChar(ch) || isalnum(ch & 0xFF) || ch == '_'; } static bool inline iswhitespace(char ch) { diff --git a/scintilla/lexers/LexSQL.cxx b/scintilla/lexers/LexSQL.cxx index a8dd3c393..dbf80d1de 100644 --- a/scintilla/lexers/LexSQL.cxx +++ b/scintilla/lexers/LexSQL.cxx @@ -37,17 +37,17 @@ using namespace Scintilla; static inline bool IsAWordChar(int ch, bool sqlAllowDottedWord) { if (!sqlAllowDottedWord) - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '_'); else - return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.'); + return IsASCII(ch) && (isalnum(ch) || ch == '_' || ch == '.'); } static inline bool IsAWordStart(int ch) { - return (ch < 0x80) && (isalpha(ch) || ch == '_'); + return IsASCII(ch) && (isalpha(ch) || ch == '_'); } static inline bool IsADoxygenChar(int ch) { - return (islower(ch) || ch == '$' || ch == '@' || + return ((IsASCII(ch) && islower(ch)) || ch == '$' || ch == '@' || ch == '\\' || ch == '&' || ch == '<' || ch == '>' || ch == '#' || ch == '{' || ch == '}' || ch == '[' || ch == ']'); @@ -56,7 +56,7 @@ static inline bool IsADoxygenChar(int ch) { static inline bool IsANumberChar(int ch, int chPrev) { // Not exactly following number definition (several dots are seen as OK, etc.) // but probably enough in most cases. - return (ch < 0x80) && + return IsASCII(ch) && (isdigit(ch) || toupper(ch) == 'E' || ch == '.' 
|| ((ch == '-' || ch == '+') && chPrev < 0x80 && toupper(chPrev) == 'E')); } diff --git a/scintilla/lexers/LexTCL.cxx b/scintilla/lexers/LexTCL.cxx index 4fdeae5f3..32a963567 100644 --- a/scintilla/lexers/LexTCL.cxx +++ b/scintilla/lexers/LexTCL.cxx @@ -28,11 +28,11 @@ using namespace Scintilla; // Extended to accept accented characters static inline bool IsAWordChar(int ch) { return ch >= 0x80 || - (isalnum(ch) || ch == '_' || ch ==':' || ch=='.'); // : name space separator + (isalnum(ch & 0xFF) || ch == '_' || ch ==':' || ch=='.'); // : name space separator } static inline bool IsAWordStart(int ch) { - return ch >= 0x80 || (ch ==':' || isalpha(ch) || ch == '_'); + return ch >= 0x80 || (ch ==':' || isalpha(ch & 0xFF) || ch == '_'); } static inline bool IsANumberChar(int ch) { diff --git a/scintilla/lexers/LexVB.cxx b/scintilla/lexers/LexVB.cxx index ceac8d2fe..25491bf9f 100644 --- a/scintilla/lexers/LexVB.cxx +++ b/scintilla/lexers/LexVB.cxx @@ -40,18 +40,18 @@ static inline bool IsTypeCharacter(int ch) { // Extended to accept accented characters static inline bool IsAWordChar(int ch) { return ch >= 0x80 || - (isalnum(ch) || ch == '.' || ch == '_'); + (isalnum(ch & 0xFF) || ch == '.' || ch == '_'); } static inline bool IsAWordStart(int ch) { return ch >= 0x80 || - (isalpha(ch) || ch == '_'); + (isalpha(ch & 0xFF) || ch == '_'); } static inline bool IsANumberChar(int ch) { // Not exactly following number definition (several dots are seen as OK, etc.) // but probably enough in most cases. - return (ch < 0x80) && + return IsASCII(ch) && (isdigit(ch) || toupper(ch) == 'E' || ch == '.' 
|| ch == '-' || ch == '+' || ch == '_'); } diff --git a/scintilla/lexers/LexVHDL.cxx b/scintilla/lexers/LexVHDL.cxx index 92b18269b..725c70c24 100644 --- a/scintilla/lexers/LexVHDL.cxx +++ b/scintilla/lexers/LexVHDL.cxx @@ -40,12 +40,12 @@ static void ColouriseVHDLDoc( /***************************************/ static inline bool IsAWordChar(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_' ); + return IsASCII(ch) && (isalnum(ch) || ch == '.' || ch == '_' ); } /***************************************/ static inline bool IsAWordStart(const int ch) { - return (ch < 0x80) && (isalnum(ch) || ch == '_'); + return IsASCII(ch) && (isalnum(ch) || ch == '_'); } /***************************************/