use changed scintilla/lexers/LexPython.cxx from current Notepad2-mod

This commit is contained in:
Rainer Kottenhoff 2017-01-03 10:07:11 +01:00
parent 51693dc7c7
commit 334a68bbcb

View File

@ -25,6 +25,7 @@
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "SubStyles.h"
@ -39,7 +40,7 @@ namespace {
/* kwCDef, kwCTypeName only used for Cython */
enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
enum literalsAllowed { litNone = 0, litU = 1, litB = 2 };
enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };
const int indicatorWhitespace = 1;
@ -50,7 +51,8 @@ bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
bool IsPyStringTypeChar(int ch, literalsAllowed allowed) {
return
((allowed & litB) && (ch == 'b' || ch == 'B')) ||
((allowed & litU) && (ch == 'u' || ch == 'U'));
((allowed & litU) && (ch == 'u' || ch == 'U')) ||
((allowed & litF) && (ch == 'f' || ch == 'F'));
}
bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
@ -73,7 +75,7 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
char ch = styler.SafeGetCharAt(i);
char chNext = styler.SafeGetCharAt(i + 1);
// Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
// Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
if (ch == 'r' || ch == 'R') {
i++;
ch = styler.SafeGetCharAt(i);
@ -109,12 +111,32 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
}
}
inline bool IsAWordChar(int ch) {
return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
if (ch < 0x80)
return (isalnum(ch) || ch == '.' || ch == '_');
if (!unicodeIdentifiers)
return false;
// Approximation, Python uses the XID_Continue set from unicode data
// see http://unicode.org/reports/tr31/
CharacterCategory c = CategoriseCharacter(ch);
return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo
|| c == ccNl || c == ccMn || c == ccMc || c == ccNd || c == ccPc);
}
inline bool IsAWordStart(int ch) {
return (ch < 0x80) && (isalnum(ch) || ch == '_');
inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
if (ch < 0x80)
return (isalpha(ch) || ch == '_');
if (!unicodeIdentifiers)
return false;
// Approximation, Python uses the XID_Start set from unicode data
// see http://unicode.org/reports/tr31/
CharacterCategory c = CategoriseCharacter(ch);
return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo
|| c == ccNl);
}
static bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
@ -134,28 +156,34 @@ struct OptionsPython {
bool base2or8Literals;
bool stringsU;
bool stringsB;
bool stringsF;
bool stringsOverNewline;
bool keywords2NoSubIdentifiers;
bool fold;
bool foldQuotes;
bool foldCompact;
bool unicodeIdentifiers;
OptionsPython() {
whingeLevel = 0;
base2or8Literals = true;
stringsU = true;
stringsB = true;
stringsF = true;
stringsOverNewline = false;
keywords2NoSubIdentifiers = false;
fold = false;
foldQuotes = false;
foldCompact = false;
unicodeIdentifiers = true;
}
literalsAllowed AllowedLiterals() const {
literalsAllowed allowedLiterals = stringsU ? litU : litNone;
if (stringsB)
allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
if (stringsF)
allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF);
return allowedLiterals;
}
};
@ -186,6 +214,9 @@ struct OptionSetPython : public OptionSet<OptionsPython> {
DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
"Set to 0 to not recognise Python 3 bytes literals b\"x\".");
DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
"Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
"Set to 1 to allow strings to span newline characters.");
@ -200,6 +231,9 @@ struct OptionSetPython : public OptionSet<OptionsPython> {
DefineProperty("fold.compact", &OptionsPython::foldCompact);
DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
"Set to 0 to not recognise Python 3 unicode identifiers.");
DefineWordListSets(pythonWordListDesc);
}
};
@ -410,12 +444,12 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
kwLast = kwOther;
sc.SetState(SCE_P_DEFAULT);
} else if (sc.state == SCE_P_NUMBER) {
if (!IsAWordChar(sc.ch) &&
if (!IsAWordChar(sc.ch, false) &&
!(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
sc.SetState(SCE_P_DEFAULT);
}
} else if (sc.state == SCE_P_IDENTIFIER) {
if ((sc.ch == '.') || (!IsAWordChar(sc.ch))) {
if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
char s[100];
sc.GetCurrent(s, sizeof(s));
int style = SCE_P_IDENTIFIER;
@ -487,7 +521,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
sc.SetState(SCE_P_DEFAULT);
}
} else if (sc.state == SCE_P_DECORATOR) {
if (!IsAWordChar(sc.ch)) {
if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
sc.SetState(SCE_P_DEFAULT);
}
} else if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
@ -581,7 +615,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
while (nextIndex > (sc.currentPos + 1) && sc.More()) {
sc.Forward();
}
} else if (IsAWordStart(sc.ch)) {
} else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
sc.SetState(SCE_P_IDENTIFIER);
}
}