use changed scintilla/lexers/LexPython.cxx from current Notepad2-mod

2026-06-11 21:03:05 +08:00 · 2017-01-03 10:07:11 +01:00 · 2017-01-03 10:07:11 +01:00 · 334a68bbcb
commit 334a68bbcb
parent 51693dc7c7
1 changed files with 45 additions and 11 deletions
--- a/scintilla/lexers/LexPython.cxx
+++ b/scintilla/lexers/LexPython.cxx
@ -25,6 +25,7 @@
 #include "Accessor.h"
 #include "StyleContext.h"
 #include "CharacterSet.h"
+#include "CharacterCategory.h"
 #include "LexerModule.h"
 #include "OptionSet.h"
 #include "SubStyles.h"
@ -39,7 +40,7 @@ namespace {
 /* kwCDef, kwCTypeName only used for Cython */
 enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };

-enum literalsAllowed { litNone = 0, litU = 1, litB = 2 };
+enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };

 const int indicatorWhitespace = 1;

@ -50,7 +51,8 @@ bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
 bool IsPyStringTypeChar(int ch, literalsAllowed allowed) {
 	return
 		((allowed & litB) && (ch == 'b' || ch == 'B')) ||
-		((allowed & litU) && (ch == 'u' || ch == 'U'));
+		((allowed & litU) && (ch == 'u' || ch == 'U')) ||
+		((allowed & litF) && (ch == 'f' || ch == 'F'));
 }

 bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
@ -73,7 +75,7 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
 	char ch = styler.SafeGetCharAt(i);
 	char chNext = styler.SafeGetCharAt(i + 1);

-	// Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars
+	// Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
 	if (ch == 'r' || ch == 'R') {
 		i++;
 		ch = styler.SafeGetCharAt(i);
@ -109,12 +111,32 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
 	}
 }

-inline bool IsAWordChar(int ch) {
-	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
+inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
+	if (ch < 0x80)
+		return (isalnum(ch) || ch == '.' || ch == '_');
+
+	if (!unicodeIdentifiers)
+		return false;
+
+	// Approximation, Python uses the XID_Continue set from unicode data
+	// see http://unicode.org/reports/tr31/
+	CharacterCategory c = CategoriseCharacter(ch);
+	return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo
+		|| c == ccNl || c == ccMn || c == ccMc || c == ccNd || c == ccPc);
 }

-inline bool IsAWordStart(int ch) {
-	return (ch < 0x80) && (isalnum(ch) || ch == '_');
+inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
+	if (ch < 0x80) 
+		return (isalpha(ch) || ch == '_');
+
+	if (!unicodeIdentifiers)
+		return false;
+	
+	// Approximation, Python uses the XID_Start set from unicode data
+	// see http://unicode.org/reports/tr31/
+	CharacterCategory c = CategoriseCharacter(ch);
+	return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo
+		|| c == ccNl);
 }

 static bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
@ -134,28 +156,34 @@ struct OptionsPython {
 	bool base2or8Literals;
 	bool stringsU;
 	bool stringsB;
+	bool stringsF;
 	bool stringsOverNewline;
 	bool keywords2NoSubIdentifiers;
 	bool fold;
 	bool foldQuotes;
 	bool foldCompact;
+	bool unicodeIdentifiers;

 	OptionsPython() {
 		whingeLevel = 0;
 		base2or8Literals = true;
 		stringsU = true;
 		stringsB = true;
+		stringsF = true;
 		stringsOverNewline = false;
 		keywords2NoSubIdentifiers = false;
 		fold = false;
 		foldQuotes = false;
 		foldCompact = false;
+		unicodeIdentifiers = true;
 	}

 	literalsAllowed AllowedLiterals() const {
 		literalsAllowed allowedLiterals = stringsU ? litU : litNone;
 		if (stringsB)
 			allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
+		if (stringsF)
+			allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF);
 		return allowedLiterals;
 	}
 };
@ -186,6 +214,9 @@ struct OptionSetPython : public OptionSet<OptionsPython> {
 		DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
 			"Set to 0 to not recognise Python 3 bytes literals b\"x\".");

+		DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
+			"Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
+
 		DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
 			"Set to 1 to allow strings to span newline characters.");

@ -200,6 +231,9 @@ struct OptionSetPython : public OptionSet<OptionsPython> {

 		DefineProperty("fold.compact", &OptionsPython::foldCompact);

+		DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
+			"Set to 0 to not recognise Python 3 unicode identifiers.");
+
 		DefineWordListSets(pythonWordListDesc);
 	}
 };
@ -410,12 +444,12 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 			kwLast = kwOther;
 			sc.SetState(SCE_P_DEFAULT);
 		} else if (sc.state == SCE_P_NUMBER) {
-			if (!IsAWordChar(sc.ch) &&
+			if (!IsAWordChar(sc.ch, false) &&
 			        !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
 				sc.SetState(SCE_P_DEFAULT);
 			}
 		} else if (sc.state == SCE_P_IDENTIFIER) {
-			if ((sc.ch == '.') || (!IsAWordChar(sc.ch))) {
+			if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
 				char s[100];
 				sc.GetCurrent(s, sizeof(s));
 				int style = SCE_P_IDENTIFIER;
@ -487,7 +521,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 				sc.SetState(SCE_P_DEFAULT);
 			}
 		} else if (sc.state == SCE_P_DECORATOR) {
-			if (!IsAWordChar(sc.ch)) {
+			if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 				sc.SetState(SCE_P_DEFAULT);
 			}
 		} else if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
@ -581,7 +615,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 				while (nextIndex > (sc.currentPos + 1) && sc.More()) {
 					sc.Forward();
 				}
-			} else if (IsAWordStart(sc.ch)) {
+			} else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 				sc.SetState(SCE_P_IDENTIFIER);
 			}
 		}