diff --git a/Versions/build.txt b/Versions/build.txt
index 8c88d58d5..44085c505 100644
--- a/Versions/build.txt
+++ b/Versions/build.txt
@@ -1 +1 @@
-2399
+2401
diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf
index cb40e1bfb..0877fa6d9 100644
--- a/res/Notepad3.exe.manifest.conf
+++ b/res/Notepad3.exe.manifest.conf
@@ -3,7 +3,7 @@
Notepad3 BETA
diff --git a/src/Edit.c b/src/Edit.c
index f62fb70cd..981a4211d 100644
--- a/src/Edit.c
+++ b/src/Edit.c
@@ -1119,9 +1119,10 @@ bool EditLoadFile(
bool bBOM = false;
bool bReverse = false;
- bool const bIsUnicodeValid = IsValidUnicode(lpData, cbData, &bBOM, &bReverse);
bool const bIsUnicodeAnalyzed = ((Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable) && !bIsForced && !bSkipUTFDetection && !bIsUTF8Sig);
+ cpi_enc_t const encUnicode = bSkipUTFDetection ? CPI_NONE : GetUnicodeEncoding(lpData, cbData, &bBOM, &bReverse);
+
if (cbData == 0) {
FileVars_Init(NULL, 0, &Globals.fvCurFile);
status->iEOLMode = Settings.DefaultEOLMode;
@@ -1130,20 +1131,22 @@ bool EditLoadFile(
SciCall_SetEOLMode(Settings.DefaultEOLMode);
FreeMem(lpData);
}
- else if (bIsUnicodeForced || (!bIsForced && bIsUnicodeAnalyzed && bIsUnicodeValid))
+ else if (bIsUnicodeForced || (!bIsForced && (bIsUnicodeAnalyzed || !Encoding_IsNONE(encUnicode))))
{
// === UNICODE ===
+ if (Encoding_IsNONE(encUnicode))
+ {
+ bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
+ bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);
- bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
- bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);
-
- if ((iForcedEncoding == CPI_UNICODE) || bBOM_LE) {
- bBOM = bBOM_LE;
- bReverse = false;
- }
- else if ((iForcedEncoding == CPI_UNICODEBE) || bBOM_BE) {
- bBOM = bBOM_BE;
- bReverse = true;
+ if ((iForcedEncoding == CPI_UNICODE) || bBOM_LE) {
+ bBOM = bBOM_LE;
+ bReverse = false;
+ }
+ else if ((iForcedEncoding == CPI_UNICODEBE) || bBOM_BE) {
+ bBOM = bBOM_BE;
+ bReverse = true;
+ }
}
if (bReverse)
diff --git a/src/Encoding.c b/src/Encoding.c
index bd22d83d1..6857ae035 100644
--- a/src/Encoding.c
+++ b/src/Encoding.c
@@ -664,51 +664,6 @@ bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt)
// ============================================================================
-bool IsValidUnicode(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse)
-{
- size_t const enoughData = 2048LL;
- size_t const cb = (len < enoughData) ? len : enoughData;
-
- if (!pBuffer || cb < 2) { return false; }
-
- // IS_TEXT_UNICODE_UNICODE_MASK -> IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
- // IS_TEXT_UNICODE_REVERSE_MASK -> IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
- // IS_TEXT_UNICODE_NOT_UNICODE_MASK -> IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
- // IS_TEXT_UNICODE_NOT_ASCII_MASK -> IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
- //
- int const iAllTests = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;
-
- int iTest = iAllTests;
- /*bool const ok =*/ (void)IsTextUnicode(pBuffer, (int)cb, &iTest); // don't rely on result ok
-
- if (iTest == iAllTests) {
- iTest = 0; // iTest doesn't seem to have been modified ...
- }
-
- bool const bHasBOM = (iTest & IS_TEXT_UNICODE_SIGNATURE);
- bool const bHasRBOM = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE);
-
- bool const bIsUnicode = (iTest & IS_TEXT_UNICODE_UNICODE_MASK);
- bool const bIsReverse = (iTest & IS_TEXT_UNICODE_REVERSE_MASK);
- bool const bIsIllegal = (iTest & IS_TEXT_UNICODE_NOT_UNICODE_MASK);
-
- //bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);
-
- if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse)))
- {
- if (lpbBOM) {
- *lpbBOM = (bHasBOM || bHasRBOM);
- }
- if (lpbReverse) {
- *lpbReverse = (bHasRBOM || bIsReverse);
- }
- return true;
- }
- return false;
-}
-// ============================================================================
-
-
bool IsValidUTF7(const char* pTest, size_t nLength)
{
if (!pTest) { return false; }
diff --git a/src/Encoding.h b/src/Encoding.h
index 6104939d1..608dc41ab 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -121,7 +121,7 @@ inline bool IsUTF8Signature(const char* p) {
bool IsValidUTF7(const char* pTest, size_t nLength);
bool IsValidUTF8(const char* pTest, size_t nLength);
-bool IsValidUnicode(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse);
+
//////////////////////////////////////////////////////
// Google's CED "Compact Encoding Detection"
@@ -145,6 +145,7 @@ inline bool IsDBCSCodePage(UINT cp) {
}
cpi_enc_t Encoding_AnalyzeText(const char* const text, const size_t len, float* confidence_io, const cpi_enc_t encodingHint);
+cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse);
const char* Encoding_GetTitleInfoA();
const WCHAR* Encoding_GetTitleInfoW();
diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp
index 170221a6b..deb2fae0f 100644
--- a/src/EncodingDetection.cpp
+++ b/src/EncodingDetection.cpp
@@ -552,7 +552,12 @@ constexpr cpi_enc_t _MapStdEncodingString2CPI(const char* encStrg, float* pConfi
{
bool bBOM = false;
bool bReverse = false;
- if (IsValidUnicode(text, len, &bBOM, &bReverse)) {
+ cpi_enc_t const cpi = GetUnicodeEncoding(text, len, &bBOM, &bReverse);
+ if (!Encoding_IsNONE(cpiEncoding))
+ {
+ cpiEncoding = cpi;
+ }
+ else {
cpiEncoding = bBOM ? (bReverse ? CPI_UNICODEBE : CPI_UNICODE) : (bReverse ? CPI_UNICODEBE : CPI_UNICODE);
}
}
@@ -830,6 +835,74 @@ extern "C" cpi_enc_t Encoding_AnalyzeText
// ============================================================================
+//
+// GetUnicodeEncoding()
+//
+// Runs the Win32 IsTextUnicode() heuristics on the first bytes of a buffer
+// (at most 2048) and maps the result to a UTF-16 encoding id.
+//
+//   pBuffer    - data to inspect; NULL yields CPI_NONE
+//   len        - number of bytes in pBuffer; fewer than 2 yields CPI_NONE
+//   lpbBOM     - optional out: whether a (reverse) signature was found
+//   lpbReverse - optional out: whether the byte-reversed tests matched
+//
+// Returns CPI_UNICODEBOM / CPI_UNICODEBEBOM when a signature is present,
+// CPI_UNICODE / CPI_UNICODEBE when only the other tests match, otherwise
+// CPI_NONE. The out parameters are written only on a positive detection.
+//
+cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse)
+{
+  size_t const enoughData = 2048;
+  size_t const cb = (len < enoughData) ? len : enoughData;
+
+  // Nothing to analyze: report "not detected" as an encoding id, not as a bool.
+  if (!pBuffer || cb < 2) { return CPI_NONE; }
+
+  // IS_TEXT_UNICODE_UNICODE_MASK -> IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
+  // IS_TEXT_UNICODE_REVERSE_MASK -> IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
+  // IS_TEXT_UNICODE_NOT_UNICODE_MASK -> IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
+  // IS_TEXT_UNICODE_NOT_ASCII_MASK -> IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
+  //
+  int const iAllTests = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;
+
+  int iTest = iAllTests;
+  /*bool const ok =*/ (void)IsTextUnicode(pBuffer, (int)cb, &iTest); // don't rely on result ok
+
+  if (iTest == iAllTests) {
+    iTest = 0; // iTest doesn't seem to have been modified ...
+  }
+
+  bool const bHasBOM = ((iTest & IS_TEXT_UNICODE_SIGNATURE) != 0);
+  bool const bHasRBOM = ((iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE) != 0);
+
+  bool const bIsUnicode = ((iTest & IS_TEXT_UNICODE_UNICODE_MASK) != 0);
+  bool const bIsReverse = ((iTest & IS_TEXT_UNICODE_REVERSE_MASK) != 0);
+  bool const bIsIllegal = ((iTest & IS_TEXT_UNICODE_NOT_UNICODE_MASK) != 0);
+
+  //bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);
+
+  cpi_enc_t iEncoding = CPI_NONE;
+
+  // Accept on a signature, or when exactly one byte order matched and nothing was flagged as illegal.
+  if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse)))
+  {
+    if (lpbBOM) {
+      *lpbBOM = (bHasBOM || bHasRBOM);
+    }
+    if (lpbReverse) {
+      *lpbReverse = (bHasRBOM || bIsReverse);
+    }
+    if (bHasBOM || bHasRBOM) {
+      iEncoding = bHasBOM ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
+    }
+    else if (bIsUnicode || bIsReverse) {
+      iEncoding = bIsUnicode ? CPI_UNICODE : CPI_UNICODEBE;
+    }
+  }
+  return iEncoding;
+}
+// ============================================================================
+
//=============================================================================
//
diff --git a/src/VersionEx.h b/src/VersionEx.h
index 53bab1d55..5032bb75d 100644
--- a/src/VersionEx.h
+++ b/src/VersionEx.h
@@ -7,8 +7,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 19
-#define VERSION_REV 719
-#define VERSION_BUILD 2399
+#define VERSION_REV 721
+#define VERSION_BUILD 2401
#define SCINTILLA_VER 420
#define ONIGURUMA_REGEX_VER 6.9.3
#define VERSION_PATCH BETA