+ chg: Encoding Detection fine tuning

This commit is contained in:
RaiKoHoff 2020-01-14 18:26:52 +01:00
parent b23f23c40d
commit 2ad343ee91
9 changed files with 83 additions and 28 deletions

View File

@ -1,4 +1,4 @@
[Notepad3]
[Notepad3]
;Notepad3.ini=%USERPROFILE%\Notepad3.ini
;Notepad3.ini=%APPDATA%\Rizonesoft\Notepad3\Notepad3.ini
[Settings]
@ -49,7 +49,7 @@ SettingsVersion=4
;UndoTransactionTimeout=0
;AdministrationTool.exe=
;DevDebugMode=0
;AnalyzeReliableConfidenceLevel=50
;AnalyzeReliableConfidenceLevel=66
;LexerSQLNumberSignAsComment=1
;ExitOnESCSkipLevel=2
[Statusbar Settings]

View File

@ -1 +1 @@
2706
2707

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.20.114.2706"
version="5.20.114.2707"
type="win32"
/>
<description>Notepad3 BETA</description>

View File

@ -780,7 +780,7 @@ void LoadSettings()
Settings2.NoCutLineOnEmptySelection = IniSectionGetBool(Settings2_Section, L"NoCutLineOnEmptySelection", Defaults2.NoCutLineOnEmptySelection);
int const iARCLdef = 50;
int const iARCLdef = 66;
Defaults2.AnalyzeReliableConfidenceLevel = (float)iARCLdef / 100.0f;
int const iARCLset = clampi(IniSectionGetInt(Settings2_Section, L"AnalyzeReliableConfidenceLevel", iARCLdef), 0, 100);
Settings2.AnalyzeReliableConfidenceLevel = (float)iARCLset / 100.0f;

View File

@ -1147,20 +1147,14 @@ bool EditLoadFile(
}
else // === ALL OTHERS ===
{
// ----------------------------------------------------------------------
status->iEncoding = encDetection.Encoding;
// ----------------------------------------------------------------------
UINT const uCodePage = Encoding_GetCodePage(status->iEncoding);
// === UTF-8 ? ===
bool const bForcedUTF8 = Encoding_IsUTF8(encDetection.forcedEncoding);// ~ don't || encDetection.bIsUTF8Sig here !
bool const bAnalysisUTF8 = Encoding_IsUTF8(encDetection.analyzedEncoding) && encDetection.bIsAnalysisReliable;
bool const bSoftHintUTF8 = Encoding_IsUTF8(encDetection.analyzedEncoding) && Encoding_IsUTF8(encDetection.Encoding); // non-reliable analysis = soft-hint
bool const bAnalysisUTF8 = Encoding_IsUTF8(encDetection.Encoding);
bool const bRejectUTF8 = (IS_ENC_ENFORCED() && !bForcedUTF8) || !bValidUTF8 || (!encDetection.bIsUTF8Sig && bSkipUTFDetection);
bool const bIsCP_UTF7 = (Encoding_GetCodePage(encDetection.Encoding) == CP_UTF7);
if (bForcedUTF8 || (!bRejectUTF8 && (encDetection.bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
if (bForcedUTF8 || (!bRejectUTF8 && (encDetection.bIsUTF8Sig || bAnalysisUTF8)))
{
if (encDetection.bIsUTF8Sig) {
EditSetNewText(hwnd, UTF8StringStart(lpData), cbData - 3, bClearUndoHistory);
@ -1173,7 +1167,7 @@ bool EditLoadFile(
EditDetectEOLMode(lpData, cbData, status);
}
}
else if ((uCodePage == CP_UTF7) && IsValidUTF7(lpData, cbData))
else if (bIsCP_UTF7 && IsValidUTF7(lpData, cbData))
{
// load UTF-7/ASCII(7-bit) as ANSI/UTF-8
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory);
@ -1182,6 +1176,9 @@ bool EditLoadFile(
}
else { // === ALL OTHER NON UTF-8 ===
status->iEncoding = encDetection.Encoding;
UINT const uCodePage = Encoding_GetCodePage(encDetection.Encoding);
if (Encoding_IsEXTERNAL_8BIT(status->iEncoding))
{
LPWSTR lpDataWide = AllocMem(cbData * 2 + 16, HEAP_ZERO_MEMORY);

View File

@ -928,7 +928,7 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
//~ StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), "'");
//~}
StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), (int)lroundf(ucd_conf_perc) >= Settings2.AnalyzeReliableConfidenceLevel ? " (reliable)" : " (NOT reliable)");
StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), ucd_confidence >= Settings2.AnalyzeReliableConfidenceLevel ? " (reliable)" : " (NOT reliable)");
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
::MultiByteToWideChar(CP_UTF7, 0, chEncodingInfo, -1, wchEncodingInfo, ARRAYSIZE(wchEncodingInfo));
@ -1265,6 +1265,8 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
float confidence = 0.0f;
cpi_enc_t const asciiEnc = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
if (!IS_ENC_ENFORCED() || bForceEncDetection)
{
if (!bSkipANSICPDetection)
@ -1275,7 +1277,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
if (encDetRes.analyzedEncoding == CPI_NONE)
{
encDetRes.analyzedEncoding = iAnalyzeFallback;
confidence = Settings2.AnalyzeReliableConfidenceLevel;
confidence = Settings2.AnalyzeReliableConfidenceLevel / 4.0f;
}
if (!bSkipUTFDetection)
@ -1303,7 +1305,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
if (bForceEncDetection) {
if (Encoding_IsValid(encDetRes.analyzedEncoding)) {
// no bIsReliable check (forced unreliable detection)
encDetRes.forcedEncoding = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) ? CPI_ANSI_DEFAULT : encDetRes.analyzedEncoding;
encDetRes.forcedEncoding = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) ? asciiEnc : encDetRes.analyzedEncoding;
}
else if (Encoding_IsValid(encDetRes.unicodeAnalysis)) {
encDetRes.forcedEncoding = encDetRes.unicodeAnalysis;
@ -1318,11 +1320,10 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
if (encDetRes.analyzedEncoding == CPI_NONE)
{
encDetRes.analyzedEncoding = iAnalyzeFallback;
confidence = Settings2.AnalyzeReliableConfidenceLevel;
confidence = Settings2.AnalyzeReliableConfidenceLevel / 4.0f;
}
else if (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) {
encDetRes.analyzedEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
confidence = 1.0;
encDetRes.analyzedEncoding = asciiEnc;
}
}
@ -1333,16 +1334,12 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
// --------------------------------------------------------------------------
// init Preferred Encoding
encDetRes.Encoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
encDetRes.Encoding = asciiEnc;
if (IS_ENC_ENFORCED())
{
encDetRes.Encoding = encDetRes.forcedEncoding;
}
else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly))
{
encDetRes.Encoding = encDetRes.analyzedEncoding;
}
else if (encDetRes.bIsUTF8Sig)
{
encDetRes.Encoding = CPI_UTF8SIGN;
@ -1351,11 +1348,20 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
encDetRes.Encoding = bBOM_LE ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
encDetRes.bIsReverse = bBOM_BE;
}
else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly))
{
encDetRes.Encoding = encDetRes.analyzedEncoding;
}
else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET))) {
encDetRes.Encoding = Encoding_SrcWeak(CPI_GET);
encDetRes.bIsAnalysisReliable = false;
}
else if (Encoding_IsValid(iAnalyzeFallback)) {
encDetRes.Encoding = iAnalyzeFallback;
encDetRes.bIsAnalysisReliable = false;
}
if (!Encoding_IsValid(encDetRes.Encoding)) { encDetRes.Encoding = CPI_ANSI_DEFAULT; }
if (!Encoding_IsValid(encDetRes.Encoding)) { encDetRes.Encoding = asciiEnc; }
return encDetRes;
}

View File

@ -9,7 +9,7 @@
#define VERSION_MAJOR 5
#define VERSION_MINOR 20
#define VERSION_REV 114
#define VERSION_BUILD 2706
#define VERSION_BUILD 2707
#define SCINTILLA_VER 423
#define ONIGURUMA_REGEX_VER 6.9.4
#define UCHARDET_VER 2018.09.27

View File

@ -0,0 +1,21 @@
¢י<EFBFBD> <20>¹<EFBFBD><C2B9><EFBFBD>ל·<D79C>י§<D799><C2A7><EFBFBD><EFBFBD>א<EFBFBD><D790>´<EFBFBD><C2B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ב<EFBFBD><D791>א<EFBFBD><D790><EFBFBD><EFBFBD><EFBFBD>¤<EFBFBD><C2A4>¹ד¹א<C2B9><D790><EFBFBD><EFBFBD>µ<EFBFBD><C2B5><EFBFBD><EFBFBD>´<><D790><EFBFBD><EFBFBD>µ<EFBFBD><C2B5><EFBFBD><EFBFBD>´<EFBFBD>ל]ב<><D791><EFBFBD><EFBFBD>·¸<C2B7>
µט<EFBFBD>§<EFBFBD><EFBFBD>א<EFBFBD>µ<EFBFBD>¼<EFBFBD>ב<EFBFBD><EFBFBD><EFBFBD>ג¹¸<EFBFBD><EFBFBD><EFBFBD> ב<><D791>¤<EFBFBD><C2A4>»¯<C2BB>÷<EFBFBD>µ<EFBFBD>µט<C2B5><D798><EFBFBD>¹´י<C2B4><D799>א¨µ¹<C2B5><C2B9><EFBFBD>³לב<D79C>ט§<D798><C2A7><EFBFBD>´<EFBFBD><C2B4><EFBFBD>¾
¢י<EFBFBD> ·<><C2B7>¤¹<C2A4>ט<EFBFBD><D798><EFBFBD><EFBFBD><EFBFBD><EFBFBD>·¸<C2B7>ב<EFBFBD><D791><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¾÷<C2BE><C3B7>´<EFBFBD>·<EFBFBD>ט<EFBFBD><D798><EFBFBD>¹´ה<C2B4>יד¹»¯<C2BB>­­<C2AD>¹<EFBFBD>י
ג´<D792>»<EFBFBD><C2BB><EFBFBD>¨<EFBFBD><C2A8>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ה<C2A7>ט<EFBFBD>ט<EFBFBD>×¹<C397>´ד´ ז ´<>§א×ט¹ א×<D790>י<EFBFBD>×<EFBFBD>µ<EFBFBD> ¼<><C2BC> א¾<D790> <20><><EFBFBD><EFBFBD> <20><><EFBFBD>¹<EFBFBD>
¤<><C2A4><EFBFBD>¤<EFBFBD>´א<C2B4>ח¹·<C2B9>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7>ט¹ א¼ט<C2BC>¾<EFBFBD>¹¸<C2B9>לב<D79C>ט§×<C2A7>µ<EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>§¤<C2A7> ·<><C2B7>¾<EFBFBD>ל<EFBFBD><D79C>¹ <20><>א¹<D790>´ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¹<EFBFBD><C2B9><EFBFBD>ט¹ ז
<20>¹<EFBFBD>ט§¨<C2A7>ה<EFBFBD>ט<EFBFBD><D798>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ד´ ז µ<><C2B5><EFBFBD><EFBFBD><EFBFBD>°<EFBFBD>¹ב<C2B9>ט§<D798><EFBFBD>¹<EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§ ·<>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD>§»<C2A7><C2BB>א·<D790>¢<EFBFBD>§»<C2A7><C2BB>א·<D790><C2B7><EFBFBD><EFBFBD><EFBFBD>´<EFBFBD>¹ב´¹·<C2B9>ט÷<D798>¤¤<C2A4><C2A4><EFBFBD>§<EFBFBD><C2A7>´
ה<>ט<EFBFBD>ט<EFBFBD>´<EFBFBD>¹ב´¹¹<C2B9>י¨<D799>א»ח¹א<C2B9><D790><EFBFBD><EFBFBD>× <20><><EFBFBD>טד¹¤<C2B9><C2A4><EFBFBD>¾<EFBFBD>·<EFBFBD><C2B7><EFBFBD>ל<EFBFBD><D79C>ה´י»<D799>¤<EFBFBD><C2A4>§µ¹א<C2B9>§ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD><D798><EFBFBD>דµי<C2B5><D799><EFBFBD>¨<EFBFBD><C2A8><EFBFBD>´<EFBFBD>¸<EFBFBD>»הµ<D794>ד´ ז ·<>י§<D799><C2A7>י¹
¢י<C2A2> ·<><C2B7>¤¹<C2A4>ט<EFBFBD><D798><EFBFBD><EFBFBD><EFBFBD><EFBFBD>·¸<C2B7>ב<EFBFBD><D791><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¾÷<C2BE><C3B7>´<EFBFBD>·<EFBFBD>ט<EFBFBD><D798><EFBFBD>¹´ה<C2B4>יד¹»¯<C2BB>­­<C2AD>¹<EFBFBD>י
ג´<D792>»<EFBFBD><C2BB><EFBFBD>¨<EFBFBD><C2A8>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ה<C2A7>ט<EFBFBD>ט<EFBFBD>×¹<C397>´ד´ ז ´<>§א×ט¹ א×<D790>י<EFBFBD>×<EFBFBD>µ<EFBFBD> ¼<><C2BC> א¾<D790> <20><><EFBFBD><EFBFBD> <20><><EFBFBD>¹<EFBFBD>
¤<><C2A4><EFBFBD>¤<EFBFBD>´א<C2B4>ח¹·<C2B9>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7>ט¹ א¼ט<C2BC>¾<EFBFBD>¹¸<C2B9>לב<D79C>ט§×<C2A7>µ<EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>§¤<C2A7> ·<><C2B7>¾<EFBFBD>ל<EFBFBD><D79C>¹ <20><>א¹<D790>´ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¹<EFBFBD><C2B9><EFBFBD>ט¹ ז
<20>¹<EFBFBD>ט§¨<C2A7>ה<EFBFBD>ט<EFBFBD><D798>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ד´ ז µ<><C2B5><EFBFBD><EFBFBD><EFBFBD>°<EFBFBD>¹ב<C2B9>ט§<D798><EFBFBD>¹<EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§ ·<>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD>§»<C2A7><C2BB>א·<D790>¢<EFBFBD>§»<C2A7><C2BB>א·<D790><C2B7><EFBFBD><EFBFBD><EFBFBD>´<EFBFBD>¹ב´¹·<C2B9>ט÷<D798>¤¤<C2A4><C2A4><EFBFBD>§<EFBFBD><C2A7>´
ה<>ט<EFBFBD>ט<EFBFBD>´<EFBFBD>¹ב´¹¹<C2B9>י¨<D799>א»ח¹א<C2B9><D790><EFBFBD><EFBFBD>× <20><><EFBFBD>טד¹¤<C2B9><C2A4><EFBFBD>¾<EFBFBD>·<EFBFBD><C2B7><EFBFBD>ל<EFBFBD><D79C>ה´י»<D799>¤<EFBFBD><C2A4>§µ¹א<C2B9>§ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD><D798><EFBFBD>דµי<C2B5><D799><EFBFBD>¨<EFBFBD><C2A8><EFBFBD>´<EFBFBD>¸<EFBFBD>»הµ<D794>ד´ ז ·<>י§<D799><C2A7>י¹
¢י<C2A2> ·<><C2B7>¤¹<C2A4>ט<EFBFBD><D798><EFBFBD><EFBFBD><EFBFBD><EFBFBD>·¸<C2B7>ב<EFBFBD><D791><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¾÷<C2BE><C3B7>´<EFBFBD>·<EFBFBD>ט<EFBFBD><D798><EFBFBD>¹´ה<C2B4>יד¹»¯<C2BB>­­<C2AD>¹<EFBFBD>י
ג´<D792>»<EFBFBD><C2BB><EFBFBD>¨<EFBFBD><C2A8>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ה<C2A7>ט<EFBFBD>ט<EFBFBD>×¹<C397>´ד´ ז ´<>§א×ט¹ א×<D790>י<EFBFBD>×<EFBFBD>µ<EFBFBD> ¼<><C2BC> א¾<D790> <20><><EFBFBD><EFBFBD> <20><><EFBFBD>¹<EFBFBD>
¤<><C2A4><EFBFBD>¤<EFBFBD>´א<C2B4>ח¹·<C2B9>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7>ט¹ א¼ט<C2BC>¾<EFBFBD>¹¸<C2B9>לב<D79C>ט§×<C2A7>µ<EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>§¤<C2A7> ·<><C2B7>¾<EFBFBD>ל<EFBFBD><D79C>¹ <20><>א¹<D790>´ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>¹<EFBFBD><C2B9><EFBFBD>ט¹ ז
<20>¹<EFBFBD>ט§¨<C2A7>ה<EFBFBD>ט<EFBFBD><D798>¤<EFBFBD><C2A4><EFBFBD>בµ<D791>µט<C2B5>§ד´ ז µ<><C2B5><EFBFBD><EFBFBD><EFBFBD>°<EFBFBD>¹ב<C2B9>ט§<D798><EFBFBD>¹<EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD>א<EFBFBD><D790><EFBFBD>§ ·<>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>·<EFBFBD>§<EFBFBD><C2A7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD>§»<C2A7><C2BB>א·<D790>¢<EFBFBD>§»<C2A7><C2BB>א·<D790><C2B7><EFBFBD><EFBFBD><EFBFBD>´<EFBFBD>¹ב´¹·<C2B9>ט÷<D798>¤¤<C2A4><C2A4><EFBFBD>§<EFBFBD><C2A7>´
ה<>ט<EFBFBD>ט<EFBFBD>´<EFBFBD>¹ב´¹¹<C2B9>י¨<D799>א»ח¹א<C2B9><D790><EFBFBD><EFBFBD>× <20><><EFBFBD>טד¹¤<C2B9><C2A4><EFBFBD>¾<EFBFBD>·<EFBFBD><C2B7><EFBFBD>ל<EFBFBD><D79C>ה´י»<D799>¤<EFBFBD><C2A4>§µ¹א<C2B9>§ <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ט<EFBFBD><D798><EFBFBD>דµי<C2B5><D799><EFBFBD>¨<EFBFBD><C2A8><EFBFBD>´<EFBFBD>¸<EFBFBD>»הµ<D794>ד´ ז ·<>י§<D799><C2A7>י¹

View File

@ -0,0 +1,31 @@
Apache on Windows:
==================
é
* Which distribution?
apache.org no longer provides Windows binaries. They direct you to other sources.
Candidates:
- The Apache Haus (apachehaus.com)
- Apache Lounge (apachelounge.com)
My current favourite is Apache Lounge. Use the distribution built with the highest Visual C++ version you can run that also matches the target system architecture.
Prerequisite: Visual C++ Redistributable
* Updating:
(Also check for the latest Visual C++ Redistributable)
Précis:
- Build a new folder at same level as production folder, with "_new" suffix
- Copy as much production data as possible to new folder
- Stop (old) production
- Copy remainder of production data to new folder, moving what can't be copied because of space constraints
- Rename production folder to backup folder (becomes backup)
- Rename new folder to production folder (becomes production)
- Start (new) production and test