diff --git a/Build/Notepad3.ini b/Build/Notepad3.ini index b003d4698..d4dc0a202 100644 --- a/Build/Notepad3.ini +++ b/Build/Notepad3.ini @@ -49,7 +49,7 @@ SettingsVersion=4 ;UndoTransactionTimeout=0 ;AdministrationTool.exe= ;DevDebugMode=0 -;AnalyzeReliableConfidenceLevel=70 +;AnalyzeReliableConfidenceLevel=92 ;LexerSQLNumberSignAsComment=1 ;ExitOnESCSkipLevel=2 [Statusbar Settings] diff --git a/src/Config/Config.cpp b/src/Config/Config.cpp index 8f943d348..79ac0038c 100644 --- a/src/Config/Config.cpp +++ b/src/Config/Config.cpp @@ -801,7 +801,7 @@ void LoadSettings() Settings2.NoCutLineOnEmptySelection = IniSectionGetBool(IniSecSettings2, L"NoCutLineOnEmptySelection", Defaults2.NoCutLineOnEmptySelection); - int const iARCLdef = 70; + int const iARCLdef = 92; Defaults2.AnalyzeReliableConfidenceLevel = (float)iARCLdef / 100.0f; int const iARCLset = clampi(IniSectionGetInt(IniSecSettings2, L"AnalyzeReliableConfidenceLevel", iARCLdef), 0, 100); Settings2.AnalyzeReliableConfidenceLevel = (float)iARCLset / 100.0f; diff --git a/src/Dialogs.c b/src/Dialogs.c index fd4fcd7b8..ecd8c7d3e 100644 --- a/src/Dialogs.c +++ b/src/Dialogs.c @@ -2581,133 +2581,112 @@ static INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd, UINT umsg, WPARAM wP switch (umsg) { - case WM_INITDIALOG: - { - SetWindowLongPtr(hwnd, DWLP_USER, (LONG_PTR)lParam); - if (Globals.hDlgIcon) { SendMessage(hwnd, WM_SETICON, ICON_SMALL, (LPARAM)Globals.hDlgIcon); } + case WM_INITDIALOG: + { + SetWindowLongPtr(hwnd, DWLP_USER, (LONG_PTR)lParam); + if (Globals.hDlgIcon) { SendMessage(hwnd, WM_SETICON, ICON_SMALL, (LPARAM)Globals.hDlgIcon); } - PENCODEDLG const pdd = (PENCODEDLG)lParam; - HBITMAP hbmp = LoadImage(Globals.hInstance, MAKEINTRESOURCE(IDB_ENCODING), IMAGE_BITMAP, 0, 0, LR_CREATEDIBSECTION); - hbmp = ResizeImageForCurrentDPI(hbmp); + PENCODEDLG const pdd = (PENCODEDLG)lParam; + HBITMAP hbmp = LoadImage(Globals.hInstance, MAKEINTRESOURCE(IDB_ENCODING), IMAGE_BITMAP, 0, 0, LR_CREATEDIBSECTION); + hbmp = ResizeImageForCurrentDPI(hbmp); - HIMAGELIST himl = ImageList_Create(16, 16, ILC_COLOR32 | ILC_MASK, 0, 0); - ImageList_AddMasked(himl, hbmp, CLR_DEFAULT); - DeleteObject(hbmp); - SendDlgItemMessage(hwnd, IDC_ENCODINGLIST, CBEM_SETIMAGELIST, 0, (LPARAM)himl); - SendDlgItemMessage(hwnd, IDC_ENCODINGLIST, CB_SETEXTENDEDUI, true, 0); + HIMAGELIST himl = ImageList_Create(16, 16, ILC_COLOR32 | ILC_MASK, 0, 0); + ImageList_AddMasked(himl, hbmp, CLR_DEFAULT); + DeleteObject(hbmp); + SendDlgItemMessage(hwnd, IDC_ENCODINGLIST, CBEM_SETIMAGELIST, 0, (LPARAM)himl); + SendDlgItemMessage(hwnd, IDC_ENCODINGLIST, CB_SETEXTENDEDUI, true, 0); - Encoding_AddToComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), pdd->idEncoding, 0); + Encoding_AddToComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), pdd->idEncoding, 0); - Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &s_iEnc); - s_bLoadASCIIasUTF8 = Settings.LoadASCIIasUTF8; - s_bUseAsFallback = Encoding_IsASCII(s_iEnc) ? Settings.UseDefaultForFileEncoding : false; - - CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, SetBtn(s_bUseAsFallback)); - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); - CheckDlgButton(hwnd, IDC_RELIABLE_DETECTION_RES, SetBtn(Settings.UseReliableCEDonly)); - CheckDlgButton(hwnd, IDC_NFOASOEM, SetBtn(Settings.LoadNFOasOEM)); - CheckDlgButton(hwnd, IDC_ENCODINGFROMFILEVARS, SetBtn(!Settings.NoEncodingTags)); - CheckDlgButton(hwnd, IDC_NOUNICODEDETECTION, SetBtn(!Settings.SkipUnicodeDetection)); - CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, SetBtn(!Settings.SkipANSICodePageDetection)); - - DialogEnableControl(hwnd, IDC_USEASREADINGFALLBACK, Encoding_IsASCII(s_iEnc)); - - CenterDlgInParent(hwnd, NULL); - } - return true; - - - case WM_DPICHANGED: - UpdateWindowLayoutForDPI(hwnd, 0, 0, 0, 0); - return true; - - - case WM_COMMAND: - switch (LOWORD(wParam)) - { - case IDC_ASCIIASUTF8: - if (s_iEnc != CPI_UTF8) { - s_bLoadASCIIasUTF8 = IsButtonChecked(hwnd, IDC_ASCIIASUTF8); - } - break; - - case IDC_USEASREADINGFALLBACK: - if (s_iEnc != CPI_ANSI_DEFAULT) { - s_bUseAsFallback = IsButtonChecked(hwnd, IDC_USEASREADINGFALLBACK); - } - if (s_iEnc == CPI_UTF8) { - if (s_bUseAsFallback) { - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(true)); - DialogEnableControl(hwnd, IDC_ASCIIASUTF8, false); - } - else - { - DialogEnableControl(hwnd, IDC_ASCIIASUTF8, true); - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); - } - } - break; - - case IDC_ENCODINGLIST: - { - Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &s_iEnc); - - s_bUseAsFallback = Encoding_IsASCII(s_iEnc) ? Settings.UseDefaultForFileEncoding : false; - - if (s_iEnc == CPI_UTF8) { - if (s_bUseAsFallback) { - DialogEnableControl(hwnd, IDC_ASCIIASUTF8, false); - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(true)); - } - DialogEnableControl(hwnd, IDC_USEASREADINGFALLBACK, Encoding_IsASCII(s_iEnc)); - CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, SetBtn(s_bUseAsFallback)); - } - else if (s_iEnc == CPI_ANSI_DEFAULT) { - DialogEnableControl(hwnd, IDC_ASCIIASUTF8, true); - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); - s_bUseAsFallback = true; - DialogEnableControl(hwnd, IDC_USEASREADINGFALLBACK, false); - CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, SetBtn(s_bUseAsFallback)); - } - else { + Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &s_iEnc); + s_bLoadASCIIasUTF8 = Settings.LoadASCIIasUTF8; s_bUseAsFallback = Encoding_IsASCII(s_iEnc) ? Settings.UseDefaultForFileEncoding : false; - DialogEnableControl(hwnd, IDC_ASCIIASUTF8, true); - CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); + DialogEnableControl(hwnd, IDC_USEASREADINGFALLBACK, Encoding_IsASCII(s_iEnc)); CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, SetBtn(s_bUseAsFallback)); - } - } - break; - case IDOK: { - PENCODEDLG pdd = (PENCODEDLG)GetWindowLongPtr(hwnd, DWLP_USER); - if (Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &pdd->idEncoding)) { - if (pdd->idEncoding < 0) { - InfoBoxLng(MB_ICONWARNING, NULL, IDS_MUI_ERR_ENCODINGNA); + CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); + CheckDlgButton(hwnd, IDC_RELIABLE_DETECTION_RES, SetBtn(Settings.UseReliableCEDonly)); + CheckDlgButton(hwnd, IDC_NFOASOEM, SetBtn(Settings.LoadNFOasOEM)); + CheckDlgButton(hwnd, IDC_ENCODINGFROMFILEVARS, SetBtn(!Settings.NoEncodingTags)); + CheckDlgButton(hwnd, IDC_NOUNICODEDETECTION, SetBtn(!Settings.SkipUnicodeDetection)); + CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, SetBtn(!Settings.SkipANSICodePageDetection)); + + + CenterDlgInParent(hwnd, NULL); + } + return true; + + + case WM_DPICHANGED: + UpdateWindowLayoutForDPI(hwnd, 0, 0, 0, 0); + return true; + + + case WM_COMMAND: + switch (LOWORD(wParam)) + { + case IDC_ENCODINGLIST: + case IDC_USEASREADINGFALLBACK: + case IDC_ASCIIASUTF8: + { + Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &s_iEnc); + + s_bUseAsFallback = Encoding_IsASCII(s_iEnc) ? IsButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) : false; + s_bLoadASCIIasUTF8 = IsButtonChecked(hwnd, IDC_ASCIIASUTF8); + + DialogEnableControl(hwnd, IDC_USEASREADINGFALLBACK, Encoding_IsASCII(s_iEnc)); + CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, SetBtn(s_bUseAsFallback)); + + DialogEnableControl(hwnd, IDC_ASCIIASUTF8, true); + CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); + + if (s_iEnc == CPI_UTF8) { + if (s_bUseAsFallback) { + s_bLoadASCIIasUTF8 = true; + DialogEnableControl(hwnd, IDC_ASCIIASUTF8, false); + CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); + } + } + else if (s_iEnc == CPI_ANSI_DEFAULT) { + if (s_bUseAsFallback) { + s_bLoadASCIIasUTF8 = false; + DialogEnableControl(hwnd, IDC_ASCIIASUTF8, false); + CheckDlgButton(hwnd, IDC_ASCIIASUTF8, SetBtn(s_bLoadASCIIasUTF8)); + } + } + } + break; + + case IDOK: { + PENCODEDLG pdd = (PENCODEDLG)GetWindowLongPtr(hwnd, DWLP_USER); + if (Encoding_GetFromComboboxEx(GetDlgItem(hwnd, IDC_ENCODINGLIST), &pdd->idEncoding)) { + if (pdd->idEncoding < 0) { + InfoBoxLng(MB_ICONWARNING, NULL, IDS_MUI_ERR_ENCODINGNA); + EndDialog(hwnd, IDCANCEL); + } + else { + Settings.UseDefaultForFileEncoding = IsButtonChecked(hwnd, IDC_USEASREADINGFALLBACK); + Settings.LoadASCIIasUTF8 = IsButtonChecked(hwnd, IDC_ASCIIASUTF8); + Settings.UseReliableCEDonly = IsButtonChecked(hwnd, IDC_RELIABLE_DETECTION_RES); + Settings.LoadNFOasOEM = IsButtonChecked(hwnd, IDC_NFOASOEM); + Settings.NoEncodingTags = !IsButtonChecked(hwnd, IDC_ENCODINGFROMFILEVARS); + Settings.SkipUnicodeDetection = !IsButtonChecked(hwnd, IDC_NOUNICODEDETECTION); + Settings.SkipANSICodePageDetection = !IsButtonChecked(hwnd, IDC_NOANSICPDETECTION); + EndDialog(hwnd, IDOK); + } + } + else { + PostMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)(GetDlgItem(hwnd, IDC_ENCODINGLIST)), 1); + } + } + break; + + case IDCANCEL: EndDialog(hwnd, IDCANCEL); - } - else { - Settings.UseDefaultForFileEncoding = IsButtonChecked(hwnd, IDC_USEASREADINGFALLBACK); - Settings.LoadASCIIasUTF8 = IsButtonChecked(hwnd, IDC_ASCIIASUTF8); - Settings.UseReliableCEDonly = IsButtonChecked(hwnd, IDC_RELIABLE_DETECTION_RES); - Settings.LoadNFOasOEM = IsButtonChecked(hwnd, IDC_NFOASOEM); - Settings.NoEncodingTags = !IsButtonChecked(hwnd, IDC_ENCODINGFROMFILEVARS); - Settings.SkipUnicodeDetection = !IsButtonChecked(hwnd, IDC_NOUNICODEDETECTION); - Settings.SkipANSICodePageDetection = !IsButtonChecked(hwnd, IDC_NOANSICPDETECTION); - EndDialog(hwnd, IDOK); - } + break; } - else { - PostMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)(GetDlgItem(hwnd, IDC_ENCODINGLIST)), 1); - } - } - break; - - case IDCANCEL: - EndDialog(hwnd, IDCANCEL); - break; - } - return true; + return true; } return false; } diff --git a/src/Edit.c b/src/Edit.c index 36e36e3ab..51f8689d3 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -979,6 +979,10 @@ bool EditLoadFile( bool bClearUndoHistory, EditFileIOStatus* status) { + cpi_enc_t const iEncFallback = Settings.UseDefaultForFileEncoding ? + Settings.DefaultEncoding : (Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT); + + status->iEncoding = iEncFallback; status->bUnicodeErr = false; status->bFileTooBig = false; status->bUnknownExt = false; @@ -1062,8 +1066,8 @@ bool EditLoadFile( if (cbData == 0) { FileVars_Init(NULL, 0, &Globals.fvCurFile); + status->iEncoding = iEncFallback; status->iEOLMode = Settings.DefaultEOLMode; - status->iEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT; EditSetNewText(hwnd, "", 0, bClearUndoHistory); SciCall_SetEOLMode(Settings.DefaultEOLMode); Encoding_Forced(CPI_NONE); @@ -1091,12 +1095,7 @@ bool EditLoadFile( return false; } - bool const bValidUTF8 = IsValidUTF8(lpData, cbData); - - cpi_enc_t const iAnalyzeFallback = Settings.UseDefaultForFileEncoding ? Settings.DefaultEncoding : - ((bValidUTF8 && Settings.LoadASCIIasUTF8) ? CPI_UTF8 : CPI_ANSI_DEFAULT); - - ENC_DET_T encDetection = Encoding_DetectEncoding(pszFile, lpData, cbData, iAnalyzeFallback, + ENC_DET_T encDetection = Encoding_DetectEncoding(pszFile, lpData, cbData, iEncFallback, bSkipUTFDetection, bSkipANSICPDetection, bForceEncDetection); #define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetection.forcedEncoding)) @@ -1168,6 +1167,7 @@ bool EditLoadFile( else // === ALL OTHERS === { // === UTF-8 ? === + bool const bValidUTF8 = IsValidUTF8(lpData, cbData); bool const bForcedUTF8 = Encoding_IsUTF8(encDetection.forcedEncoding);// ~ don't || encDetection.bIsUTF8Sig here ! bool const bAnalysisUTF8 = Encoding_IsUTF8(encDetection.Encoding); @@ -1191,7 +1191,7 @@ bool EditLoadFile( { // load UTF-7/ASCII(7-bit) as ANSI/UTF-8 EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory); - status->iEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT; + status->iEncoding = iEncFallback; EditDetectEOLMode(lpData, cbData, status); } else { // === ALL OTHER NON UTF-8 === diff --git a/src/Encoding.c b/src/Encoding.c index 9591d3e6c..d0e2f4ab9 100644 --- a/src/Encoding.c +++ b/src/Encoding.c @@ -533,6 +533,11 @@ bool Encoding_IsUTF8_SIGN(const cpi_enc_t iEncoding) { } // ============================================================================ +bool Encoding_IsUTF8_NO_SIGN(const cpi_enc_t iEncoding) { + return (Encoding_IsUTF8(iEncoding) && !Encoding_IsUTF8_SIGN(iEncoding)); +} +// ============================================================================ + bool Encoding_IsMBCS(const cpi_enc_t iEncoding) { return (iEncoding >= 0) ? (g_Encodings[iEncoding].uFlags & NCP_MBCS) : false; } diff --git a/src/Encoding.h b/src/Encoding.h index 65015e779..ae12a6b89 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -94,6 +94,7 @@ bool Encoding_IsANSI(const cpi_enc_t iEncoding); bool Encoding_IsOEM(const cpi_enc_t iEncoding); bool Encoding_IsUTF8(const cpi_enc_t iEncoding); bool Encoding_IsUTF8_SIGN(const cpi_enc_t iEncoding); +bool Encoding_IsUTF8_NO_SIGN(const cpi_enc_t iEncoding); bool Encoding_IsMBCS(const cpi_enc_t iEncoding); bool Encoding_IsCJK(const cpi_enc_t iEncoding); bool Encoding_IsUNICODE(const cpi_enc_t iEncoding); @@ -174,6 +175,7 @@ typedef struct _enc_det_t cpi_enc_t fileVarEncoding; cpi_enc_t analyzedEncoding; cpi_enc_t unicodeAnalysis; + float confidence; // flags: bool bIsAnalysisReliable; bool bHasBOM; diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp index 7bde97701..a7f4539ac 100644 --- a/src/EncodingDetection.cpp +++ b/src/EncodingDetection.cpp @@ -1265,7 +1265,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, size_t const cbNbytes4Analysis = min_s(cbData, 200000LL); - float confidence = 0.0f; + encDetRes.confidence = 0.0f; cpi_enc_t const asciiEnc = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT; @@ -1273,13 +1273,16 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, { if (!bSkipANSICPDetection) { - encDetRes.analyzedEncoding = Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &confidence, iAnalyzeFallback); + encDetRes.analyzedEncoding = Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes.confidence, iAnalyzeFallback); } if (encDetRes.analyzedEncoding == CPI_NONE) { encDetRes.analyzedEncoding = iAnalyzeFallback; - confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel); + encDetRes.confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel); + } + else if (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) { + encDetRes.analyzedEncoding = asciiEnc; } if (!bSkipUTFDetection) @@ -1317,19 +1320,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, //bool const bIsUTF8orUnicodeAnalysis = Encoding_IsUTF8(encDetRes.analyzedEncoding) || Encoding_IsUNICODE(encDetRes.analyzedEncoding); - if (!IS_ENC_ENFORCED()) - { - if (encDetRes.analyzedEncoding == CPI_NONE) - { - encDetRes.analyzedEncoding = iAnalyzeFallback; - confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel); - } - else if (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) { - encDetRes.analyzedEncoding = asciiEnc; - } - } - - int const iConfidence = float2int(confidence * 100.0f); + int const iConfidence = float2int(encDetRes.confidence * 100.0f); int const iReliableThreshold = float2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f); encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);