mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+ fix: UTF-8 encoding detection: allow less reliable CED-Analysis of UTF-8 as soft-hint => prefer UTF-8
This commit is contained in:
parent
aa6ad3be63
commit
8f6eb3087f
@ -64,9 +64,9 @@ extern DWORD dwLastIOError;
|
||||
extern bool bUseDefaultForFileEncoding;
|
||||
extern bool bSkipUnicodeDetection;
|
||||
extern bool bSkipANSICodePageDetection;
|
||||
extern bool bLoadASCIIasUTF8;
|
||||
extern bool bLoadNFOasOEM;
|
||||
extern bool bNoEncodingTags;
|
||||
extern bool g_bLoadASCIIasUTF8;
|
||||
extern bool g_bLoadNFOasOEM;
|
||||
extern bool g_bNoEncodingTags;
|
||||
extern bool bFixLineEndings;
|
||||
extern bool bAutoStripBlanks;
|
||||
|
||||
@ -2255,9 +2255,9 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
|
||||
CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, DlgBtnChk(bUseDefaultForFileEncoding));
|
||||
CheckDlgButton(hwnd,IDC_NOUNICODEDETECTION, DlgBtnChk(bSkipUnicodeDetection));
|
||||
CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, DlgBtnChk(bSkipANSICodePageDetection));
|
||||
CheckDlgButton(hwnd,IDC_ASCIIASUTF8, DlgBtnChk(bLoadASCIIasUTF8));
|
||||
CheckDlgButton(hwnd,IDC_NFOASOEM, DlgBtnChk(bLoadNFOasOEM));
|
||||
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS, DlgBtnChk(bNoEncodingTags));
|
||||
CheckDlgButton(hwnd,IDC_ASCIIASUTF8, DlgBtnChk(g_bLoadASCIIasUTF8));
|
||||
CheckDlgButton(hwnd,IDC_NFOASOEM, DlgBtnChk(g_bLoadNFOasOEM));
|
||||
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS, DlgBtnChk(g_bNoEncodingTags));
|
||||
|
||||
CenterDlgInParent(hwnd);
|
||||
}
|
||||
@ -2277,9 +2277,9 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
|
||||
bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED);
|
||||
bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED);
|
||||
bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED);
|
||||
bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
|
||||
bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
|
||||
bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
|
||||
g_bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
|
||||
g_bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
|
||||
g_bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
|
||||
EndDialog(hwnd,IDOK);
|
||||
}
|
||||
}
|
||||
|
||||
31
src/Edit.c
31
src/Edit.c
@ -92,10 +92,10 @@ extern bool bAutoStripBlanks;
|
||||
// Default Codepage and Character Set
|
||||
extern int g_iDefaultNewFileEncoding;
|
||||
extern int g_iDefaultCharSet;
|
||||
extern bool bLoadASCIIasUTF8;
|
||||
extern bool bForceLoadASCIIasUTF8;
|
||||
extern bool bLoadNFOasOEM;
|
||||
extern bool bNoEncodingTags;
|
||||
extern bool g_bLoadASCIIasUTF8;
|
||||
extern bool g_bForceLoadASCIIasUTF8;
|
||||
extern bool g_bLoadNFOasOEM;
|
||||
extern bool g_bNoEncodingTags;
|
||||
extern bool g_bUseLimitedAutoCCharSet;
|
||||
extern bool g_bIsCJKInputCodePage;
|
||||
|
||||
@ -1053,7 +1053,7 @@ bool EditLoadFile(
|
||||
}
|
||||
|
||||
bool bNfoDizDetected = false;
|
||||
if (bLoadNFOasOEM)
|
||||
if (g_bLoadNFOasOEM)
|
||||
{
|
||||
if (lpszExt && !(StringCchCompareXI(lpszExt,L".nfo") && StringCchCompareXI(lpszExt,L".diz")))
|
||||
bNfoDizDetected = true;
|
||||
@ -1073,7 +1073,7 @@ bool EditLoadFile(
|
||||
bool const bIsUnicode = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUNICODE(iAnalyzedEncoding);
|
||||
|
||||
if (iAnalyzedEncoding == CPI_ASCII_7BIT) {
|
||||
iAnalyzedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
|
||||
iAnalyzedEncoding = g_bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
|
||||
}
|
||||
else {
|
||||
if ((bSkipUTFDetection && bIsUnicode) || (bSkipANSICPDetection && !bIsUnicode)) {
|
||||
@ -1083,7 +1083,7 @@ bool EditLoadFile(
|
||||
}
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
int iForcedEncoding = bForceLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
|
||||
int iForcedEncoding = g_bForceLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
|
||||
if (Encoding_IsNONE(iForcedEncoding) && bNfoDizDetected) {
|
||||
iForcedEncoding = g_DOSEncoding;
|
||||
}
|
||||
@ -1118,7 +1118,7 @@ bool EditLoadFile(
|
||||
if (cbData == 0) {
|
||||
FileVars_Init(NULL,0,&fvCurFile);
|
||||
*iEOLMode = g_iDefaultEOLMode;
|
||||
*iEncoding = !Encoding_IsNONE(iForcedEncoding) ? iForcedEncoding : (bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding);
|
||||
*iEncoding = !Encoding_IsNONE(iForcedEncoding) ? iForcedEncoding : (g_bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding);
|
||||
EditSetNewText(hwnd,"",0);
|
||||
SendMessage(hwnd,SCI_SETEOLMODE,g_iDefaultEOLMode,0);
|
||||
FreeMem(lpData);
|
||||
@ -1183,16 +1183,17 @@ bool EditLoadFile(
|
||||
FileVars_Init(lpData,cbData,&fvCurFile);
|
||||
|
||||
// === UTF-8 ===
|
||||
bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !bNoEncodingTags);
|
||||
bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !g_bNoEncodingTags);
|
||||
bool const bForcedNonUTF8 = !Encoding_IsNONE(iForcedEncoding) && !Encoding_IsUTF8(iForcedEncoding);
|
||||
|
||||
bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
|
||||
bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
|
||||
bool const bSoftHintUTF8 = (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8);
|
||||
bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint
|
||||
|
||||
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !bNoEncodingTags);
|
||||
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !g_bNoEncodingTags);
|
||||
|
||||
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8)))
|
||||
//if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8)))
|
||||
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
|
||||
{
|
||||
EditSetNewText(hwnd,"",0);
|
||||
if (bIsUTF8Sig) {
|
||||
@ -7819,7 +7820,7 @@ void EditSetBookmarkList(HWND hwnd, LPCWSTR pszBookMarks)
|
||||
//
|
||||
// _SetFileVars()
|
||||
//
|
||||
extern bool bNoEncodingTags;
|
||||
extern bool g_bNoEncodingTags;
|
||||
extern int g_flagNoFileVariables;
|
||||
|
||||
static void __fastcall _SetFileVars(char* lpData, char* tch, LPFILEVARS lpfv)
|
||||
@ -7866,7 +7867,7 @@ static void __fastcall _SetFileVars(char* lpData, char* tch, LPFILEVARS lpfv)
|
||||
}
|
||||
}
|
||||
|
||||
if (!IsUTF8Signature(lpData) && !bNoEncodingTags && !bDisableFileVar) {
|
||||
if (!IsUTF8Signature(lpData) && !g_bNoEncodingTags && !bDisableFileVar) {
|
||||
|
||||
if (FileVars_ParseStr(tch, "encoding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)))
|
||||
lpfv->mask |= FV_ENCODING;
|
||||
@ -7892,7 +7893,7 @@ bool FileVars_Init(char *lpData, DWORD cbData, LPFILEVARS lpfv) {
|
||||
char tch[LARGE_BUFFER];
|
||||
|
||||
ZeroMemory(lpfv,sizeof(FILEVARS));
|
||||
if ((g_flagNoFileVariables && bNoEncodingTags) || !lpData || !cbData)
|
||||
if ((g_flagNoFileVariables && g_bNoEncodingTags) || !lpData || !cbData)
|
||||
return true;
|
||||
|
||||
StringCchCopyNA(tch,COUNTOF(tch),lpData,min_s(cbData + 1,COUNTOF(tch)));
|
||||
|
||||
@ -228,10 +228,10 @@ bool bViewEOLs;
|
||||
bool bUseDefaultForFileEncoding;
|
||||
bool bSkipUnicodeDetection;
|
||||
bool bSkipANSICodePageDetection;
|
||||
bool bLoadASCIIasUTF8 = false;
|
||||
bool bForceLoadASCIIasUTF8 = false;
|
||||
bool bLoadNFOasOEM;
|
||||
bool bNoEncodingTags;
|
||||
bool g_bLoadASCIIasUTF8 = false;
|
||||
bool g_bForceLoadASCIIasUTF8 = false;
|
||||
bool g_bLoadNFOasOEM;
|
||||
bool g_bNoEncodingTags;
|
||||
bool bFixLineEndings;
|
||||
bool bAutoStripBlanks;
|
||||
int iPrintHeader;
|
||||
@ -5314,10 +5314,10 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
|
||||
case CMD_RELOADASCIIASUTF8:
|
||||
{
|
||||
if (StringCchLenW(g_wchCurFile,COUNTOF(g_wchCurFile))) {
|
||||
bForceLoadASCIIasUTF8 = true;
|
||||
g_bForceLoadASCIIasUTF8 = true;
|
||||
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),g_wchCurFile);
|
||||
FileLoad(false, false, true, true, true, tchMaxPathBuffer);
|
||||
bForceLoadASCIIasUTF8 = false;
|
||||
g_bForceLoadASCIIasUTF8 = false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -5327,7 +5327,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
|
||||
{
|
||||
g_bForceCompEncDetection = true;
|
||||
if (StringCchLenW(g_wchCurFile, COUNTOF(g_wchCurFile))) {
|
||||
bForceLoadASCIIasUTF8 = false;
|
||||
g_bForceLoadASCIIasUTF8 = false;
|
||||
StringCchCopy(tchMaxPathBuffer, COUNTOF(tchMaxPathBuffer), g_wchCurFile);
|
||||
FileLoad(false, false, true, false, false, tchMaxPathBuffer);
|
||||
}
|
||||
@ -5339,13 +5339,13 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
|
||||
{
|
||||
if (StringCchLenW(g_wchCurFile,COUNTOF(g_wchCurFile))) {
|
||||
int _fNoFileVariables = g_flagNoFileVariables;
|
||||
bool _bNoEncodingTags = bNoEncodingTags;
|
||||
bool _bNoEncodingTags = g_bNoEncodingTags;
|
||||
g_flagNoFileVariables = 1;
|
||||
bNoEncodingTags = 1;
|
||||
g_bNoEncodingTags = 1;
|
||||
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),g_wchCurFile);
|
||||
FileLoad(false,false,true, bSkipUnicodeDetection, bSkipANSICodePageDetection, tchMaxPathBuffer);
|
||||
g_flagNoFileVariables = _fNoFileVariables;
|
||||
bNoEncodingTags = _bNoEncodingTags;
|
||||
g_bNoEncodingTags = _bNoEncodingTags;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -6884,11 +6884,11 @@ void LoadSettings()
|
||||
|
||||
bSkipANSICodePageDetection = IniSectionGetBool(pIniSection, L"SkipANSICodePageDetection", true);
|
||||
|
||||
bLoadASCIIasUTF8 = IniSectionGetBool(pIniSection, L"LoadASCIIasUTF8", false);
|
||||
g_bLoadASCIIasUTF8 = IniSectionGetBool(pIniSection, L"LoadASCIIasUTF8", false);
|
||||
|
||||
bLoadNFOasOEM = IniSectionGetBool(pIniSection, L"LoadNFOasOEM", true);
|
||||
g_bLoadNFOasOEM = IniSectionGetBool(pIniSection, L"LoadNFOasOEM", true);
|
||||
|
||||
bNoEncodingTags = IniSectionGetBool(pIniSection, L"NoEncodingTags", false);
|
||||
g_bNoEncodingTags = IniSectionGetBool(pIniSection, L"NoEncodingTags", false);
|
||||
|
||||
g_iDefaultEOLMode = clampi(IniSectionGetInt(pIniSection, L"DefaultEOLMode", 0), 0, 2);
|
||||
|
||||
@ -7197,9 +7197,9 @@ void SaveSettings(bool bSaveSettingsNow)
|
||||
IniSectionSetBool(pIniSection, L"UseDefaultForFileEncoding", bUseDefaultForFileEncoding);
|
||||
IniSectionSetBool(pIniSection, L"SkipUnicodeDetection", bSkipUnicodeDetection);
|
||||
IniSectionSetBool(pIniSection, L"SkipANSICodePageDetection", bSkipANSICodePageDetection);
|
||||
IniSectionSetInt(pIniSection, L"LoadASCIIasUTF8", bLoadASCIIasUTF8);
|
||||
IniSectionSetBool(pIniSection, L"LoadNFOasOEM", bLoadNFOasOEM);
|
||||
IniSectionSetBool(pIniSection, L"NoEncodingTags", bNoEncodingTags);
|
||||
IniSectionSetInt(pIniSection, L"LoadASCIIasUTF8", g_bLoadASCIIasUTF8);
|
||||
IniSectionSetBool(pIniSection, L"LoadNFOasOEM", g_bLoadNFOasOEM);
|
||||
IniSectionSetBool(pIniSection, L"NoEncodingTags", g_bNoEncodingTags);
|
||||
IniSectionSetInt(pIniSection, L"DefaultEOLMode", g_iDefaultEOLMode);
|
||||
IniSectionSetBool(pIniSection, L"FixLineEndings", bFixLineEndings);
|
||||
IniSectionSetBool(pIniSection, L"FixTrailingBlanks", bAutoStripBlanks);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user