+ fix: UTF-8 encoding detection: accept a non-reliable CED analysis result of UTF-8 as a soft hint => prefer UTF-8

This commit is contained in:
Rainer Kottenhoff 2018-09-23 12:42:55 +02:00
parent aa6ad3be63
commit 8f6eb3087f
3 changed files with 41 additions and 40 deletions

View File

@ -64,9 +64,9 @@ extern DWORD dwLastIOError;
extern bool bUseDefaultForFileEncoding;
extern bool bSkipUnicodeDetection;
extern bool bSkipANSICodePageDetection;
extern bool bLoadASCIIasUTF8;
extern bool bLoadNFOasOEM;
extern bool bNoEncodingTags;
extern bool g_bLoadASCIIasUTF8;
extern bool g_bLoadNFOasOEM;
extern bool g_bNoEncodingTags;
extern bool bFixLineEndings;
extern bool bAutoStripBlanks;
@ -2255,9 +2255,9 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, DlgBtnChk(bUseDefaultForFileEncoding));
CheckDlgButton(hwnd,IDC_NOUNICODEDETECTION, DlgBtnChk(bSkipUnicodeDetection));
CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, DlgBtnChk(bSkipANSICodePageDetection));
CheckDlgButton(hwnd,IDC_ASCIIASUTF8, DlgBtnChk(bLoadASCIIasUTF8));
CheckDlgButton(hwnd,IDC_NFOASOEM, DlgBtnChk(bLoadNFOasOEM));
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS, DlgBtnChk(bNoEncodingTags));
CheckDlgButton(hwnd,IDC_ASCIIASUTF8, DlgBtnChk(g_bLoadASCIIasUTF8));
CheckDlgButton(hwnd,IDC_NFOASOEM, DlgBtnChk(g_bLoadNFOasOEM));
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS, DlgBtnChk(g_bNoEncodingTags));
CenterDlgInParent(hwnd);
}
@ -2277,9 +2277,9 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED);
bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED);
bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED);
bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
g_bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
g_bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
g_bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
EndDialog(hwnd,IDOK);
}
}

View File

@ -92,10 +92,10 @@ extern bool bAutoStripBlanks;
// Default Codepage and Character Set
extern int g_iDefaultNewFileEncoding;
extern int g_iDefaultCharSet;
extern bool bLoadASCIIasUTF8;
extern bool bForceLoadASCIIasUTF8;
extern bool bLoadNFOasOEM;
extern bool bNoEncodingTags;
extern bool g_bLoadASCIIasUTF8;
extern bool g_bForceLoadASCIIasUTF8;
extern bool g_bLoadNFOasOEM;
extern bool g_bNoEncodingTags;
extern bool g_bUseLimitedAutoCCharSet;
extern bool g_bIsCJKInputCodePage;
@ -1053,7 +1053,7 @@ bool EditLoadFile(
}
bool bNfoDizDetected = false;
if (bLoadNFOasOEM)
if (g_bLoadNFOasOEM)
{
if (lpszExt && !(StringCchCompareXI(lpszExt,L".nfo") && StringCchCompareXI(lpszExt,L".diz")))
bNfoDizDetected = true;
@ -1073,7 +1073,7 @@ bool EditLoadFile(
bool const bIsUnicode = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUNICODE(iAnalyzedEncoding);
if (iAnalyzedEncoding == CPI_ASCII_7BIT) {
iAnalyzedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
iAnalyzedEncoding = g_bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
}
else {
if ((bSkipUTFDetection && bIsUnicode) || (bSkipANSICPDetection && !bIsUnicode)) {
@ -1083,7 +1083,7 @@ bool EditLoadFile(
}
// --------------------------------------------------------------------------
int iForcedEncoding = bForceLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
int iForcedEncoding = g_bForceLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
if (Encoding_IsNONE(iForcedEncoding) && bNfoDizDetected) {
iForcedEncoding = g_DOSEncoding;
}
@ -1118,7 +1118,7 @@ bool EditLoadFile(
if (cbData == 0) {
FileVars_Init(NULL,0,&fvCurFile);
*iEOLMode = g_iDefaultEOLMode;
*iEncoding = !Encoding_IsNONE(iForcedEncoding) ? iForcedEncoding : (bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding);
*iEncoding = !Encoding_IsNONE(iForcedEncoding) ? iForcedEncoding : (g_bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding);
EditSetNewText(hwnd,"",0);
SendMessage(hwnd,SCI_SETEOLMODE,g_iDefaultEOLMode,0);
FreeMem(lpData);
@ -1183,16 +1183,17 @@ bool EditLoadFile(
FileVars_Init(lpData,cbData,&fvCurFile);
// === UTF-8 ===
bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !bNoEncodingTags);
bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !g_bNoEncodingTags);
bool const bForcedNonUTF8 = !Encoding_IsNONE(iForcedEncoding) && !Encoding_IsUTF8(iForcedEncoding);
bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
bool const bSoftHintUTF8 = (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8);
bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !bNoEncodingTags);
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !g_bNoEncodingTags);
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8)))
//if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8)))
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
{
EditSetNewText(hwnd,"",0);
if (bIsUTF8Sig) {
@ -7819,7 +7820,7 @@ void EditSetBookmarkList(HWND hwnd, LPCWSTR pszBookMarks)
//
// _SetFileVars()
//
extern bool bNoEncodingTags;
extern bool g_bNoEncodingTags;
extern int g_flagNoFileVariables;
static void __fastcall _SetFileVars(char* lpData, char* tch, LPFILEVARS lpfv)
@ -7866,7 +7867,7 @@ static void __fastcall _SetFileVars(char* lpData, char* tch, LPFILEVARS lpfv)
}
}
if (!IsUTF8Signature(lpData) && !bNoEncodingTags && !bDisableFileVar) {
if (!IsUTF8Signature(lpData) && !g_bNoEncodingTags && !bDisableFileVar) {
if (FileVars_ParseStr(tch, "encoding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)))
lpfv->mask |= FV_ENCODING;
@ -7892,7 +7893,7 @@ bool FileVars_Init(char *lpData, DWORD cbData, LPFILEVARS lpfv) {
char tch[LARGE_BUFFER];
ZeroMemory(lpfv,sizeof(FILEVARS));
if ((g_flagNoFileVariables && bNoEncodingTags) || !lpData || !cbData)
if ((g_flagNoFileVariables && g_bNoEncodingTags) || !lpData || !cbData)
return true;
StringCchCopyNA(tch,COUNTOF(tch),lpData,min_s(cbData + 1,COUNTOF(tch)));

View File

@ -228,10 +228,10 @@ bool bViewEOLs;
bool bUseDefaultForFileEncoding;
bool bSkipUnicodeDetection;
bool bSkipANSICodePageDetection;
bool bLoadASCIIasUTF8 = false;
bool bForceLoadASCIIasUTF8 = false;
bool bLoadNFOasOEM;
bool bNoEncodingTags;
bool g_bLoadASCIIasUTF8 = false;
bool g_bForceLoadASCIIasUTF8 = false;
bool g_bLoadNFOasOEM;
bool g_bNoEncodingTags;
bool bFixLineEndings;
bool bAutoStripBlanks;
int iPrintHeader;
@ -5314,10 +5314,10 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
case CMD_RELOADASCIIASUTF8:
{
if (StringCchLenW(g_wchCurFile,COUNTOF(g_wchCurFile))) {
bForceLoadASCIIasUTF8 = true;
g_bForceLoadASCIIasUTF8 = true;
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),g_wchCurFile);
FileLoad(false, false, true, true, true, tchMaxPathBuffer);
bForceLoadASCIIasUTF8 = false;
g_bForceLoadASCIIasUTF8 = false;
}
}
break;
@ -5327,7 +5327,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
{
g_bForceCompEncDetection = true;
if (StringCchLenW(g_wchCurFile, COUNTOF(g_wchCurFile))) {
bForceLoadASCIIasUTF8 = false;
g_bForceLoadASCIIasUTF8 = false;
StringCchCopy(tchMaxPathBuffer, COUNTOF(tchMaxPathBuffer), g_wchCurFile);
FileLoad(false, false, true, false, false, tchMaxPathBuffer);
}
@ -5339,13 +5339,13 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
{
if (StringCchLenW(g_wchCurFile,COUNTOF(g_wchCurFile))) {
int _fNoFileVariables = g_flagNoFileVariables;
bool _bNoEncodingTags = bNoEncodingTags;
bool _bNoEncodingTags = g_bNoEncodingTags;
g_flagNoFileVariables = 1;
bNoEncodingTags = 1;
g_bNoEncodingTags = 1;
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),g_wchCurFile);
FileLoad(false,false,true, bSkipUnicodeDetection, bSkipANSICodePageDetection, tchMaxPathBuffer);
g_flagNoFileVariables = _fNoFileVariables;
bNoEncodingTags = _bNoEncodingTags;
g_bNoEncodingTags = _bNoEncodingTags;
}
}
break;
@ -6884,11 +6884,11 @@ void LoadSettings()
bSkipANSICodePageDetection = IniSectionGetBool(pIniSection, L"SkipANSICodePageDetection", true);
bLoadASCIIasUTF8 = IniSectionGetBool(pIniSection, L"LoadASCIIasUTF8", false);
g_bLoadASCIIasUTF8 = IniSectionGetBool(pIniSection, L"LoadASCIIasUTF8", false);
bLoadNFOasOEM = IniSectionGetBool(pIniSection, L"LoadNFOasOEM", true);
g_bLoadNFOasOEM = IniSectionGetBool(pIniSection, L"LoadNFOasOEM", true);
bNoEncodingTags = IniSectionGetBool(pIniSection, L"NoEncodingTags", false);
g_bNoEncodingTags = IniSectionGetBool(pIniSection, L"NoEncodingTags", false);
g_iDefaultEOLMode = clampi(IniSectionGetInt(pIniSection, L"DefaultEOLMode", 0), 0, 2);
@ -7197,9 +7197,9 @@ void SaveSettings(bool bSaveSettingsNow)
IniSectionSetBool(pIniSection, L"UseDefaultForFileEncoding", bUseDefaultForFileEncoding);
IniSectionSetBool(pIniSection, L"SkipUnicodeDetection", bSkipUnicodeDetection);
IniSectionSetBool(pIniSection, L"SkipANSICodePageDetection", bSkipANSICodePageDetection);
IniSectionSetInt(pIniSection, L"LoadASCIIasUTF8", bLoadASCIIasUTF8);
IniSectionSetBool(pIniSection, L"LoadNFOasOEM", bLoadNFOasOEM);
IniSectionSetBool(pIniSection, L"NoEncodingTags", bNoEncodingTags);
IniSectionSetInt(pIniSection, L"LoadASCIIasUTF8", g_bLoadASCIIasUTF8);
IniSectionSetBool(pIniSection, L"LoadNFOasOEM", g_bLoadNFOasOEM);
IniSectionSetBool(pIniSection, L"NoEncodingTags", g_bNoEncodingTags);
IniSectionSetInt(pIniSection, L"DefaultEOLMode", g_iDefaultEOLMode);
IniSectionSetBool(pIniSection, L"FixLineEndings", bFixLineEndings);
IniSectionSetBool(pIniSection, L"FixTrailingBlanks", bAutoStripBlanks);