mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
+ rfc: refactoring encoding detection: pure ASCII vs. 7-bit
This commit is contained in:
parent
51d93fa0f9
commit
9d3f190236
10
src/Edit.c
10
src/Edit.c
@ -471,7 +471,7 @@ bool EditIsRecodingNeeded(WCHAR* pszText, int cchLen)
|
||||
return false;
|
||||
}
|
||||
|
||||
DWORD dwFlags = Encoding_GetWCMB_Flags(codepage);
|
||||
DWORD dwFlags = Encoding_GetWCMBFlagsByCodePage(codepage);
|
||||
if (dwFlags != 0) {
|
||||
dwFlags |= (WC_COMPOSITECHECK | WC_DEFAULTCHAR);
|
||||
}
|
||||
@ -1166,6 +1166,7 @@ bool EditLoadFile(
|
||||
bSkipUTFDetection, bSkipANSICPDetection, bForceEncDetection);
|
||||
|
||||
#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetection.forcedEncoding))
|
||||
#define IS_ENC_PURE_ASCII() (encDetection.analyzedEncoding == CPI_ASCII_7BIT)
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
@ -1238,7 +1239,6 @@ bool EditLoadFile(
|
||||
bool const bAnalysisUTF8 = Encoding_IsUTF8(encDetection.Encoding);
|
||||
|
||||
bool const bRejectUTF8 = (IS_ENC_ENFORCED() && !bForcedUTF8) || !bValidUTF8 || (!encDetection.bIsUTF8Sig && bSkipUTFDetection);
|
||||
bool const bIsCP_UTF7 = (Encoding_GetCodePage(encDetection.Encoding) == CP_UTF7);
|
||||
|
||||
if (bForcedUTF8 || (!bRejectUTF8 && (encDetection.bIsUTF8Sig || bAnalysisUTF8))) {
|
||||
if (encDetection.bIsUTF8Sig) {
|
||||
@ -1250,8 +1250,8 @@ bool EditLoadFile(
|
||||
status->iEncoding = CPI_UTF8;
|
||||
EditDetectEOLMode(lpData, cbData, status);
|
||||
}
|
||||
} else if (!IS_ENC_ENFORCED() && (bIsCP_UTF7 && encDetection.bIs7BitOnly)) {
|
||||
// load UTF-7/ASCII(7-bit) as ANSI/UTF-8
|
||||
} else if (!IS_ENC_ENFORCED() && IS_ENC_PURE_ASCII()) {
|
||||
// load ASCII(7-bit) as ANSI/UTF-8
|
||||
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory);
|
||||
status->iEncoding = (Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT);
|
||||
EditDetectEOLMode(lpData, cbData, status);
|
||||
@ -1459,7 +1459,7 @@ bool EditSaveFile(
|
||||
lpDataWide, (SizeOfMem(lpDataWide) / sizeof(WCHAR)));
|
||||
|
||||
// dry conversion run
|
||||
DWORD const dwFlags = Encoding_GetWCMB_Flags(uCodePage);
|
||||
DWORD const dwFlags = Encoding_GetWCMBFlagsByCodePage(uCodePage);
|
||||
size_t const cbSizeNeeded = (size_t)WideCharToMultiByteEx(uCodePage, dwFlags, lpDataWide, cbDataWide, NULL, 0, NULL, NULL);
|
||||
size_t const cbDataNew = max(cbSizeNeeded, cbDataWide);
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ const UINT uCodePageMBCS[] = {
|
||||
// ============================================================================
|
||||
|
||||
|
||||
DWORD Encoding_GetWCMB_Flags(const UINT codePage)
|
||||
DWORD Encoding_GetWCMBFlagsByCodePage(const UINT codePage)
|
||||
{
|
||||
DWORD flags = WC_NO_BEST_FIT_CHARS;
|
||||
for (int k = 0; k < COUNTOF(uCodePageMBCS); k++) {
|
||||
@ -721,7 +721,7 @@ bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt)
|
||||
}
|
||||
// ============================================================================
|
||||
|
||||
|
||||
#if 0
|
||||
bool IsValidUTF7(const char* pTest, size_t nLength)
|
||||
{
|
||||
if (!pTest) {
|
||||
@ -737,6 +737,7 @@ bool IsValidUTF7(const char* pTest, size_t nLength)
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
// ============================================================================
|
||||
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ typedef struct _np2encoding {
|
||||
|
||||
} NP2ENCODING;
|
||||
|
||||
DWORD Encoding_GetWCMB_Flags(const UINT codePage);
|
||||
DWORD Encoding_GetWCMBFlagsByCodePage(const UINT codePage);
|
||||
|
||||
cpi_enc_t Encoding_Current(cpi_enc_t iEncoding); // getter/setter
|
||||
cpi_enc_t Encoding_Forced(cpi_enc_t iEncoding); // getter/setter
|
||||
@ -186,7 +186,6 @@ typedef struct _enc_det_t {
|
||||
float confidence;
|
||||
// flags:
|
||||
bool bIsAnalysisReliable;
|
||||
bool bIs7BitOnly;
|
||||
bool bHasBOM;
|
||||
bool bIsReverse;
|
||||
bool bIsUTF8Sig;
|
||||
@ -196,7 +195,7 @@ typedef struct _enc_det_t {
|
||||
|
||||
} ENC_DET_T;
|
||||
|
||||
#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, "" }
|
||||
#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, "" }
|
||||
|
||||
|
||||
ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, const size_t cbData,
|
||||
|
||||
@ -626,7 +626,7 @@ constexpr cpi_enc_t _MapStdEncodingString2CPI(const char* encStrg, float* pConfi
|
||||
|
||||
if (encStrg && (encStrg[0] != '\0')) {
|
||||
// preprocessing: special cases
|
||||
if (_stricmp(encStrg, "ascii") == 0) {
|
||||
if (StrCmpICA(encStrg, "ascii") == 0) {
|
||||
cpiEncoding = CPI_ASCII_7BIT;
|
||||
} else {
|
||||
cpiEncoding = Encoding_MatchA(encStrg);
|
||||
@ -1270,7 +1270,6 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
|
||||
encDetRes.bHasBOM = (bBOM_LE || bBOM_BE);
|
||||
encDetRes.bIsReverse = bBOM_BE;
|
||||
encDetRes.bIsUTF8Sig = ((cbData >= 3) ? IsUTF8Signature(lpData) : false);
|
||||
encDetRes.bIs7BitOnly = IsValidUTF7(lpData, cbData);
|
||||
encDetRes.bValidUTF8 = IsValidUTF8(lpData, cbData);
|
||||
|
||||
if (!IS_ENC_ENFORCED()) {
|
||||
@ -1298,6 +1297,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
|
||||
Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes, iAnalyzeHint);
|
||||
// ---------------------------------------------------------------------------
|
||||
}
|
||||
//~encDetRes.bIs7BitOnly = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) || IsValidUTF7(lpData, cbData);
|
||||
|
||||
if (encDetRes.analyzedEncoding == CPI_NONE) {
|
||||
encDetRes.analyzedEncoding = iAnalyzeHint;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user