diff --git a/src/Edit.c b/src/Edit.c index 35d20e6e6..4da6e06da 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -1023,32 +1023,36 @@ bool EditLoadFile( size_t const cbNbytes4Analysis = (cbData < 200000L) ? cbData : 200000L; - // if not skipped, analyze bytes - bool bIsReliable = false; - - int const iAnalyzedEncoding = (bSkipANSICPDetection && !g_bForceCompEncDetection) ? CPI_NONE : - Encoding_Analyze(lpData, cbNbytes4Analysis, &bIsReliable); - int const iFileEncWeak = Encoding_SrcWeak(CPI_GET); + int iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding : + ((bUseDefaultForFileEncoding || (cbNbytes4Analysis < 1)) ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT); + + // -------------------------------------------------------------------------- + bool bIsReliable = false; + int iAnalyzedEncoding = (bSkipANSICPDetection && !g_bForceCompEncDetection) ? CPI_NONE : + Encoding_Analyze(lpData, cbNbytes4Analysis, iPreferedEncoding, &bIsReliable); + // correct analysis based on preferred encoding + if (iAnalyzedEncoding == CPI_ANSI_DEFAULT) { + iAnalyzedEncoding = iPreferedEncoding; // stay on prefered + } + // -------------------------------------------------------------------------- + int iForcedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET); if (g_bForceCompEncDetection && !Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable) { iForcedEncoding = iAnalyzedEncoding; } + // -------------------------------------------------------------------------- // choose best encoding guess - int iPreferedEncoding = CPI_NONE; if (!Encoding_IsNONE(iForcedEncoding)) iPreferedEncoding = iForcedEncoding; - else if (iFileEncWeak != CPI_NONE) - iPreferedEncoding = iFileEncWeak; else if (Encoding_IsUNICODE(iAnalyzedEncoding) && !bSkipUTFDetection) iPreferedEncoding = iAnalyzedEncoding; + else if (iFileEncWeak != CPI_NONE) + iPreferedEncoding = iFileEncWeak; else if (!Encoding_IsNONE(iAnalyzedEncoding)) iPreferedEncoding = iAnalyzedEncoding; - else - iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding : - (bUseDefaultForFileEncoding ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT); bool bBOM = false; diff --git a/src/Encoding.c b/src/Encoding.c index 833e5693c..db9b0f1cd 100644 --- a/src/Encoding.c +++ b/src/Encoding.c @@ -40,240 +40,6 @@ extern HMODULE g_hLngResContainer; -//============================================================================= - -#define ENC_PARSE_NAM_ANSI "ansi,system,ascii," -#define ENC_PARSE_NAM_OEM "oem,oem," -#define ENC_PARSE_NAM_UTF16LEBOM "" -#define ENC_PARSE_NAM_UTF16BEBOM "" -#define ENC_PARSE_NAM_UTF16LE "utf-16,utf16,unicode," -#define ENC_PARSE_NAM_UTF16BE "utf-16be,utf16be,unicodebe," -#define ENC_PARSE_NAM_UTF8 "utf-8,utf8," -#define ENC_PARSE_NAM_UTF8SIG "utf-8,utf8," -#define ENC_PARSE_NAM_UTF7 "utf-7,utf7," -#define ENC_PARSE_NAM_DOS_720 "DOS-720,dos720," -#define ENC_PARSE_NAM_ISO_8859_6 "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127," -#define ENC_PARSE_NAM_MAC_ARABIC "x-mac-arabic,xmacarabic," -#define ENC_PARSE_NAM_WIN_1256 "windows-1256,windows1256,cp1256" -#define ENC_PARSE_NAM_DOS_775 "ibm775,ibm775,cp500," -#define ENC_PARSE_NAM_ISO_8859_4 "iso-8859-4,iso88594,csisolatin4,isoir110,l4,latin4," -#define ENC_PARSE_NAM_WIN_1257 "windows-1257,windows1257," -#define ENC_PARSE_NAM_DOS_852 "ibm852,ibm852,cp852," -#define ENC_PARSE_NAM_ISO_8859_2 "iso-8859-2,iso88592,csisolatin2,isoir101,latin2,l2," -#define ENC_PARSE_NAM_MAC_CENTRAL_EUROP "x-mac-ce,xmacce," -#define ENC_PARSE_NAM_WIN_1250 "windows-1250,windows1250,xcp1250," -#define ENC_PARSE_NAM_GBK_2312 "gb2312,gb2312,chinese,cngb,csgb2312,csgb231280,gb231280,gbk," -#define ENC_PARSE_NAM_MAC_ZH_CN "x-mac-chinesesimp,xmacchinesesimp," -#define ENC_PARSE_NAM_BIG5 "big5,big5,cnbig5,csbig5,xxbig5," -#define ENC_PARSE_NAM_MAC_ZH_TW "x-mac-chinesetrad,xmacchinesetrad," -#define ENC_PARSE_NAM_MAC_CROATIAN "x-mac-croatian,xmaccroatian," -#define ENC_PARSE_NAM_DOS_866 "cp866,cp866,ibm866," -#define ENC_PARSE_NAM_ISO_8859_5 "iso-8859-5,iso88595,csisolatin5,csisolatincyrillic,cyrillic,isoir144," -#define ENC_PARSE_NAM_KOI8_R "koi8-r,koi8r,cskoi8r,koi,koi8," -#define ENC_PARSE_NAM_KOI8_U "koi8-u,koi8u,koi8ru," -#define ENC_PARSE_NAM_MAC_CYRILLIC "x-mac-cyrillic,xmaccyrillic," -#define ENC_PARSE_NAM_WIN_1251 "windows-1251,windows1251,xcp1251," -#define ENC_PARSE_NAM_ISO_8859_13 "iso-8859-13,iso885913," -#define ENC_PARSE_NAM_DOS_863 "ibm863,ibm863," -#define ENC_PARSE_NAM_DOS_737 "ibm737,ibm737," -#define ENC_PARSE_NAM_ISO_8859_7 "iso-8859-7,iso88597,csisolatingreek,ecma118,elot928,greek,greek8,isoir126," -#define ENC_PARSE_NAM_MAC_GREEK "x-mac-greek,xmacgreek," -#define ENC_PARSE_NAM_WIN_1253 "windows-1253,windows1253," -#define ENC_PARSE_NAM_DOS_869 "ibm869,ibm869," -#define ENC_PARSE_NAM_DOS_862 "DOS-862,dos862," -#define ENC_PARSE_NAM_ISO_8859_8_I "iso-8859-8-i,iso88598i,logical," -#define ENC_PARSE_NAM_ISO_8859_8 "iso-8859-8,iso88598,csisolatinhebrew,hebrew,isoir138,visual," -#define ENC_PARSE_NAM_MAC_HEBREW "x-mac-hebrew,xmachebrew," -#define ENC_PARSE_NAM_WIN_1255 "windows-1255,windows1255," -#define ENC_PARSE_NAM_DOS_861 "ibm861,ibm861," -#define ENC_PARSE_NAM_MAC_ICELANDIC "x-mac-icelandic,xmacicelandic," -#define ENC_PARSE_NAM_MAC_JAPANESE "x-mac-japanese,xmacjapanese," -#define ENC_PARSE_NAM_SHIFT_JIS "shift_jis,shiftjis,shiftjs,csshiftjis,cswindows31j,mskanji,xmscp932,xsjis," -#define ENC_PARSE_NAM_MAC_KOREAN "x-mac-korean,xmackorean," -#define ENC_PARSE_NAM_WIN_949 "windows-949,windows949,ksc56011987,csksc5601,euckr,isoir149,korean,ksc56011989" -#define ENC_PARSE_NAM_ISO_8859_3 "iso-8859-3,iso88593,latin3,isoir109,l3," -#define ENC_PARSE_NAM_ISO_8859_15 "iso-8859-15,iso885915,latin9,l9," -#define ENC_PARSE_NAM_DOS_865 "ibm865,ibm865," -#define ENC_PARSE_NAM_DOS_437 "ibm437,ibm437,437,cp437,cspc8,codepage437," -#define ENC_PARSE_NAM_DOS_858 "ibm858,ibm858,ibm00858," -#define ENC_PARSE_NAM_DOS_860 "ibm860,ibm860," -#define ENC_PARSE_NAM_MAC_ROMANIAN "x-mac-romanian,xmacromanian," -#define ENC_PARSE_NAM_MAC_THAI "x-mac-thai,xmacthai," -#define ENC_PARSE_NAM_WIN_874 "windows-874,windows874,dos874,iso885911,tis620," -#define ENC_PARSE_NAM_DOS_857 "ibm857,ibm857," -#define ENC_PARSE_NAM_ISO_8859_9 "iso-8859-9,iso88599,latin5,isoir148,l5," -#define ENC_PARSE_NAM_MAC_TURKISH "x-mac-turkish,xmacturkish," -#define ENC_PARSE_NAM_WIN_1254 "windows-1254,windows1254," -#define ENC_PARSE_NAM_MAC_UKRAINIAN "x-mac-ukrainian,xmacukrainian," -#define ENC_PARSE_NAM_WIN_1258 "windows-1258,windows-258," -#define ENC_PARSE_NAM_DOS_850 "ibm850,ibm850," -#define ENC_PARSE_NAM_ISO_8859_1 "iso-8859-1,iso88591,cp819,latin1,ibm819,isoir100,latin1,l1," -#define ENC_PARSE_NAM_MAC_WESTERN_EUROP "macintosh,macintosh," -#define ENC_PARSE_NAM_WIN_1252 "windows-1252,windows1252,cp367,cp819,ibm367,us,xansi," -#define ENC_PARSE_NAM_IBM_EBCDIC_US "ebcdic-cp-us,ebcdiccpus,ebcdiccpca,ebcdiccpwt,ebcdiccpnl,ibm037,cp037," -#define ENC_PARSE_NAM_IBM_EBCDIC_INT "x-ebcdic-international,xebcdicinternational," -#define ENC_PARSE_NAM_IBM_EBCDIC_GR "x-EBCDIC-GreekModern,xebcdicgreekmodern," -#define ENC_PARSE_NAM_IBM_EBCDIC_LAT_5 "CP1026,cp1026,csibm1026,ibm1026," -#define ENC_PARSE_NAM_GB18030 "gb18030,gb18030," -#define ENC_PARSE_NAM_EUC_JAPANESE "euc-jp,eucjp,xeuc,xeucjp," -#define ENC_PARSE_NAM_EUC_KOREAN "euc-kr,euckr,cseuckr," -#define ENC_PARSE_NAM_ISO_2022_CN "iso-2022-cn,iso2022cn," -#define ENC_PARSE_NAM_HZ_GB2312 "hz-gb-2312,hzgb2312,hz," -#define ENC_PARSE_NAM_ISO_2022_JP "iso-2022-jp,iso2022jp," -#define ENC_PARSE_NAM_ISO_2022_KR "iso-2022-kr,iso2022kr,csiso2022kr," -#define ENC_PARSE_NAM_X_CHINESE_CNS "x-Chinese-CNS,xchinesecns," - -//============================================================================= - -static NP2ENCODING g_Encodings[] = { - /* 000 */{ NCP_ANSI | NCP_RECODE, CP_ACP, ENC_PARSE_NAM_ANSI, IDS_ENC_ANSI, L"" }, - /* 001 */{ NCP_OEM | NCP_RECODE, CP_OEMCP, ENC_PARSE_NAM_OEM, IDS_ENC_OEM, L"" }, - /* 002 */{ NCP_UNICODE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16LEBOM, IDS_ENC_UTF16LEBOM, L"" }, - /* 003 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16BEBOM, IDS_ENC_UTF16BEBOM, L"" }, - /* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, L"" }, - /* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, L"" }, - /* 006 */{ NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, L"" }, - /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, - /* 008 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, L"" }, - /* 009 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, L"" }, - /* 010 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, L"" }, - /* 011 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10004, ENC_PARSE_NAM_MAC_ARABIC, IDS_ENC_MAC_ARABIC, L"" }, - /* 012 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1256, ENC_PARSE_NAM_WIN_1256, IDS_ENC_WIN_1256, L"" }, - /* 013 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 775, ENC_PARSE_NAM_DOS_775, IDS_ENC_DOS_775, L"" }, - /* 014 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28594, ENC_PARSE_NAM_ISO_8859_4, IDS_ENC_ISO_8859_4, L"" }, - /* 015 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1257, ENC_PARSE_NAM_WIN_1257, IDS_ENC_WIN_1257, L"" }, - /* 016 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 852, ENC_PARSE_NAM_DOS_852, IDS_ENC_DOS_852, L"" }, - /* 017 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28592, ENC_PARSE_NAM_ISO_8859_2, IDS_ENC_ISO_8859_2, L"" }, - /* 018 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10029, ENC_PARSE_NAM_MAC_CENTRAL_EUROP, IDS_ENC_MAC_CENTRAL_EUROP, L"" }, - /* 019 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1250, ENC_PARSE_NAM_WIN_1250, IDS_ENC_WIN_1250, L"" }, - /* 020 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 936, ENC_PARSE_NAM_GBK_2312, IDS_ENC_GBK_2312, L"" }, - /* 021 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10008, ENC_PARSE_NAM_MAC_ZH_CN, IDS_ENC_MAC_ZH_CN, L"" }, - /* 022 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 950, ENC_PARSE_NAM_BIG5, IDS_ENC_BIG5, L"" }, - /* 023 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10002, ENC_PARSE_NAM_MAC_ZH_TW, IDS_ENC_MAC_ZH_TW, L"" }, - /* 024 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10082, ENC_PARSE_NAM_MAC_CROATIAN, IDS_ENC_MAC_CROATIAN, L"" }, - /* 025 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 866, ENC_PARSE_NAM_DOS_866, IDS_ENC_DOS_866, L"" }, - /* 026 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28595, ENC_PARSE_NAM_ISO_8859_5, IDS_ENC_ISO_8859_5, L"" }, - /* 027 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20866, ENC_PARSE_NAM_KOI8_R, IDS_ENC_KOI8_R, L"" }, - /* 028 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 21866, ENC_PARSE_NAM_KOI8_U, IDS_ENC_KOI8_U, L"" }, - /* 029 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10007, ENC_PARSE_NAM_MAC_CYRILLIC, IDS_ENC_MAC_CYRILLIC, L"" }, - /* 030 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1251, ENC_PARSE_NAM_WIN_1251, IDS_ENC_WIN_1251, L"" }, - /* 031 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28603, ENC_PARSE_NAM_ISO_8859_13, IDS_ENC_ISO_8859_13, L"" }, - /* 032 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 863, ENC_PARSE_NAM_DOS_863, IDS_ENC_DOS_863, L"" }, - /* 033 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 737, ENC_PARSE_NAM_DOS_737, IDS_ENC_DOS_737, L"" }, - /* 034 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28597, ENC_PARSE_NAM_ISO_8859_7, IDS_ENC_ISO_8859_7, L"" }, - /* 035 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10006, ENC_PARSE_NAM_MAC_GREEK, IDS_ENC_MAC_GREEK, L"" }, - /* 036 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1253, ENC_PARSE_NAM_WIN_1253, IDS_ENC_WIN_1253, L"" }, - /* 037 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 869, ENC_PARSE_NAM_DOS_869, IDS_ENC_DOS_869, L"" }, - /* 038 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 862, ENC_PARSE_NAM_DOS_862, IDS_ENC_DOS_862, L"" }, - /* 039 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 38598, ENC_PARSE_NAM_ISO_8859_8_I, IDS_ENC_ISO_8859_8_I, L"" }, - /* 040 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28598, ENC_PARSE_NAM_ISO_8859_8, IDS_ENC_ISO_8859_8, L"" }, - /* 041 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10005, ENC_PARSE_NAM_MAC_HEBREW, IDS_ENC_MAC_HEBREW, L"" }, - /* 042 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1255, ENC_PARSE_NAM_WIN_1255, IDS_ENC_WIN_1255, L"" }, - /* 043 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 861, ENC_PARSE_NAM_DOS_861, IDS_ENC_DOS_861, L"" }, - /* 044 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10079, ENC_PARSE_NAM_MAC_ICELANDIC, IDS_ENC_MAC_ICELANDIC, L"" }, - /* 045 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10001, ENC_PARSE_NAM_MAC_JAPANESE, IDS_ENC_MAC_JAPANESE, L"" }, - /* 046 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 932, ENC_PARSE_NAM_SHIFT_JIS, IDS_ENC_SHIFT_JIS, L"" }, - /* 047 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10003, ENC_PARSE_NAM_MAC_KOREAN, IDS_ENC_MAC_KOREAN, L"" }, - /* 048 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 949, ENC_PARSE_NAM_WIN_949, IDS_ENC_WIN_949, L"" }, - /* 049 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28593, ENC_PARSE_NAM_ISO_8859_3, IDS_ENC_ISO_8859_3, L"" }, - /* 050 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28605, ENC_PARSE_NAM_ISO_8859_15, IDS_ENC_ISO_8859_15, L"" }, - /* 051 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 865, ENC_PARSE_NAM_DOS_865, IDS_ENC_DOS_865, L"" }, - /* 052 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 437, ENC_PARSE_NAM_DOS_437, IDS_ENC_DOS_437, L"" }, - /* 053 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 858, ENC_PARSE_NAM_DOS_858, IDS_ENC_DOS_858, L"" }, - /* 054 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 860, ENC_PARSE_NAM_DOS_860, IDS_ENC_DOS_860, L"" }, - /* 055 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10010, ENC_PARSE_NAM_MAC_ROMANIAN, IDS_ENC_MAC_ROMANIAN, L"" }, - /* 056 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10021, ENC_PARSE_NAM_MAC_THAI, IDS_ENC_MAC_THAI, L"" }, - /* 057 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 874, ENC_PARSE_NAM_WIN_874, IDS_ENC_WIN_874, L"" }, - /* 058 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 857, ENC_PARSE_NAM_DOS_857, IDS_ENC_DOS_857, L"" }, - /* 059 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28599, ENC_PARSE_NAM_ISO_8859_9, IDS_ENC_ISO_8859_9, L"" }, - /* 060 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10081, ENC_PARSE_NAM_MAC_TURKISH, IDS_ENC_MAC_TURKISH, L"" }, - /* 061 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1254, ENC_PARSE_NAM_WIN_1254, IDS_ENC_WIN_1254, L"" }, - /* 062 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10017, ENC_PARSE_NAM_MAC_UKRAINIAN, IDS_ENC_MAC_UKRAINIAN, L"" }, - /* 063 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1258, ENC_PARSE_NAM_WIN_1258, IDS_ENC_WIN_1258, L"" }, - /* 064 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 850, ENC_PARSE_NAM_DOS_850, IDS_ENC_DOS_850, L"" }, - /* 065 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28591, ENC_PARSE_NAM_ISO_8859_1, IDS_ENC_ISO_8859_1, L"" }, - /* 066 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10000, ENC_PARSE_NAM_MAC_WESTERN_EUROP, IDS_ENC_MAC_WESTERN_EUROP, L"" }, - /* 067 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1252, ENC_PARSE_NAM_WIN_1252, IDS_ENC_WIN_1252, L"" }, - /* 068 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 37, ENC_PARSE_NAM_IBM_EBCDIC_US, IDS_ENC_IBM_EBCDIC_US, L"" }, - /* 069 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 500, ENC_PARSE_NAM_IBM_EBCDIC_INT, IDS_ENC_IBM_EBCDIC_INT, L"" }, - /* 070 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 875, ENC_PARSE_NAM_IBM_EBCDIC_GR, IDS_ENC_IBM_EBCDIC_GR, L"" }, - /* 071 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1026, ENC_PARSE_NAM_IBM_EBCDIC_LAT_5, IDS_ENC_IBM_EBCDIC_LAT_5, L"" }, - /* 072 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 54936, ENC_PARSE_NAM_GB18030, IDS_ENC_GB18030, L"" }, // Chinese Simplified (GB18030) - /* 073 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51932, ENC_PARSE_NAM_EUC_JAPANESE, IDS_ENC_EUC_JAPANESE, L"" }, // Japanese (EUC) - /* 074 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51949, ENC_PARSE_NAM_EUC_KOREAN, IDS_ENC_EUC_KOREAN, L"" }, // Korean (EUC) - /* 075 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50229, ENC_PARSE_NAM_ISO_2022_CN, IDS_ENC_ISO_2022_CN, L"" }, // Chinese Traditional (ISO-2022-CN) - /* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312, IDS_ENC_HZ_GB2312, L"" }, // Chinese Simplified (HZ-GB2312) - /* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP, IDS_ENC_ISO_2022_JP, L"" }, // Japanese (JIS) - /* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR, IDS_ENC_ISO_2022_KR, L"" }, // Korean (ISO-2022-KR) - /* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, L"" } // Chinese Traditional (CNS) - - /* 073 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 870, "CP870,cp870,ebcdiccproece,ebcdiccpyu,csibm870,ibm870,", 00000, L"" }, // IBM EBCDIC (Multilingual Latin-2) - /* 074 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1047, "IBM01047,ibm01047,", 00000, L"" }, // IBM EBCDIC (Open System Latin-1) - /* 075 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1140, "x-ebcdic-cp-us-euro,xebcdiccpuseuro,", 00000, L"" }, // IBM EBCDIC (US-Canada-Euro) - /* 076 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1141, "x-ebcdic-germany-euro,xebcdicgermanyeuro,", 00000, L"" }, // IBM EBCDIC (Germany-Euro) - /* 077 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1142, "x-ebcdic-denmarknorway-euro,xebcdicdenmarknorwayeuro,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway-Euro) - /* 078 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1143, "x-ebcdic-finlandsweden-euro,xebcdicfinlandswedeneuro,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden-Euro) - /* 079 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1144, "x-ebcdic-italy-euro,xebcdicitalyeuro,", 00000, L"" }, // IBM EBCDIC (Italy-Euro) - /* 080 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1145, "x-ebcdic-spain-euro,xebcdicspaineuro,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America-Euro) - /* 081 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1146, "x-ebcdic-uk-euro,xebcdicukeuro,", 00000, L"" }, // IBM EBCDIC (UK-Euro) - /* 082 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1147, "x-ebcdic-france-euro,xebcdicfranceeuro,", 00000, L"" }, // IBM EBCDIC (France-Euro) - /* 083 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1148, "x-ebcdic-international-euro,xebcdicinternationaleuro,", 00000, L"" }, // IBM EBCDIC (International-Euro) - /* 084 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1149, "x-ebcdic-icelandic-euro,xebcdicicelandiceuro,", 00000, L"" }, // IBM EBCDIC (Icelandic-Euro) - /* 085 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1361, "johab,johab,", 00000, L"" }, // Korean (Johab) - /* 086 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20273, "x-EBCDIC-Germany,xebcdicgermany,", 00000, L"" }, // IBM EBCDIC (Germany) - /* 087 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20277, "x-EBCDIC-DenmarkNorway,xebcdicdenmarknorway,ebcdiccpdk,ebcdiccpno,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway) - /* 088 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20278, "x-EBCDIC-FinlandSweden,xebcdicfinlandsweden,ebcdicpfi,ebcdiccpse,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden) - /* 089 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20280, "x-EBCDIC-Italy,xebcdicitaly,", 00000, L"" }, // IBM EBCDIC (Italy) - /* 090 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20284, "x-EBCDIC-Spain,xebcdicspain,ebcdiccpes,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America) - /* 091 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20285, "x-EBCDIC-UK,xebcdicuk,ebcdiccpgb,", 00000, L"" }, // IBM EBCDIC (UK) - /* 092 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20290, "x-EBCDIC-JapaneseKatakana,xebcdicjapanesekatakana,", 00000, L"" }, // IBM EBCDIC (Japanese Katakana) - /* 093 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20297, "x-EBCDIC-France,xebcdicfrance,ebcdiccpfr,", 00000, L"" }, // IBM EBCDIC (France) - /* 094 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20420, "x-EBCDIC-Arabic,xebcdicarabic,ebcdiccpar1,", 00000, L"" }, // IBM EBCDIC (Arabic) - /* 095 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20423, "x-EBCDIC-Greek,xebcdicgreek,ebcdiccpgr,", 00000, L"" }, // IBM EBCDIC (Greek) - /* 096 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20424, "x-EBCDIC-Hebrew,xebcdichebrew,ebcdiccphe,", 00000, L"" }, // IBM EBCDIC (Hebrew) - /* 097 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20833, "x-EBCDIC-KoreanExtended,xebcdickoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean Extended) - /* 098 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20838, "x-EBCDIC-Thai,xebcdicthai,ibmthai,csibmthai,", 00000, L"" }, // IBM EBCDIC (Thai) - /* 099 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20871, "x-EBCDIC-Icelandic,xebcdicicelandic,ebcdiccpis,", 00000, L"" }, // IBM EBCDIC (Icelandic) - /* 100 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20880, "x-EBCDIC-CyrillicRussian,xebcdiccyrillicrussian,ebcdiccyrillic,", 00000, L"" }, // IBM EBCDIC (Cyrillic Russian) - /* 101 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20905, "x-EBCDIC-Turkish,xebcdicturkish,ebcdiccptr,", 00000, L"" }, // IBM EBCDIC (Turkish) - /* 102 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20924, "IBM00924,ibm00924,ebcdiclatin9euro,", 00000, L"" }, // IBM EBCDIC (Open System-Euro Latin-1) - /* 103 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 21025, "x-EBCDIC-CyrillicSerbianBulgarian,xebcdiccyrillicserbianbulgarian,", 00000, L"" }, // IBM EBCDIC (Cyrillic Serbian-Bulgarian) - /* 104 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50930, "x-EBCDIC-JapaneseAndKana,xebcdicjapaneseandkana,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese Katakana) - /* 105 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50931, "x-EBCDIC-JapaneseAndUSCanada,xebcdicjapaneseanduscanada,", 00000, L"" }, // IBM EBCDIC (Japanese and US-Canada) - /* 106 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50933, "x-EBCDIC-KoreanAndKoreanExtended,xebcdickoreanandkoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean and Korean Extended) - /* 107 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50935, "x-EBCDIC-SimplifiedChinese,xebcdicsimplifiedchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Simplified) - /* 108 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50937, "x-EBCDIC-TraditionalChinese,xebcdictraditionalchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Traditional) - /* 109 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50939, "x-EBCDIC-JapaneseAndJapaneseLatin,xebcdicjapaneseandjapaneselatin,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese-Latin) - /* 110 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20105, "x-IA5,xia5,", 00000, L"" }, // Western European (IA5) - /* 111 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20106, "x-IA5-German,xia5german,", 00000, L"" }, // German (IA5) - /* 112 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20107, "x-IA5-Swedish,xia5swedish,", 00000, L"" }, // Swedish (IA5) - /* 113 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20108, "x-IA5-Norwegian,xia5norwegian,", 00000, L"" }, // Norwegian (IA5) - /* 114 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20936, "x-cp20936,xcp20936,", 00000, L"" }, // Chinese Simplified (GB2312) - /* 115 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20932, "euc-jp,,", 00000, L"" }, // Japanese (JIS X 0208-1990 & 0212-1990) - /* 117 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50221, "csISO2022JP,csiso2022jp,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana) - /* 118 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50222, "_iso-2022-jp$SIO,iso2022jpSIO,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana - SO/SI) - /* 120 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50227, "x-cp50227,xcp50227,", 00000, L"" }, // Chinese Simplified (ISO-2022) - /* 123 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20002, "x-Chinese-Eten,xchineseeten,", 00000, L"" }, // Chinese Traditional (Eten) - /* 125 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 51936, "euc-cn,euccn,xeuccn,", 00000, L"" }, // Chinese Simplified (EUC) - /* 128 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57002, "x-iscii-de,xisciide,", 00000, L"" }, // ISCII Devanagari - /* 129 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57003, "x-iscii-be,xisciibe,", 00000, L"" }, // ISCII Bengali - /* 130 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57004, "x-iscii-ta,xisciita,", 00000, L"" }, // ISCII Tamil - /* 131 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57005, "x-iscii-te,xisciite,", 00000, L"" }, // ISCII Telugu - /* 132 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57006, "x-iscii-as,xisciias,", 00000, L"" }, // ISCII Assamese - /* 133 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57007, "x-iscii-or,xisciior,", 00000, L"" }, // ISCII Oriya - /* 134 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57008, "x-iscii-ka,xisciika,", 00000, L"" }, // ISCII Kannada - /* 135 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57009, "x-iscii-ma,xisciima,", 00000, L"" }, // ISCII Malayalam - /* 136 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57010, "x-iscii-gu,xisciigu,", 00000, L"" }, // ISCII Gujarathi - /* 137 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57011, "x-iscii-pa,xisciipa,", 00000, L"" }, // ISCII Panjabi -}; - -int Encoding_CountOf() -{ - return COUNTOF(g_Encodings); -} -//============================================================================= - - //============================================================================= // // Encoding Helper Functions @@ -289,7 +55,8 @@ WCHAR wchOEM[16] = { L'\0' }; // ============================================================================ -int Encoding_Current(int iEncoding) { +int Encoding_Current(int iEncoding) +{ static int CurrentEncoding = CPI_NONE; if (iEncoding >= 0) { @@ -303,7 +70,8 @@ int Encoding_Current(int iEncoding) { // ============================================================================ -int Encoding_SrcCmdLn(int iSrcEncoding) { +int Encoding_SrcCmdLn(int iSrcEncoding) +{ static int SourceEncoding = CPI_NONE; if (iSrcEncoding >= 0) { @@ -320,7 +88,8 @@ int Encoding_SrcCmdLn(int iSrcEncoding) { // ============================================================================ -int Encoding_SrcWeak(int iSrcWeakEnc) { +int Encoding_SrcWeak(int iSrcWeakEnc) +{ static int SourceWeakEncoding = CPI_NONE; if (iSrcWeakEnc >= 0) { @@ -358,10 +127,10 @@ void Encoding_InitDefaults() 65001 // (UTF-8) }; - g_Encodings[CPI_ANSI_DEFAULT].uCodePage = GetACP(); // set ANSI system CP + ChangeEncodingCodePage(CPI_ANSI_DEFAULT, GetACP()); // set ANSI system CP StringCchPrintf(wchANSI, COUNTOF(wchANSI), L" (CP-%u)", g_Encodings[CPI_ANSI_DEFAULT].uCodePage); - for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) { + for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) { if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == g_Encodings[CPI_ANSI_DEFAULT].uCodePage)) { g_Encodings[i].uFlags |= NCP_ANSI; if (g_Encodings[i].uFlags & NCP_EXTERNAL_8BIT) @@ -370,10 +139,10 @@ void Encoding_InitDefaults() } } - g_Encodings[CPI_OEM].uCodePage = GetOEMCP(); + ChangeEncodingCodePage(CPI_OEM, GetOEMCP()); // set OEM system CP StringCchPrintf(wchOEM, COUNTOF(wchOEM), L" (CP-%u)", g_Encodings[CPI_OEM].uCodePage); - for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) { + for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) { if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == g_Encodings[CPI_OEM].uCodePage)) { g_Encodings[i].uFlags |= NCP_OEM; if (g_Encodings[i].uFlags & NCP_EXTERNAL_8BIT) @@ -383,7 +152,7 @@ void Encoding_InitDefaults() } // multi byte character sets - for (int i = 0; i < COUNTOF(g_Encodings); ++i) { + for (int i = 0; i < Encoding_CountOf(); ++i) { for (int k = 0; k < COUNTOF(uCodePageMBCS); k++) { if (g_Encodings[i].uCodePage == uCodePageMBCS[k]) { g_Encodings[i].uFlags |= NCP_MBCS; @@ -394,7 +163,7 @@ void Encoding_InitDefaults() g_DOSEncoding = CPI_OEM; // Try to set the DOS encoding to DOS-437 if the default OEMCP is not DOS-437 if (g_Encodings[g_DOSEncoding].uCodePage != 437) { - for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) { + for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) { if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == 437)) { g_DOSEncoding = i; break; @@ -420,7 +189,7 @@ int Encoding_MapIniSetting(bool bLoad, int iSetting) { case 7: return CPI_UNICODEBE; case 8: return CPI_UTF7; default: { - for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); i++) { + for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); i++) { if ((g_Encodings[i].uCodePage == (UINT)iSetting) && Encoding_IsValid(i)) return(i); } @@ -515,7 +284,7 @@ int Encoding_MatchA(char *pchTest) { } *pchDst++ = ','; *pchDst = 0; - for (int i = 0; i < COUNTOF(g_Encodings); i++) { + for (int i = 0; i < Encoding_CountOf(); i++) { if (StrStrIA(g_Encodings[i].pszParseNames, chTest)) { CPINFO cpi; if ((g_Encodings[i].uFlags & NCP_INTERNAL) || @@ -532,7 +301,7 @@ int Encoding_MatchA(char *pchTest) { int Encoding_GetByCodePage(UINT cp) { - for (int i = 0; i < COUNTOF(g_Encodings); i++) { + for (int i = 0; i < Encoding_CountOf(); i++) { if (cp == g_Encodings[i].uCodePage) { return i; } @@ -544,7 +313,7 @@ int Encoding_GetByCodePage(UINT cp) { bool Encoding_IsValid(int iTestEncoding) { CPINFO cpi; - if ((iTestEncoding >= 0) && (iTestEncoding < COUNTOF(g_Encodings))) { + if ((iTestEncoding >= 0) && (iTestEncoding < Encoding_CountOf())) { if ((g_Encodings[iTestEncoding].uFlags & NCP_INTERNAL) || IsValidCodePage(g_Encodings[iTestEncoding].uCodePage) && GetCPInfo(g_Encodings[iTestEncoding].uCodePage, &cpi)) { @@ -573,18 +342,18 @@ void Encoding_AddToListView(HWND hwnd, int idSel, bool bRecodeOnly) { LVITEM lvi; WCHAR wchBuf[256] = { L'\0' }; - PENCODINGENTRY pEE = LocalAlloc(LPTR, COUNTOF(g_Encodings) * sizeof(ENCODINGENTRY)); - for (i = 0; i < COUNTOF(g_Encodings); i++) { + PENCODINGENTRY pEE = LocalAlloc(LPTR, Encoding_CountOf() * sizeof(ENCODINGENTRY)); + for (i = 0; i < Encoding_CountOf(); i++) { pEE[i].id = i; GetLngString(g_Encodings[i].idsName, pEE[i].wch, COUNTOF(pEE[i].wch)); } - qsort(pEE, COUNTOF(g_Encodings), sizeof(ENCODINGENTRY), CmpEncoding); + qsort(pEE, Encoding_CountOf(), sizeof(ENCODINGENTRY), CmpEncoding); ZeroMemory(&lvi, sizeof(LVITEM)); lvi.mask = LVIF_PARAM | LVIF_TEXT | LVIF_IMAGE; lvi.pszText = wchBuf; - for (i = 0; i < COUNTOF(g_Encodings); i++) { + for (i = 0; i < Encoding_CountOf(); i++) { int id = pEE[i].id; if (!bRecodeOnly || (g_Encodings[id].uFlags & NCP_RECODE)) { @@ -659,12 +428,12 @@ void Encoding_AddToComboboxEx(HWND hwnd, int idSel, bool bRecodeOnly) { COMBOBOXEXITEM cbei; WCHAR wchBuf[256] = { L'\0' }; - PENCODINGENTRY pEE = LocalAlloc(LPTR, COUNTOF(g_Encodings) * sizeof(ENCODINGENTRY)); - for (i = 0; i < COUNTOF(g_Encodings); i++) { + PENCODINGENTRY pEE = LocalAlloc(LPTR, Encoding_CountOf() * sizeof(ENCODINGENTRY)); + for (i = 0; i < Encoding_CountOf(); i++) { pEE[i].id = i; GetLngString(g_Encodings[i].idsName, pEE[i].wch, COUNTOF(pEE[i].wch)); } - qsort(pEE, COUNTOF(g_Encodings), sizeof(ENCODINGENTRY), CmpEncoding); + qsort(pEE, Encoding_CountOf(), sizeof(ENCODINGENTRY), CmpEncoding); ZeroMemory(&cbei, sizeof(COMBOBOXEXITEM)); cbei.mask = CBEIF_TEXT | CBEIF_IMAGE | CBEIF_SELECTEDIMAGE | CBEIF_LPARAM; @@ -673,7 +442,7 @@ void Encoding_AddToComboboxEx(HWND hwnd, int idSel, bool bRecodeOnly) { cbei.iImage = 0; cbei.iSelectedImage = 0; - for (i = 0; i < COUNTOF(g_Encodings); i++) { + for (i = 0; i < Encoding_CountOf(); i++) { int id = pEE[i].id; if (!bRecodeOnly || (g_Encodings[id].uFlags & NCP_RECODE)) { diff --git a/src/Encoding.h b/src/Encoding.h index ed03e634d..127cd133d 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -50,8 +50,9 @@ extern bool g_bForceCompEncDetection; #define NCP_EXTERNAL_8BIT 512 #define NCP_RECODE 1024 -#define CPI_GET -2 -#define CPI_NONE -1 +#define CED_NO_MAPPING (-3) +#define CPI_GET (-2) +#define CPI_NONE (-1) #define CPI_ANSI_DEFAULT 0 #define CPI_OEM 1 #define CPI_UNICODEBOM 2 @@ -69,11 +70,13 @@ extern bool g_bForceCompEncDetection; #define Encoding_IsNONE(enc) ((enc) == CPI_NONE) + typedef struct _np2encoding { UINT uFlags; UINT uCodePage; char* pszParseNames; int idsName; + int iCEDEncoding; WCHAR wchLabel[64]; } NP2ENCODING; @@ -130,7 +133,10 @@ size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length); bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length); // Google's "Compact Encoding Detection" -int Encoding_Analyze(const char* const text, const size_t len, bool* isReliable); +extern NP2ENCODING g_Encodings[]; +int Encoding_CountOf(); +void ChangeEncodingCodePage(int cpi, UINT newCP); +int Encoding_Analyze(const char* const text, const size_t len, const int encodingHint, bool* isReliable); // -------------------------------------------------------------------------------------------------------------------------------- diff --git a/src/EncodingCED.cpp b/src/EncodingCED.cpp index e7fb2b3c9..a806d8550 100644 --- a/src/EncodingCED.cpp +++ b/src/EncodingCED.cpp @@ -25,7 +25,7 @@ #define VC_EXTRALEAN 1 #include - +#include "resource.h" extern "C" { #include "Encoding.h" @@ -33,265 +33,382 @@ extern "C" { #include "compact_enc_det/compact_enc_det.h" -// Global settings... -//extern "C" g_Encodings; + +//============================================================================= + +#define ENC_PARSE_NAM_ANSI "ansi,system,ascii," +#define ENC_PARSE_NAM_OEM "oem,oem," +#define ENC_PARSE_NAM_UTF16LEBOM "" +#define ENC_PARSE_NAM_UTF16BEBOM "" +#define ENC_PARSE_NAM_UTF16LE "utf-16,utf16,unicode," +#define ENC_PARSE_NAM_UTF16BE "utf-16be,utf16be,unicodebe," +#define ENC_PARSE_NAM_UTF8 "utf-8,utf8," +#define ENC_PARSE_NAM_UTF8SIG "utf-8,utf8," +#define ENC_PARSE_NAM_UTF7 "utf-7,utf7," +#define ENC_PARSE_NAM_DOS_720 "DOS-720,dos720," +#define ENC_PARSE_NAM_ISO_8859_6 "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127," +#define ENC_PARSE_NAM_MAC_ARABIC "x-mac-arabic,xmacarabic," +#define ENC_PARSE_NAM_WIN_1256 "windows-1256,windows1256,cp1256" +#define ENC_PARSE_NAM_DOS_775 "ibm775,ibm775,cp500," +#define ENC_PARSE_NAM_ISO_8859_4 "iso-8859-4,iso88594,csisolatin4,isoir110,l4,latin4," +#define ENC_PARSE_NAM_WIN_1257 "windows-1257,windows1257," +#define ENC_PARSE_NAM_DOS_852 "ibm852,ibm852,cp852," +#define ENC_PARSE_NAM_ISO_8859_2 "iso-8859-2,iso88592,csisolatin2,isoir101,latin2,l2," +#define ENC_PARSE_NAM_MAC_CENTRAL_EUROP "x-mac-ce,xmacce," +#define ENC_PARSE_NAM_WIN_1250 "windows-1250,windows1250,xcp1250," +#define ENC_PARSE_NAM_GBK_2312 "gb2312,gb2312,chinese,cngb,csgb2312,csgb231280,gb231280,gbk," +#define ENC_PARSE_NAM_MAC_ZH_CN "x-mac-chinesesimp,xmacchinesesimp," +#define ENC_PARSE_NAM_BIG5 "big5,big5,cnbig5,csbig5,xxbig5," +#define ENC_PARSE_NAM_MAC_ZH_TW "x-mac-chinesetrad,xmacchinesetrad," +#define ENC_PARSE_NAM_MAC_CROATIAN "x-mac-croatian,xmaccroatian," +#define ENC_PARSE_NAM_DOS_866 "cp866,cp866,ibm866," +#define ENC_PARSE_NAM_ISO_8859_5 "iso-8859-5,iso88595,csisolatin5,csisolatincyrillic,cyrillic,isoir144," +#define ENC_PARSE_NAM_KOI8_R "koi8-r,koi8r,cskoi8r,koi,koi8," +#define ENC_PARSE_NAM_KOI8_U "koi8-u,koi8u,koi8ru," +#define ENC_PARSE_NAM_MAC_CYRILLIC "x-mac-cyrillic,xmaccyrillic," +#define ENC_PARSE_NAM_WIN_1251 "windows-1251,windows1251,xcp1251," +#define ENC_PARSE_NAM_ISO_8859_13 "iso-8859-13,iso885913," +#define ENC_PARSE_NAM_DOS_863 "ibm863,ibm863," +#define ENC_PARSE_NAM_DOS_737 "ibm737,ibm737," +#define ENC_PARSE_NAM_ISO_8859_7 "iso-8859-7,iso88597,csisolatingreek,ecma118,elot928,greek,greek8,isoir126," +#define ENC_PARSE_NAM_MAC_GREEK "x-mac-greek,xmacgreek," +#define ENC_PARSE_NAM_WIN_1253 "windows-1253,windows1253," +#define ENC_PARSE_NAM_DOS_869 "ibm869,ibm869," +#define ENC_PARSE_NAM_DOS_862 "DOS-862,dos862," +#define ENC_PARSE_NAM_ISO_8859_8_I "iso-8859-8-i,iso88598i,logical," +#define ENC_PARSE_NAM_ISO_8859_8 "iso-8859-8,iso88598,csisolatinhebrew,hebrew,isoir138,visual," +#define ENC_PARSE_NAM_MAC_HEBREW "x-mac-hebrew,xmachebrew," +#define ENC_PARSE_NAM_WIN_1255 "windows-1255,windows1255," +#define ENC_PARSE_NAM_DOS_861 "ibm861,ibm861," +#define ENC_PARSE_NAM_MAC_ICELANDIC "x-mac-icelandic,xmacicelandic," +#define ENC_PARSE_NAM_MAC_JAPANESE "x-mac-japanese,xmacjapanese," +#define ENC_PARSE_NAM_SHIFT_JIS "shift_jis,shiftjis,shiftjs,csshiftjis,cswindows31j,mskanji,xmscp932,xsjis," +#define ENC_PARSE_NAM_MAC_KOREAN "x-mac-korean,xmackorean," +#define ENC_PARSE_NAM_WIN_949 "windows-949,windows949,ksc56011987,csksc5601,euckr,isoir149,korean,ksc56011989" +#define ENC_PARSE_NAM_ISO_8859_3 "iso-8859-3,iso88593,latin3,isoir109,l3," +#define ENC_PARSE_NAM_ISO_8859_15 "iso-8859-15,iso885915,latin9,l9," +#define ENC_PARSE_NAM_DOS_865 "ibm865,ibm865," +#define ENC_PARSE_NAM_DOS_437 "ibm437,ibm437,437,cp437,cspc8,codepage437," +#define ENC_PARSE_NAM_DOS_858 "ibm858,ibm858,ibm00858," +#define ENC_PARSE_NAM_DOS_860 "ibm860,ibm860," +#define ENC_PARSE_NAM_MAC_ROMANIAN "x-mac-romanian,xmacromanian," +#define ENC_PARSE_NAM_MAC_THAI "x-mac-thai,xmacthai," +#define ENC_PARSE_NAM_WIN_874 "windows-874,windows874,dos874,iso885911,tis620," +#define ENC_PARSE_NAM_DOS_857 "ibm857,ibm857," +#define ENC_PARSE_NAM_ISO_8859_9 "iso-8859-9,iso88599,latin5,isoir148,l5," +#define ENC_PARSE_NAM_MAC_TURKISH "x-mac-turkish,xmacturkish," +#define ENC_PARSE_NAM_WIN_1254 "windows-1254,windows1254," +#define ENC_PARSE_NAM_MAC_UKRAINIAN "x-mac-ukrainian,xmacukrainian," +#define ENC_PARSE_NAM_WIN_1258 "windows-1258,windows-258," +#define ENC_PARSE_NAM_DOS_850 "ibm850,ibm850," +#define ENC_PARSE_NAM_ISO_8859_1 "iso-8859-1,iso88591,cp819,latin1,ibm819,isoir100,latin1,l1," +#define ENC_PARSE_NAM_MAC_WESTERN_EUROP "macintosh,macintosh," +#define ENC_PARSE_NAM_WIN_1252 "windows-1252,windows1252,cp367,cp819,ibm367,us,xansi," +#define ENC_PARSE_NAM_IBM_EBCDIC_US "ebcdic-cp-us,ebcdiccpus,ebcdiccpca,ebcdiccpwt,ebcdiccpnl,ibm037,cp037," +#define ENC_PARSE_NAM_IBM_EBCDIC_INT "x-ebcdic-international,xebcdicinternational," +#define ENC_PARSE_NAM_IBM_EBCDIC_GR "x-EBCDIC-GreekModern,xebcdicgreekmodern," +#define ENC_PARSE_NAM_IBM_EBCDIC_LAT_5 "CP1026,cp1026,csibm1026,ibm1026," +#define ENC_PARSE_NAM_GB18030 "gb18030,gb18030," +#define ENC_PARSE_NAM_EUC_JAPANESE "euc-jp,eucjp,xeuc,xeucjp," +#define ENC_PARSE_NAM_EUC_KOREAN "euc-kr,euckr,cseuckr," +#define ENC_PARSE_NAM_ISO_2022_CN "iso-2022-cn,iso2022cn," +#define ENC_PARSE_NAM_HZ_GB2312 "hz-gb-2312,hzgb2312,hz," +#define ENC_PARSE_NAM_ISO_2022_JP "iso-2022-jp,iso2022jp," +#define ENC_PARSE_NAM_ISO_2022_KR "iso-2022-kr,iso2022kr,csiso2022kr," +#define ENC_PARSE_NAM_X_CHINESE_CNS "x-Chinese-CNS,xchinesecns," + +//============================================================================= + +extern "C" NP2ENCODING g_Encodings[] = { + /* 000 */{ NCP_ANSI | NCP_RECODE, CP_ACP, ENC_PARSE_NAM_ANSI, IDS_ENC_ANSI, CED_NO_MAPPING, L"" }, + /* 001 */{ NCP_OEM | NCP_RECODE, CP_OEMCP, ENC_PARSE_NAM_OEM, IDS_ENC_OEM, CED_NO_MAPPING, L"" }, + /* 002 */{ NCP_UNICODE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16LEBOM, IDS_ENC_UTF16LEBOM, CED_NO_MAPPING, L"" }, + /* 003 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16BEBOM, IDS_ENC_UTF16BEBOM, CED_NO_MAPPING, L"" }, + /* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, UTF16LE, L"" }, + /* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, UTF16BE, L"" }, + /* 006 */{ NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, UTF8, L"" }, + /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, CED_NO_MAPPING, L"" }, + /* 008 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, UTF7, L"" }, + /* 009 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, CED_NO_MAPPING, L"" }, + /* 010 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, ISO_8859_6, L"" }, + /* 011 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10004, ENC_PARSE_NAM_MAC_ARABIC, IDS_ENC_MAC_ARABIC, CED_NO_MAPPING, L"" }, + /* 012 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1256, ENC_PARSE_NAM_WIN_1256, IDS_ENC_WIN_1256, MSFT_CP1256, L"" }, + /* 013 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 775, ENC_PARSE_NAM_DOS_775, IDS_ENC_DOS_775, CED_NO_MAPPING, L"" }, + /* 014 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28594, ENC_PARSE_NAM_ISO_8859_4, IDS_ENC_ISO_8859_4, ISO_8859_4, L"" }, + /* 015 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1257, ENC_PARSE_NAM_WIN_1257, IDS_ENC_WIN_1257, MSFT_CP1257, L"" }, + /* 016 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 852, ENC_PARSE_NAM_DOS_852, IDS_ENC_DOS_852, CZECH_CP852, L"" }, + /* 017 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28592, ENC_PARSE_NAM_ISO_8859_2, IDS_ENC_ISO_8859_2, ISO_8859_2, L"" }, + /* 018 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10029, ENC_PARSE_NAM_MAC_CENTRAL_EUROP, IDS_ENC_MAC_CENTRAL_EUROP, CED_NO_MAPPING, L"" }, + /* 019 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1250, ENC_PARSE_NAM_WIN_1250, IDS_ENC_WIN_1250, MSFT_CP1250, L"" }, + /* 020 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 936, ENC_PARSE_NAM_GBK_2312, IDS_ENC_GBK_2312, GBK, L"" }, + /* 021 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10008, ENC_PARSE_NAM_MAC_ZH_CN, IDS_ENC_MAC_ZH_CN, CED_NO_MAPPING, L"" }, + /* 022 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 950, ENC_PARSE_NAM_BIG5, IDS_ENC_BIG5, CHINESE_BIG5_CP950, L"" }, + /* 023 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10002, ENC_PARSE_NAM_MAC_ZH_TW, IDS_ENC_MAC_ZH_TW, CED_NO_MAPPING, L"" }, + /* 024 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10082, ENC_PARSE_NAM_MAC_CROATIAN, IDS_ENC_MAC_CROATIAN, CED_NO_MAPPING, L"" }, + /* 025 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 866, ENC_PARSE_NAM_DOS_866, IDS_ENC_DOS_866, RUSSIAN_CP866, L"" }, + /* 026 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28595, ENC_PARSE_NAM_ISO_8859_5, IDS_ENC_ISO_8859_5, ISO_8859_5, L"" }, + /* 027 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20866, ENC_PARSE_NAM_KOI8_R, IDS_ENC_KOI8_R, RUSSIAN_KOI8_R, L"" }, + /* 028 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 21866, ENC_PARSE_NAM_KOI8_U, IDS_ENC_KOI8_U, RUSSIAN_KOI8_RU, L"" }, + /* 029 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10007, ENC_PARSE_NAM_MAC_CYRILLIC, IDS_ENC_MAC_CYRILLIC, CED_NO_MAPPING, L"" }, + /* 030 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1251, ENC_PARSE_NAM_WIN_1251, IDS_ENC_WIN_1251, RUSSIAN_CP1251, L"" }, + /* 031 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28603, ENC_PARSE_NAM_ISO_8859_13, IDS_ENC_ISO_8859_13, ISO_8859_13, L"" }, + /* 032 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 863, ENC_PARSE_NAM_DOS_863, IDS_ENC_DOS_863, CED_NO_MAPPING, L"" }, + /* 033 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 737, ENC_PARSE_NAM_DOS_737, IDS_ENC_DOS_737, CED_NO_MAPPING, L"" }, + /* 034 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28597, ENC_PARSE_NAM_ISO_8859_7, IDS_ENC_ISO_8859_7, ISO_8859_7, L"" }, + /* 035 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10006, ENC_PARSE_NAM_MAC_GREEK, IDS_ENC_MAC_GREEK, CED_NO_MAPPING, L"" }, + /* 036 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1253, ENC_PARSE_NAM_WIN_1253, IDS_ENC_WIN_1253, MSFT_CP1253, L"" }, + /* 037 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 869, ENC_PARSE_NAM_DOS_869, IDS_ENC_DOS_869, CED_NO_MAPPING, L"" }, + /* 038 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 862, ENC_PARSE_NAM_DOS_862, IDS_ENC_DOS_862, CED_NO_MAPPING, L"" }, + /* 039 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 38598, ENC_PARSE_NAM_ISO_8859_8_I, IDS_ENC_ISO_8859_8_I, ISO_8859_8_I, L"" }, + /* 040 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28598, ENC_PARSE_NAM_ISO_8859_8, IDS_ENC_ISO_8859_8, ISO_8859_8, L"" }, + /* 041 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10005, ENC_PARSE_NAM_MAC_HEBREW, IDS_ENC_MAC_HEBREW, CED_NO_MAPPING, L"" }, + /* 042 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1255, ENC_PARSE_NAM_WIN_1255, IDS_ENC_WIN_1255, MSFT_CP1255, L"" }, + /* 043 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 861, ENC_PARSE_NAM_DOS_861, IDS_ENC_DOS_861, CED_NO_MAPPING, L"" }, + /* 044 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10079, ENC_PARSE_NAM_MAC_ICELANDIC, IDS_ENC_MAC_ICELANDIC, CED_NO_MAPPING, L"" }, + /* 045 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10001, ENC_PARSE_NAM_MAC_JAPANESE, IDS_ENC_MAC_JAPANESE, CED_NO_MAPPING, L"" }, + /* 046 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 932, ENC_PARSE_NAM_SHIFT_JIS, IDS_ENC_SHIFT_JIS, JAPANESE_CP932, L"" }, + /* 047 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10003, ENC_PARSE_NAM_MAC_KOREAN, IDS_ENC_MAC_KOREAN, CED_NO_MAPPING, L"" }, + /* 048 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 949, ENC_PARSE_NAM_WIN_949, IDS_ENC_WIN_949, CED_NO_MAPPING, L"" }, + /* 049 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28593, ENC_PARSE_NAM_ISO_8859_3, IDS_ENC_ISO_8859_3, ISO_8859_3, L"" }, + /* 050 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28605, ENC_PARSE_NAM_ISO_8859_15, IDS_ENC_ISO_8859_15, ISO_8859_15, L"" }, + /* 051 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 865, ENC_PARSE_NAM_DOS_865, IDS_ENC_DOS_865, CED_NO_MAPPING, L"" }, + /* 052 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 437, ENC_PARSE_NAM_DOS_437, IDS_ENC_DOS_437, CED_NO_MAPPING, L"" }, + /* 053 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 858, ENC_PARSE_NAM_DOS_858, IDS_ENC_DOS_858, CED_NO_MAPPING, L"" }, + /* 054 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 860, ENC_PARSE_NAM_DOS_860, IDS_ENC_DOS_860, CED_NO_MAPPING, L"" }, + /* 055 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10000, ENC_PARSE_NAM_MAC_WESTERN_EUROP, IDS_ENC_MAC_WESTERN_EUROP, MACINTOSH_ROMAN, L"" }, + /* 056 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10021, ENC_PARSE_NAM_MAC_THAI, IDS_ENC_MAC_THAI, CED_NO_MAPPING, L"" }, + /* 057 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 874, ENC_PARSE_NAM_WIN_874, IDS_ENC_WIN_874, MSFT_CP874, L"" }, + /* 058 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 857, ENC_PARSE_NAM_DOS_857, IDS_ENC_DOS_857, CED_NO_MAPPING, L"" }, + /* 059 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28599, ENC_PARSE_NAM_ISO_8859_9, IDS_ENC_ISO_8859_9, ISO_8859_9, L"" }, + /* 060 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10081, ENC_PARSE_NAM_MAC_TURKISH, IDS_ENC_MAC_TURKISH, CED_NO_MAPPING, L"" }, + /* 061 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1254, ENC_PARSE_NAM_WIN_1254, IDS_ENC_WIN_1254, MSFT_CP1254, L"" }, + /* 062 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10017, ENC_PARSE_NAM_MAC_UKRAINIAN, IDS_ENC_MAC_UKRAINIAN, CED_NO_MAPPING, L"" }, + /* 063 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1258, ENC_PARSE_NAM_WIN_1258, IDS_ENC_WIN_1258, CED_NO_MAPPING, L"" }, + /* 064 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 850, ENC_PARSE_NAM_DOS_850, IDS_ENC_DOS_850, CED_NO_MAPPING, L"" }, + /* 065 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28591, ENC_PARSE_NAM_ISO_8859_1, IDS_ENC_ISO_8859_1, ISO_8859_1, L"" }, + /* 066 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10010, ENC_PARSE_NAM_MAC_ROMANIAN, IDS_ENC_MAC_ROMANIAN, MACINTOSH_ROMAN, L"" }, + /* 067 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1252, ENC_PARSE_NAM_WIN_1252, IDS_ENC_WIN_1252, MSFT_CP1252, L"" }, + /* 068 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 37, ENC_PARSE_NAM_IBM_EBCDIC_US, IDS_ENC_IBM_EBCDIC_US, CED_NO_MAPPING, L"" }, + /* 069 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 500, ENC_PARSE_NAM_IBM_EBCDIC_INT, IDS_ENC_IBM_EBCDIC_INT, CED_NO_MAPPING, L"" }, + /* 070 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 875, ENC_PARSE_NAM_IBM_EBCDIC_GR, IDS_ENC_IBM_EBCDIC_GR, CED_NO_MAPPING, L"" }, + /* 071 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1026, ENC_PARSE_NAM_IBM_EBCDIC_LAT_5, IDS_ENC_IBM_EBCDIC_LAT_5, CED_NO_MAPPING, L"" }, + /* 072 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 54936, ENC_PARSE_NAM_GB18030, IDS_ENC_GB18030, GB18030, L"" }, // Chinese Simplified (GB18030) + /* 073 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51932, ENC_PARSE_NAM_EUC_JAPANESE, IDS_ENC_EUC_JAPANESE, JAPANESE_EUC_JP, L"" }, // Japanese (EUC) + /* 074 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51949, ENC_PARSE_NAM_EUC_KOREAN, IDS_ENC_EUC_KOREAN, KOREAN_EUC_KR, L"" }, // Korean (EUC) + /* 075 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50229, ENC_PARSE_NAM_ISO_2022_CN, IDS_ENC_ISO_2022_CN, ISO_2022_CN, L"" }, // Chinese Traditional (ISO-2022-CN) + /* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312, IDS_ENC_HZ_GB2312, HZ_GB_2312, L"" }, // Chinese Simplified (HZ-GB2312) + /* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP, IDS_ENC_ISO_2022_JP, KDDI_ISO_2022_JP, L"" }, // Japanese (JIS) + /* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR, IDS_ENC_ISO_2022_KR, ISO_2022_KR, L"" }, // Korean (ISO-2022-KR) + /* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, CHINESE_CNS, L"" } // Chinese Traditional (CNS) + + /* 073 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 870, "CP870,cp870,ebcdiccproece,ebcdiccpyu,csibm870,ibm870,", 00000, L"" }, // IBM EBCDIC (Multilingual Latin-2) + /* 074 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1047, "IBM01047,ibm01047,", 00000, L"" }, // IBM EBCDIC (Open System Latin-1) + /* 075 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1140, "x-ebcdic-cp-us-euro,xebcdiccpuseuro,", 00000, L"" }, // IBM EBCDIC (US-Canada-Euro) + /* 076 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1141, "x-ebcdic-germany-euro,xebcdicgermanyeuro,", 00000, L"" }, // IBM EBCDIC (Germany-Euro) + /* 077 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1142, "x-ebcdic-denmarknorway-euro,xebcdicdenmarknorwayeuro,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway-Euro) + /* 078 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1143, "x-ebcdic-finlandsweden-euro,xebcdicfinlandswedeneuro,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden-Euro) + /* 079 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1144, "x-ebcdic-italy-euro,xebcdicitalyeuro,", 00000, L"" }, // IBM EBCDIC (Italy-Euro) + /* 080 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1145, "x-ebcdic-spain-euro,xebcdicspaineuro,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America-Euro) + /* 081 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1146, "x-ebcdic-uk-euro,xebcdicukeuro,", 00000, L"" }, // IBM EBCDIC (UK-Euro) + /* 082 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1147, "x-ebcdic-france-euro,xebcdicfranceeuro,", 00000, L"" }, // IBM EBCDIC (France-Euro) + /* 083 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1148, "x-ebcdic-international-euro,xebcdicinternationaleuro,", 00000, L"" }, // IBM EBCDIC (International-Euro) + /* 084 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1149, "x-ebcdic-icelandic-euro,xebcdicicelandiceuro,", 00000, L"" }, // IBM EBCDIC (Icelandic-Euro) + /* 085 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1361, "johab,johab,", 00000, L"" }, // Korean (Johab) + /* 086 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20273, "x-EBCDIC-Germany,xebcdicgermany,", 00000, L"" }, // IBM EBCDIC (Germany) + /* 087 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20277, "x-EBCDIC-DenmarkNorway,xebcdicdenmarknorway,ebcdiccpdk,ebcdiccpno,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway) + /* 088 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20278, "x-EBCDIC-FinlandSweden,xebcdicfinlandsweden,ebcdicpfi,ebcdiccpse,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden) + /* 089 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20280, "x-EBCDIC-Italy,xebcdicitaly,", 00000, L"" }, // IBM EBCDIC (Italy) + /* 090 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20284, "x-EBCDIC-Spain,xebcdicspain,ebcdiccpes,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America) + /* 091 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20285, "x-EBCDIC-UK,xebcdicuk,ebcdiccpgb,", 00000, L"" }, // IBM EBCDIC (UK) + /* 092 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20290, "x-EBCDIC-JapaneseKatakana,xebcdicjapanesekatakana,", 00000, L"" }, // IBM EBCDIC (Japanese Katakana) + /* 093 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20297, "x-EBCDIC-France,xebcdicfrance,ebcdiccpfr,", 00000, L"" }, // IBM EBCDIC (France) + /* 094 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20420, "x-EBCDIC-Arabic,xebcdicarabic,ebcdiccpar1,", 00000, L"" }, // IBM EBCDIC (Arabic) + /* 095 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20423, "x-EBCDIC-Greek,xebcdicgreek,ebcdiccpgr,", 00000, L"" }, // IBM EBCDIC (Greek) + /* 096 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20424, "x-EBCDIC-Hebrew,xebcdichebrew,ebcdiccphe,", 00000, L"" }, // IBM EBCDIC (Hebrew) + /* 097 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20833, "x-EBCDIC-KoreanExtended,xebcdickoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean Extended) + /* 098 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20838, "x-EBCDIC-Thai,xebcdicthai,ibmthai,csibmthai,", 00000, L"" }, // IBM EBCDIC (Thai) + /* 099 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20871, "x-EBCDIC-Icelandic,xebcdicicelandic,ebcdiccpis,", 00000, L"" }, // IBM EBCDIC (Icelandic) + /* 100 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20880, "x-EBCDIC-CyrillicRussian,xebcdiccyrillicrussian,ebcdiccyrillic,", 00000, L"" }, // IBM EBCDIC (Cyrillic Russian) + /* 101 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20905, "x-EBCDIC-Turkish,xebcdicturkish,ebcdiccptr,", 00000, L"" }, // IBM EBCDIC (Turkish) + /* 102 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20924, "IBM00924,ibm00924,ebcdiclatin9euro,", 00000, L"" }, // IBM EBCDIC (Open System-Euro Latin-1) + /* 103 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 21025, "x-EBCDIC-CyrillicSerbianBulgarian,xebcdiccyrillicserbianbulgarian,", 00000, L"" }, // IBM EBCDIC (Cyrillic Serbian-Bulgarian) + /* 104 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50930, "x-EBCDIC-JapaneseAndKana,xebcdicjapaneseandkana,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese Katakana) + /* 105 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50931, "x-EBCDIC-JapaneseAndUSCanada,xebcdicjapaneseanduscanada,", 00000, L"" }, // IBM EBCDIC (Japanese and US-Canada) + /* 106 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50933, "x-EBCDIC-KoreanAndKoreanExtended,xebcdickoreanandkoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean and Korean Extended) + /* 107 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50935, "x-EBCDIC-SimplifiedChinese,xebcdicsimplifiedchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Simplified) + /* 108 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50937, "x-EBCDIC-TraditionalChinese,xebcdictraditionalchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Traditional) + /* 109 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50939, "x-EBCDIC-JapaneseAndJapaneseLatin,xebcdicjapaneseandjapaneselatin,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese-Latin) + /* 110 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20105, "x-IA5,xia5,", 00000, L"" }, // Western European (IA5) + /* 111 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20106, "x-IA5-German,xia5german,", 00000, L"" }, // German (IA5) + /* 112 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20107, "x-IA5-Swedish,xia5swedish,", 00000, L"" }, // Swedish (IA5) + /* 113 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20108, "x-IA5-Norwegian,xia5norwegian,", 00000, L"" }, // Norwegian (IA5) + /* 114 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20936, "x-cp20936,xcp20936,", 00000, L"" }, // Chinese Simplified (GB2312) + /* 115 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20932, "euc-jp,,", 00000, L"" }, // Japanese (JIS X 0208-1990 & 0212-1990) + /* 117 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50221, "csISO2022JP,csiso2022jp,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana) + /* 118 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50222, "_iso-2022-jp$SIO,iso2022jpSIO,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana - SO/SI) + /* 120 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50227, "x-cp50227,xcp50227,", 00000, L"" }, // Chinese Simplified (ISO-2022) + /* 123 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20002, "x-Chinese-Eten,xchineseeten,", 00000, L"" }, // Chinese Traditional (Eten) + /* 125 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 51936, "euc-cn,euccn,xeuccn,", 00000, L"" }, // Chinese Simplified (EUC) + /* 128 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57002, "x-iscii-de,xisciide,", 00000, L"" }, // ISCII Devanagari + /* 129 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57003, "x-iscii-be,xisciibe,", 00000, L"" }, // ISCII Bengali + /* 130 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57004, "x-iscii-ta,xisciita,", 00000, L"" }, // ISCII Tamil + /* 131 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57005, "x-iscii-te,xisciite,", 00000, L"" }, // ISCII Telugu + /* 132 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57006, "x-iscii-as,xisciias,", 00000, L"" }, // ISCII Assamese + /* 133 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57007, "x-iscii-or,xisciior,", 00000, L"" }, // ISCII Oriya + /* 134 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57008, "x-iscii-ka,xisciika,", 00000, L"" }, // ISCII Kannada + /* 135 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57009, "x-iscii-ma,xisciima,", 00000, L"" }, // ISCII Malayalam + /* 136 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57010, "x-iscii-gu,xisciigu,", 00000, L"" }, // ISCII Gujarathi + /* 137 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57011, "x-iscii-pa,xisciipa,", 00000, L"" }, // ISCII Panjabi +}; + +extern "C" int Encoding_CountOf() +{ + return ARRAYSIZE(g_Encodings); +} +//============================================================================= + + + // ============================================================================ -int __fastcall MapEncoding2CPI(const Encoding& encoding) +static int __fastcall MapCPI2Encoding(const int iNP3Encoding) +{ + if ((iNP3Encoding < 0) || (iNP3Encoding >= Encoding_CountOf())) { + return UNKNOWN_ENCODING; // CPI_NONE, CPI_GET + } + + int const iCED = g_Encodings[iNP3Encoding].iCEDEncoding; + + return ((iCED != CED_NO_MAPPING) ? iCED : UNKNOWN_ENCODING); +} +// ============================================================================ + + + +static int __fastcall MapEncoding2CPI(const Encoding& encoding, bool* pIsReliable) { int iNP3Encoding = CPI_NONE; - // map corresponding ID of global 'g_Encodings' - - switch (encoding) { - case ISO_8859_1: - iNP3Encoding = 65; - break; - case ISO_8859_2: - iNP3Encoding = 17; - break; - case ISO_8859_3: - iNP3Encoding = 49; - break; - case ISO_8859_4: - iNP3Encoding = 14; - break; - case ISO_8859_5: - iNP3Encoding = 26; - break; - case ISO_8859_6: - iNP3Encoding = 10; - break; - case ISO_8859_7: - iNP3Encoding = 34; - break; - case ISO_8859_8: - iNP3Encoding = 40; - break; - case ISO_8859_9: - iNP3Encoding = 59; - break; - case ISO_8859_10: - iNP3Encoding = CPI_NONE; // ??? - break; - case JAPANESE_EUC_JP: - iNP3Encoding = 73; - break; - case JAPANESE_SHIFT_JIS: - iNP3Encoding = 46; - break; - case JAPANESE_JIS: - iNP3Encoding = 77; // ??? - break; - case CHINESE_BIG5: - iNP3Encoding = 22; - break; - case CHINESE_GB: - iNP3Encoding = 20; - break; - case CHINESE_EUC_CN: - iNP3Encoding = CPI_NONE; - break; - case KOREAN_EUC_KR: - iNP3Encoding = 74; - break; - case UNICODE: - iNP3Encoding = 4; - break; - case CHINESE_EUC_DEC: - iNP3Encoding = CPI_NONE; - break; - case CHINESE_CNS: - iNP3Encoding = 79; - break; - case CHINESE_BIG5_CP950: - iNP3Encoding = 22; - break; - case JAPANESE_CP932: - iNP3Encoding = 46; - break; - case UTF8: - iNP3Encoding = CPI_UTF8; - break; - case ASCII_7BIT: + if ((encoding == ISO_8859_1) || (encoding == ASCII_7BIT)) { + // ASCII -> ANSI default iNP3Encoding = CPI_ANSI_DEFAULT; - break; - case RUSSIAN_KOI8_R: - iNP3Encoding = 27; - break; - case RUSSIAN_CP1251: - iNP3Encoding = 30; - break; - case MSFT_CP1252: - iNP3Encoding = 67; - break; - case RUSSIAN_KOI8_RU: - iNP3Encoding = 28; - break; - case MSFT_CP1250: - iNP3Encoding = 19; - break; - case ISO_8859_15: - iNP3Encoding = 50; - break; - case MSFT_CP1254: - iNP3Encoding = 61; - break; - case MSFT_CP1257: - iNP3Encoding = 15; - break; - case ISO_8859_11: - iNP3Encoding = CPI_NONE; - break; - case MSFT_CP874: - iNP3Encoding = 57; - break; - case MSFT_CP1256: - iNP3Encoding = 12; - break; - case MSFT_CP1255: - iNP3Encoding = 42; - break; - case ISO_8859_8_I: - iNP3Encoding = 39; - break; - case HEBREW_VISUAL: - iNP3Encoding = 40; - break; - case CZECH_CP852: - iNP3Encoding = 16; - break; - case CZECH_CSN_369103: - iNP3Encoding = CPI_NONE; // ??? - break; - case MSFT_CP1253: - iNP3Encoding = 36; - break; - case RUSSIAN_CP866: - iNP3Encoding = 25; - break; - case ISO_8859_13: - iNP3Encoding = 31; - break; - case ISO_2022_KR: - iNP3Encoding = 78; - break; - case GBK: - iNP3Encoding = 20; - break; - case GB18030: - iNP3Encoding = 72; - break; - case BIG5_HKSCS: - iNP3Encoding = 22; - break; - case ISO_2022_CN: - iNP3Encoding = 75; - break; - case TSCII: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAMIL_MONO: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAMIL_BI: - iNP3Encoding = CPI_NONE; // ??? - break; - case JAGRAN: - iNP3Encoding = CPI_NONE; // ??? - break; - case MACINTOSH_ROMAN: - iNP3Encoding = 55; - break; - case UTF7: - iNP3Encoding = CPI_UTF7; - break; - case BHASKAR: - iNP3Encoding = CPI_NONE; // ??? - break; - case HTCHANAKYA: - iNP3Encoding = CPI_NONE; // ??? - break; - case UTF16BE: - iNP3Encoding = CPI_UNICODEBE; - break; - case UTF16LE: - iNP3Encoding = CPI_UNICODE; - break; - case UTF32BE: - iNP3Encoding = CPI_UTF32BE; - break; - case UTF32LE: - iNP3Encoding = CPI_UTF32; - break; - case BINARYENC: - iNP3Encoding = CPI_NONE; // ??? - break; - case HZ_GB_2312: - iNP3Encoding = 76; - break; - case UTF8UTF8: - iNP3Encoding = CPI_UTF8; - break; - case TAM_ELANGO: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAM_LTTMBARANI: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAM_SHREE: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAM_TBOOMIS: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAM_TMNEWS: - iNP3Encoding = CPI_NONE; // ??? - break; - case TAM_WEBTAMIL: - iNP3Encoding = CPI_NONE; // ??? - break; - case KDDI_SHIFT_JIS: - iNP3Encoding = 46; // ??? - break; - case DOCOMO_SHIFT_JIS: - iNP3Encoding = 46; // ??? - break; - case SOFTBANK_SHIFT_JIS: - iNP3Encoding = 46; // ??? - break; - case KDDI_ISO_2022_JP: - iNP3Encoding = 77; - break; - case SOFTBANK_ISO_2022_JP: - iNP3Encoding = 77; // ??? - break; + } + else { + for (int i = 0; i < Encoding_CountOf(); ++i) { + if (encoding == g_Encodings[i].iCEDEncoding) { + iNP3Encoding = i; + break; + } + } + } - case UNKNOWN_ENCODING: - default: - iNP3Encoding = CPI_NONE; - break; + // not found, guess a mapping: + if (iNP3Encoding == CPI_NONE) + { + switch (encoding) { + + case UNICODE: + iNP3Encoding = CPI_UNICODE; + break; + case UTF8UTF8: + iNP3Encoding = CPI_UTF8; + break; + case UTF32BE: + iNP3Encoding = CPI_UTF32BE; + break; + case UTF32LE: + iNP3Encoding = CPI_UTF32; + break; + + case ISO_8859_10: + iNP3Encoding = CPI_NONE; + break; + case ISO_8859_11: + iNP3Encoding = CPI_NONE; // latin-thai + break; + + case BIG5_HKSCS: + case CHINESE_BIG5: + iNP3Encoding = 22; + break; + case CHINESE_EUC_CN: + case CHINESE_EUC_DEC: + *pIsReliable = false; + case CHINESE_GB: + iNP3Encoding = 20; + break; + + case JAPANESE_SHIFT_JIS: + iNP3Encoding = 46; + break; + case KDDI_SHIFT_JIS: + iNP3Encoding = 46; + break; + case DOCOMO_SHIFT_JIS: + iNP3Encoding = 46; + break; + case SOFTBANK_SHIFT_JIS: + iNP3Encoding = 46; + break; + + case JAPANESE_JIS: + iNP3Encoding = 77; + break; + case SOFTBANK_ISO_2022_JP: + iNP3Encoding = 77; + break; + + + case CZECH_CSN_369103: + case TSCII: + case TAMIL_MONO: + case TAMIL_BI: + case JAGRAN: + case BHASKAR: + case HTCHANAKYA: + case BINARYENC: + case TAM_ELANGO: + case TAM_LTTMBARANI: + case TAM_SHREE: + case TAM_TBOOMIS: + case TAM_TMNEWS: + case TAM_WEBTAMIL: + + case UNKNOWN_ENCODING: + default: + iNP3Encoding = CPI_NONE; + *pIsReliable = false; + break; + } } return iNP3Encoding; } // ============================================================================ +extern "C" void ChangeEncodingCodePage(int cpi, UINT newCP) +{ + int iCED = MapCPI2Encoding(cpi); + g_Encodings[cpi].uCodePage = newCP; + g_Encodings[cpi].iCEDEncoding = iCED; +} +// ============================================================================ -extern "C" int Encoding_Analyze(const char* const text, const size_t len, bool* pIsReliable) +extern "C" int Encoding_Analyze(const char* const text, const size_t len, const int encodingHint, bool* pIsReliable) { int bytes_consumed; Encoding encoding = CompactEncDet::DetectEncoding( text, static_cast(len), nullptr, nullptr, nullptr, - UNKNOWN_ENCODING, + MapCPI2Encoding(encodingHint), UNKNOWN_LANGUAGE, CompactEncDet::WEB_CORPUS, false, &bytes_consumed, pIsReliable); - return MapEncoding2CPI(encoding); + return MapEncoding2CPI(encoding, pIsReliable); } // ============================================================================ +