Merge branch 'Dev_Enhancements' into Dev_TinyExpr

This commit is contained in:
Rainer Kottenhoff 2018-08-09 15:53:29 +02:00
commit faee74bd44
4 changed files with 401 additions and 505 deletions

View File

@ -1023,32 +1023,36 @@ bool EditLoadFile(
size_t const cbNbytes4Analysis = (cbData < 200000L) ? cbData : 200000L;
// if not skipped, analyze bytes
bool bIsReliable = false;
int const iAnalyzedEncoding = (bSkipANSICPDetection && !g_bForceCompEncDetection) ? CPI_NONE :
Encoding_Analyze(lpData, cbNbytes4Analysis, &bIsReliable);
int const iFileEncWeak = Encoding_SrcWeak(CPI_GET);
int iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding :
((bUseDefaultForFileEncoding || (cbNbytes4Analysis < 1)) ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT);
// --------------------------------------------------------------------------
bool bIsReliable = false;
int iAnalyzedEncoding = (bSkipANSICPDetection && !g_bForceCompEncDetection) ? CPI_NONE :
Encoding_Analyze(lpData, cbNbytes4Analysis, iPreferedEncoding, &bIsReliable);
// correct analysis based on preferred encoding
if (iAnalyzedEncoding == CPI_ANSI_DEFAULT) {
iAnalyzedEncoding = iPreferedEncoding; // stay on prefered
}
// --------------------------------------------------------------------------
int iForcedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
if (g_bForceCompEncDetection && !Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable) {
iForcedEncoding = iAnalyzedEncoding;
}
// --------------------------------------------------------------------------
// choose best encoding guess
int iPreferedEncoding = CPI_NONE;
if (!Encoding_IsNONE(iForcedEncoding))
iPreferedEncoding = iForcedEncoding;
else if (iFileEncWeak != CPI_NONE)
iPreferedEncoding = iFileEncWeak;
else if (Encoding_IsUNICODE(iAnalyzedEncoding) && !bSkipUTFDetection)
iPreferedEncoding = iAnalyzedEncoding;
else if (iFileEncWeak != CPI_NONE)
iPreferedEncoding = iFileEncWeak;
else if (!Encoding_IsNONE(iAnalyzedEncoding))
iPreferedEncoding = iAnalyzedEncoding;
else
iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding :
(bUseDefaultForFileEncoding ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT);
bool bBOM = false;

View File

@ -40,240 +40,6 @@
extern HMODULE g_hLngResContainer;
//=============================================================================
// Parse-name (alias) lists referenced by the entries of g_Encodings.
// Each list is a sequence of aliases, every one terminated by ','.
// Encoding_MatchA() appends ',' to the name it looks up before running
// StrStrIA() over a list, so an alias that lacks its trailing ',' can
// never be found. Keep the terminating comma on the last alias as well.
#define ENC_PARSE_NAM_ANSI "ansi,system,ascii,"
#define ENC_PARSE_NAM_OEM "oem,oem,"
#define ENC_PARSE_NAM_UTF16LEBOM ""
#define ENC_PARSE_NAM_UTF16BEBOM ""
#define ENC_PARSE_NAM_UTF16LE "utf-16,utf16,unicode,"
#define ENC_PARSE_NAM_UTF16BE "utf-16be,utf16be,unicodebe,"
#define ENC_PARSE_NAM_UTF8 "utf-8,utf8,"
#define ENC_PARSE_NAM_UTF8SIG "utf-8,utf8,"
#define ENC_PARSE_NAM_UTF7 "utf-7,utf7,"
#define ENC_PARSE_NAM_DOS_720 "DOS-720,dos720,"
#define ENC_PARSE_NAM_ISO_8859_6 "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127,"
#define ENC_PARSE_NAM_MAC_ARABIC "x-mac-arabic,xmacarabic,"
// fixed: the last alias was missing its terminating ','
#define ENC_PARSE_NAM_WIN_1256 "windows-1256,windows1256,cp1256,"
// NOTE(review): "cp500" is also the code page number used by the
// x-ebcdic-international entry of g_Encodings - confirm that this alias
// really belongs to ibm775 (left unchanged here).
#define ENC_PARSE_NAM_DOS_775 "ibm775,ibm775,cp500,"
#define ENC_PARSE_NAM_ISO_8859_4 "iso-8859-4,iso88594,csisolatin4,isoir110,l4,latin4,"
#define ENC_PARSE_NAM_WIN_1257 "windows-1257,windows1257,"
#define ENC_PARSE_NAM_DOS_852 "ibm852,ibm852,cp852,"
#define ENC_PARSE_NAM_ISO_8859_2 "iso-8859-2,iso88592,csisolatin2,isoir101,latin2,l2,"
#define ENC_PARSE_NAM_MAC_CENTRAL_EUROP "x-mac-ce,xmacce,"
#define ENC_PARSE_NAM_WIN_1250 "windows-1250,windows1250,xcp1250,"
#define ENC_PARSE_NAM_GBK_2312 "gb2312,gb2312,chinese,cngb,csgb2312,csgb231280,gb231280,gbk,"
#define ENC_PARSE_NAM_MAC_ZH_CN "x-mac-chinesesimp,xmacchinesesimp,"
#define ENC_PARSE_NAM_BIG5 "big5,big5,cnbig5,csbig5,xxbig5,"
#define ENC_PARSE_NAM_MAC_ZH_TW "x-mac-chinesetrad,xmacchinesetrad,"
#define ENC_PARSE_NAM_MAC_CROATIAN "x-mac-croatian,xmaccroatian,"
#define ENC_PARSE_NAM_DOS_866 "cp866,cp866,ibm866,"
#define ENC_PARSE_NAM_ISO_8859_5 "iso-8859-5,iso88595,csisolatin5,csisolatincyrillic,cyrillic,isoir144,"
#define ENC_PARSE_NAM_KOI8_R "koi8-r,koi8r,cskoi8r,koi,koi8,"
#define ENC_PARSE_NAM_KOI8_U "koi8-u,koi8u,koi8ru,"
#define ENC_PARSE_NAM_MAC_CYRILLIC "x-mac-cyrillic,xmaccyrillic,"
#define ENC_PARSE_NAM_WIN_1251 "windows-1251,windows1251,xcp1251,"
#define ENC_PARSE_NAM_ISO_8859_13 "iso-8859-13,iso885913,"
#define ENC_PARSE_NAM_DOS_863 "ibm863,ibm863,"
#define ENC_PARSE_NAM_DOS_737 "ibm737,ibm737,"
#define ENC_PARSE_NAM_ISO_8859_7 "iso-8859-7,iso88597,csisolatingreek,ecma118,elot928,greek,greek8,isoir126,"
#define ENC_PARSE_NAM_MAC_GREEK "x-mac-greek,xmacgreek,"
#define ENC_PARSE_NAM_WIN_1253 "windows-1253,windows1253,"
#define ENC_PARSE_NAM_DOS_869 "ibm869,ibm869,"
#define ENC_PARSE_NAM_DOS_862 "DOS-862,dos862,"
#define ENC_PARSE_NAM_ISO_8859_8_I "iso-8859-8-i,iso88598i,logical,"
#define ENC_PARSE_NAM_ISO_8859_8 "iso-8859-8,iso88598,csisolatinhebrew,hebrew,isoir138,visual,"
#define ENC_PARSE_NAM_MAC_HEBREW "x-mac-hebrew,xmachebrew,"
#define ENC_PARSE_NAM_WIN_1255 "windows-1255,windows1255,"
#define ENC_PARSE_NAM_DOS_861 "ibm861,ibm861,"
#define ENC_PARSE_NAM_MAC_ICELANDIC "x-mac-icelandic,xmacicelandic,"
#define ENC_PARSE_NAM_MAC_JAPANESE "x-mac-japanese,xmacjapanese,"
#define ENC_PARSE_NAM_SHIFT_JIS "shift_jis,shiftjis,shiftjs,csshiftjis,cswindows31j,mskanji,xmscp932,xsjis,"
#define ENC_PARSE_NAM_MAC_KOREAN "x-mac-korean,xmackorean,"
// fixed: the last alias was missing its terminating ','
#define ENC_PARSE_NAM_WIN_949 "windows-949,windows949,ksc56011987,csksc5601,euckr,isoir149,korean,ksc56011989,"
#define ENC_PARSE_NAM_ISO_8859_3 "iso-8859-3,iso88593,latin3,isoir109,l3,"
#define ENC_PARSE_NAM_ISO_8859_15 "iso-8859-15,iso885915,latin9,l9,"
#define ENC_PARSE_NAM_DOS_865 "ibm865,ibm865,"
#define ENC_PARSE_NAM_DOS_437 "ibm437,ibm437,437,cp437,cspc8,codepage437,"
#define ENC_PARSE_NAM_DOS_858 "ibm858,ibm858,ibm00858,"
#define ENC_PARSE_NAM_DOS_860 "ibm860,ibm860,"
#define ENC_PARSE_NAM_MAC_ROMANIAN "x-mac-romanian,xmacromanian,"
#define ENC_PARSE_NAM_MAC_THAI "x-mac-thai,xmacthai,"
#define ENC_PARSE_NAM_WIN_874 "windows-874,windows874,dos874,iso885911,tis620,"
#define ENC_PARSE_NAM_DOS_857 "ibm857,ibm857,"
#define ENC_PARSE_NAM_ISO_8859_9 "iso-8859-9,iso88599,latin5,isoir148,l5,"
#define ENC_PARSE_NAM_MAC_TURKISH "x-mac-turkish,xmacturkish,"
#define ENC_PARSE_NAM_WIN_1254 "windows-1254,windows1254,"
#define ENC_PARSE_NAM_MAC_UKRAINIAN "x-mac-ukrainian,xmacukrainian,"
// fixed: second alias was the typo "windows-258"; siblings use "windowsNNNN"
#define ENC_PARSE_NAM_WIN_1258 "windows-1258,windows1258,"
#define ENC_PARSE_NAM_DOS_850 "ibm850,ibm850,"
#define ENC_PARSE_NAM_ISO_8859_1 "iso-8859-1,iso88591,cp819,latin1,ibm819,isoir100,latin1,l1,"
#define ENC_PARSE_NAM_MAC_WESTERN_EUROP "macintosh,macintosh,"
#define ENC_PARSE_NAM_WIN_1252 "windows-1252,windows1252,cp367,cp819,ibm367,us,xansi,"
#define ENC_PARSE_NAM_IBM_EBCDIC_US "ebcdic-cp-us,ebcdiccpus,ebcdiccpca,ebcdiccpwt,ebcdiccpnl,ibm037,cp037,"
#define ENC_PARSE_NAM_IBM_EBCDIC_INT "x-ebcdic-international,xebcdicinternational,"
#define ENC_PARSE_NAM_IBM_EBCDIC_GR "x-EBCDIC-GreekModern,xebcdicgreekmodern,"
#define ENC_PARSE_NAM_IBM_EBCDIC_LAT_5 "CP1026,cp1026,csibm1026,ibm1026,"
#define ENC_PARSE_NAM_GB18030 "gb18030,gb18030,"
#define ENC_PARSE_NAM_EUC_JAPANESE "euc-jp,eucjp,xeuc,xeucjp,"
#define ENC_PARSE_NAM_EUC_KOREAN "euc-kr,euckr,cseuckr,"
#define ENC_PARSE_NAM_ISO_2022_CN "iso-2022-cn,iso2022cn,"
#define ENC_PARSE_NAM_HZ_GB2312 "hz-gb-2312,hzgb2312,hz,"
#define ENC_PARSE_NAM_ISO_2022_JP "iso-2022-jp,iso2022jp,"
#define ENC_PARSE_NAM_ISO_2022_KR "iso-2022-kr,iso2022kr,csiso2022kr,"
#define ENC_PARSE_NAM_X_CHINESE_CNS "x-Chinese-CNS,xchinesecns,"
//=============================================================================
// Table of all encodings known to the editor.
// The table is indexed by encoding id: g_Encodings[CPI_ANSI_DEFAULT] and
// g_Encodings[CPI_OEM] are the first two rows, and Encoding_InitDefaults()
// overwrites their CP_ACP / CP_OEMCP placeholders with GetACP() / GetOEMCP().
// Each row lists, in order: NCP_* flags, Windows code page, ENC_PARSE_NAM_*
// alias list, IDS_ENC_* string resource id, and an empty wide string.
// The leading /* nnn */ comment on each row is its index in the table.
static NP2ENCODING g_Encodings[] = {
/* 000 */{ NCP_ANSI | NCP_RECODE, CP_ACP, ENC_PARSE_NAM_ANSI, IDS_ENC_ANSI, L"" },
/* 001 */{ NCP_OEM | NCP_RECODE, CP_OEMCP, ENC_PARSE_NAM_OEM, IDS_ENC_OEM, L"" },
/* 002 */{ NCP_UNICODE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16LEBOM, IDS_ENC_UTF16LEBOM, L"" },
/* 003 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16BEBOM, IDS_ENC_UTF16BEBOM, L"" },
/* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, L"" },
/* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, L"" },
/* 006 */{ NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, L"" },
/* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" },
/* 008 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, L"" },
/* 009 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, L"" },
/* 010 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, L"" },
/* 011 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10004, ENC_PARSE_NAM_MAC_ARABIC, IDS_ENC_MAC_ARABIC, L"" },
/* 012 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1256, ENC_PARSE_NAM_WIN_1256, IDS_ENC_WIN_1256, L"" },
/* 013 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 775, ENC_PARSE_NAM_DOS_775, IDS_ENC_DOS_775, L"" },
/* 014 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28594, ENC_PARSE_NAM_ISO_8859_4, IDS_ENC_ISO_8859_4, L"" },
/* 015 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1257, ENC_PARSE_NAM_WIN_1257, IDS_ENC_WIN_1257, L"" },
/* 016 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 852, ENC_PARSE_NAM_DOS_852, IDS_ENC_DOS_852, L"" },
/* 017 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28592, ENC_PARSE_NAM_ISO_8859_2, IDS_ENC_ISO_8859_2, L"" },
/* 018 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10029, ENC_PARSE_NAM_MAC_CENTRAL_EUROP, IDS_ENC_MAC_CENTRAL_EUROP, L"" },
/* 019 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1250, ENC_PARSE_NAM_WIN_1250, IDS_ENC_WIN_1250, L"" },
/* 020 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 936, ENC_PARSE_NAM_GBK_2312, IDS_ENC_GBK_2312, L"" },
/* 021 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10008, ENC_PARSE_NAM_MAC_ZH_CN, IDS_ENC_MAC_ZH_CN, L"" },
/* 022 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 950, ENC_PARSE_NAM_BIG5, IDS_ENC_BIG5, L"" },
/* 023 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10002, ENC_PARSE_NAM_MAC_ZH_TW, IDS_ENC_MAC_ZH_TW, L"" },
/* 024 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10082, ENC_PARSE_NAM_MAC_CROATIAN, IDS_ENC_MAC_CROATIAN, L"" },
/* 025 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 866, ENC_PARSE_NAM_DOS_866, IDS_ENC_DOS_866, L"" },
/* 026 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28595, ENC_PARSE_NAM_ISO_8859_5, IDS_ENC_ISO_8859_5, L"" },
/* 027 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20866, ENC_PARSE_NAM_KOI8_R, IDS_ENC_KOI8_R, L"" },
/* 028 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 21866, ENC_PARSE_NAM_KOI8_U, IDS_ENC_KOI8_U, L"" },
/* 029 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10007, ENC_PARSE_NAM_MAC_CYRILLIC, IDS_ENC_MAC_CYRILLIC, L"" },
/* 030 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1251, ENC_PARSE_NAM_WIN_1251, IDS_ENC_WIN_1251, L"" },
/* 031 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28603, ENC_PARSE_NAM_ISO_8859_13, IDS_ENC_ISO_8859_13, L"" },
/* 032 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 863, ENC_PARSE_NAM_DOS_863, IDS_ENC_DOS_863, L"" },
/* 033 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 737, ENC_PARSE_NAM_DOS_737, IDS_ENC_DOS_737, L"" },
/* 034 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28597, ENC_PARSE_NAM_ISO_8859_7, IDS_ENC_ISO_8859_7, L"" },
/* 035 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10006, ENC_PARSE_NAM_MAC_GREEK, IDS_ENC_MAC_GREEK, L"" },
/* 036 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1253, ENC_PARSE_NAM_WIN_1253, IDS_ENC_WIN_1253, L"" },
/* 037 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 869, ENC_PARSE_NAM_DOS_869, IDS_ENC_DOS_869, L"" },
/* 038 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 862, ENC_PARSE_NAM_DOS_862, IDS_ENC_DOS_862, L"" },
/* 039 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 38598, ENC_PARSE_NAM_ISO_8859_8_I, IDS_ENC_ISO_8859_8_I, L"" },
/* 040 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28598, ENC_PARSE_NAM_ISO_8859_8, IDS_ENC_ISO_8859_8, L"" },
/* 041 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10005, ENC_PARSE_NAM_MAC_HEBREW, IDS_ENC_MAC_HEBREW, L"" },
/* 042 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1255, ENC_PARSE_NAM_WIN_1255, IDS_ENC_WIN_1255, L"" },
/* 043 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 861, ENC_PARSE_NAM_DOS_861, IDS_ENC_DOS_861, L"" },
/* 044 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10079, ENC_PARSE_NAM_MAC_ICELANDIC, IDS_ENC_MAC_ICELANDIC, L"" },
/* 045 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10001, ENC_PARSE_NAM_MAC_JAPANESE, IDS_ENC_MAC_JAPANESE, L"" },
/* 046 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 932, ENC_PARSE_NAM_SHIFT_JIS, IDS_ENC_SHIFT_JIS, L"" },
/* 047 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10003, ENC_PARSE_NAM_MAC_KOREAN, IDS_ENC_MAC_KOREAN, L"" },
/* 048 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 949, ENC_PARSE_NAM_WIN_949, IDS_ENC_WIN_949, L"" },
/* 049 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28593, ENC_PARSE_NAM_ISO_8859_3, IDS_ENC_ISO_8859_3, L"" },
/* 050 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28605, ENC_PARSE_NAM_ISO_8859_15, IDS_ENC_ISO_8859_15, L"" },
/* 051 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 865, ENC_PARSE_NAM_DOS_865, IDS_ENC_DOS_865, L"" },
/* 052 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 437, ENC_PARSE_NAM_DOS_437, IDS_ENC_DOS_437, L"" },
/* 053 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 858, ENC_PARSE_NAM_DOS_858, IDS_ENC_DOS_858, L"" },
/* 054 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 860, ENC_PARSE_NAM_DOS_860, IDS_ENC_DOS_860, L"" },
/* 055 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10010, ENC_PARSE_NAM_MAC_ROMANIAN, IDS_ENC_MAC_ROMANIAN, L"" },
/* 056 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10021, ENC_PARSE_NAM_MAC_THAI, IDS_ENC_MAC_THAI, L"" },
/* 057 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 874, ENC_PARSE_NAM_WIN_874, IDS_ENC_WIN_874, L"" },
/* 058 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 857, ENC_PARSE_NAM_DOS_857, IDS_ENC_DOS_857, L"" },
/* 059 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28599, ENC_PARSE_NAM_ISO_8859_9, IDS_ENC_ISO_8859_9, L"" },
/* 060 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10081, ENC_PARSE_NAM_MAC_TURKISH, IDS_ENC_MAC_TURKISH, L"" },
/* 061 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1254, ENC_PARSE_NAM_WIN_1254, IDS_ENC_WIN_1254, L"" },
/* 062 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10017, ENC_PARSE_NAM_MAC_UKRAINIAN, IDS_ENC_MAC_UKRAINIAN, L"" },
/* 063 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1258, ENC_PARSE_NAM_WIN_1258, IDS_ENC_WIN_1258, L"" },
/* 064 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 850, ENC_PARSE_NAM_DOS_850, IDS_ENC_DOS_850, L"" },
/* 065 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28591, ENC_PARSE_NAM_ISO_8859_1, IDS_ENC_ISO_8859_1, L"" },
/* 066 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10000, ENC_PARSE_NAM_MAC_WESTERN_EUROP, IDS_ENC_MAC_WESTERN_EUROP, L"" },
/* 067 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1252, ENC_PARSE_NAM_WIN_1252, IDS_ENC_WIN_1252, L"" },
/* 068 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 37, ENC_PARSE_NAM_IBM_EBCDIC_US, IDS_ENC_IBM_EBCDIC_US, L"" },
/* 069 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 500, ENC_PARSE_NAM_IBM_EBCDIC_INT, IDS_ENC_IBM_EBCDIC_INT, L"" },
/* 070 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 875, ENC_PARSE_NAM_IBM_EBCDIC_GR, IDS_ENC_IBM_EBCDIC_GR, L"" },
/* 071 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1026, ENC_PARSE_NAM_IBM_EBCDIC_LAT_5, IDS_ENC_IBM_EBCDIC_LAT_5, L"" },
/* 072 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 54936, ENC_PARSE_NAM_GB18030, IDS_ENC_GB18030, L"" }, // Chinese Simplified (GB18030)
/* 073 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51932, ENC_PARSE_NAM_EUC_JAPANESE, IDS_ENC_EUC_JAPANESE, L"" }, // Japanese (EUC)
/* 074 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51949, ENC_PARSE_NAM_EUC_KOREAN, IDS_ENC_EUC_KOREAN, L"" }, // Korean (EUC)
/* 075 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50229, ENC_PARSE_NAM_ISO_2022_CN, IDS_ENC_ISO_2022_CN, L"" }, // Chinese Traditional (ISO-2022-CN)
/* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312, IDS_ENC_HZ_GB2312, L"" }, // Chinese Simplified (HZ-GB2312)
/* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP, IDS_ENC_ISO_2022_JP, L"" }, // Japanese (JIS)
/* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR, IDS_ENC_ISO_2022_KR, L"" }, // Korean (ISO-2022-KR)
/* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, L"" } // Chinese Traditional (CNS)
// --- Everything below is commented out and therefore not part of the table. ---
// The index comments of these disabled rows restart at 073 and do not
// correspond to positions in the active table above.
/* 073 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 870, "CP870,cp870,ebcdiccproece,ebcdiccpyu,csibm870,ibm870,", 00000, L"" }, // IBM EBCDIC (Multilingual Latin-2)
/* 074 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1047, "IBM01047,ibm01047,", 00000, L"" }, // IBM EBCDIC (Open System Latin-1)
/* 075 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1140, "x-ebcdic-cp-us-euro,xebcdiccpuseuro,", 00000, L"" }, // IBM EBCDIC (US-Canada-Euro)
/* 076 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1141, "x-ebcdic-germany-euro,xebcdicgermanyeuro,", 00000, L"" }, // IBM EBCDIC (Germany-Euro)
/* 077 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1142, "x-ebcdic-denmarknorway-euro,xebcdicdenmarknorwayeuro,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway-Euro)
/* 078 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1143, "x-ebcdic-finlandsweden-euro,xebcdicfinlandswedeneuro,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden-Euro)
/* 079 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1144, "x-ebcdic-italy-euro,xebcdicitalyeuro,", 00000, L"" }, // IBM EBCDIC (Italy-Euro)
/* 080 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1145, "x-ebcdic-spain-euro,xebcdicspaineuro,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America-Euro)
/* 081 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1146, "x-ebcdic-uk-euro,xebcdicukeuro,", 00000, L"" }, // IBM EBCDIC (UK-Euro)
/* 082 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1147, "x-ebcdic-france-euro,xebcdicfranceeuro,", 00000, L"" }, // IBM EBCDIC (France-Euro)
/* 083 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1148, "x-ebcdic-international-euro,xebcdicinternationaleuro,", 00000, L"" }, // IBM EBCDIC (International-Euro)
/* 084 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1149, "x-ebcdic-icelandic-euro,xebcdicicelandiceuro,", 00000, L"" }, // IBM EBCDIC (Icelandic-Euro)
/* 085 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1361, "johab,johab,", 00000, L"" }, // Korean (Johab)
/* 086 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20273, "x-EBCDIC-Germany,xebcdicgermany,", 00000, L"" }, // IBM EBCDIC (Germany)
/* 087 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20277, "x-EBCDIC-DenmarkNorway,xebcdicdenmarknorway,ebcdiccpdk,ebcdiccpno,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway)
/* 088 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20278, "x-EBCDIC-FinlandSweden,xebcdicfinlandsweden,ebcdicpfi,ebcdiccpse,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden)
/* 089 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20280, "x-EBCDIC-Italy,xebcdicitaly,", 00000, L"" }, // IBM EBCDIC (Italy)
/* 090 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20284, "x-EBCDIC-Spain,xebcdicspain,ebcdiccpes,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America)
/* 091 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20285, "x-EBCDIC-UK,xebcdicuk,ebcdiccpgb,", 00000, L"" }, // IBM EBCDIC (UK)
/* 092 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20290, "x-EBCDIC-JapaneseKatakana,xebcdicjapanesekatakana,", 00000, L"" }, // IBM EBCDIC (Japanese Katakana)
/* 093 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20297, "x-EBCDIC-France,xebcdicfrance,ebcdiccpfr,", 00000, L"" }, // IBM EBCDIC (France)
/* 094 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20420, "x-EBCDIC-Arabic,xebcdicarabic,ebcdiccpar1,", 00000, L"" }, // IBM EBCDIC (Arabic)
/* 095 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20423, "x-EBCDIC-Greek,xebcdicgreek,ebcdiccpgr,", 00000, L"" }, // IBM EBCDIC (Greek)
/* 096 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20424, "x-EBCDIC-Hebrew,xebcdichebrew,ebcdiccphe,", 00000, L"" }, // IBM EBCDIC (Hebrew)
/* 097 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20833, "x-EBCDIC-KoreanExtended,xebcdickoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean Extended)
/* 098 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20838, "x-EBCDIC-Thai,xebcdicthai,ibmthai,csibmthai,", 00000, L"" }, // IBM EBCDIC (Thai)
/* 099 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20871, "x-EBCDIC-Icelandic,xebcdicicelandic,ebcdiccpis,", 00000, L"" }, // IBM EBCDIC (Icelandic)
/* 100 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20880, "x-EBCDIC-CyrillicRussian,xebcdiccyrillicrussian,ebcdiccyrillic,", 00000, L"" }, // IBM EBCDIC (Cyrillic Russian)
/* 101 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20905, "x-EBCDIC-Turkish,xebcdicturkish,ebcdiccptr,", 00000, L"" }, // IBM EBCDIC (Turkish)
/* 102 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20924, "IBM00924,ibm00924,ebcdiclatin9euro,", 00000, L"" }, // IBM EBCDIC (Open System-Euro Latin-1)
/* 103 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 21025, "x-EBCDIC-CyrillicSerbianBulgarian,xebcdiccyrillicserbianbulgarian,", 00000, L"" }, // IBM EBCDIC (Cyrillic Serbian-Bulgarian)
/* 104 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50930, "x-EBCDIC-JapaneseAndKana,xebcdicjapaneseandkana,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese Katakana)
/* 105 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50931, "x-EBCDIC-JapaneseAndUSCanada,xebcdicjapaneseanduscanada,", 00000, L"" }, // IBM EBCDIC (Japanese and US-Canada)
/* 106 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50933, "x-EBCDIC-KoreanAndKoreanExtended,xebcdickoreanandkoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean and Korean Extended)
/* 107 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50935, "x-EBCDIC-SimplifiedChinese,xebcdicsimplifiedchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Simplified)
/* 108 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50937, "x-EBCDIC-TraditionalChinese,xebcdictraditionalchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Traditional)
/* 109 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50939, "x-EBCDIC-JapaneseAndJapaneseLatin,xebcdicjapaneseandjapaneselatin,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese-Latin)
/* 110 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20105, "x-IA5,xia5,", 00000, L"" }, // Western European (IA5)
/* 111 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20106, "x-IA5-German,xia5german,", 00000, L"" }, // German (IA5)
/* 112 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20107, "x-IA5-Swedish,xia5swedish,", 00000, L"" }, // Swedish (IA5)
/* 113 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20108, "x-IA5-Norwegian,xia5norwegian,", 00000, L"" }, // Norwegian (IA5)
/* 114 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20936, "x-cp20936,xcp20936,", 00000, L"" }, // Chinese Simplified (GB2312)
/* 115 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20932, "euc-jp,,", 00000, L"" }, // Japanese (JIS X 0208-1990 & 0212-1990)
/* 117 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50221, "csISO2022JP,csiso2022jp,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana)
/* 118 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50222, "_iso-2022-jp$SIO,iso2022jpSIO,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana - SO/SI)
/* 120 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50227, "x-cp50227,xcp50227,", 00000, L"" }, // Chinese Simplified (ISO-2022)
/* 123 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20002, "x-Chinese-Eten,xchineseeten,", 00000, L"" }, // Chinese Traditional (Eten)
/* 125 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 51936, "euc-cn,euccn,xeuccn,", 00000, L"" }, // Chinese Simplified (EUC)
/* 128 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57002, "x-iscii-de,xisciide,", 00000, L"" }, // ISCII Devanagari
/* 129 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57003, "x-iscii-be,xisciibe,", 00000, L"" }, // ISCII Bengali
/* 130 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57004, "x-iscii-ta,xisciita,", 00000, L"" }, // ISCII Tamil
/* 131 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57005, "x-iscii-te,xisciite,", 00000, L"" }, // ISCII Telugu
/* 132 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57006, "x-iscii-as,xisciias,", 00000, L"" }, // ISCII Assamese
/* 133 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57007, "x-iscii-or,xisciior,", 00000, L"" }, // ISCII Oriya
/* 134 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57008, "x-iscii-ka,xisciika,", 00000, L"" }, // ISCII Kannada
/* 135 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57009, "x-iscii-ma,xisciima,", 00000, L"" }, // ISCII Malayalam
/* 136 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57010, "x-iscii-gu,xisciigu,", 00000, L"" }, // ISCII Gujarathi
/* 137 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57011, "x-iscii-pa,xisciipa,", 00000, L"" }, // ISCII Panjabi
};
int Encoding_CountOf()
{
return COUNTOF(g_Encodings);
}
//=============================================================================
//=============================================================================
//
// Encoding Helper Functions
@ -289,7 +55,8 @@ WCHAR wchOEM[16] = { L'\0' };
// ============================================================================
int Encoding_Current(int iEncoding) {
int Encoding_Current(int iEncoding)
{
static int CurrentEncoding = CPI_NONE;
if (iEncoding >= 0) {
@ -303,7 +70,8 @@ int Encoding_Current(int iEncoding) {
// ============================================================================
int Encoding_SrcCmdLn(int iSrcEncoding) {
int Encoding_SrcCmdLn(int iSrcEncoding)
{
static int SourceEncoding = CPI_NONE;
if (iSrcEncoding >= 0) {
@ -320,7 +88,8 @@ int Encoding_SrcCmdLn(int iSrcEncoding) {
// ============================================================================
int Encoding_SrcWeak(int iSrcWeakEnc) {
int Encoding_SrcWeak(int iSrcWeakEnc)
{
static int SourceWeakEncoding = CPI_NONE;
if (iSrcWeakEnc >= 0) {
@ -358,10 +127,10 @@ void Encoding_InitDefaults()
65001 // (UTF-8)
};
g_Encodings[CPI_ANSI_DEFAULT].uCodePage = GetACP(); // set ANSI system CP
ChangeEncodingCodePage(CPI_ANSI_DEFAULT, GetACP()); // set ANSI system CP
StringCchPrintf(wchANSI, COUNTOF(wchANSI), L" (CP-%u)", g_Encodings[CPI_ANSI_DEFAULT].uCodePage);
for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) {
for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) {
if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == g_Encodings[CPI_ANSI_DEFAULT].uCodePage)) {
g_Encodings[i].uFlags |= NCP_ANSI;
if (g_Encodings[i].uFlags & NCP_EXTERNAL_8BIT)
@ -370,10 +139,10 @@ void Encoding_InitDefaults()
}
}
g_Encodings[CPI_OEM].uCodePage = GetOEMCP();
ChangeEncodingCodePage(CPI_OEM, GetOEMCP()); // set OEM system CP
StringCchPrintf(wchOEM, COUNTOF(wchOEM), L" (CP-%u)", g_Encodings[CPI_OEM].uCodePage);
for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) {
for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) {
if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == g_Encodings[CPI_OEM].uCodePage)) {
g_Encodings[i].uFlags |= NCP_OEM;
if (g_Encodings[i].uFlags & NCP_EXTERNAL_8BIT)
@ -383,7 +152,7 @@ void Encoding_InitDefaults()
}
// multi byte character sets
for (int i = 0; i < COUNTOF(g_Encodings); ++i) {
for (int i = 0; i < Encoding_CountOf(); ++i) {
for (int k = 0; k < COUNTOF(uCodePageMBCS); k++) {
if (g_Encodings[i].uCodePage == uCodePageMBCS[k]) {
g_Encodings[i].uFlags |= NCP_MBCS;
@ -394,7 +163,7 @@ void Encoding_InitDefaults()
g_DOSEncoding = CPI_OEM;
// Try to set the DOS encoding to DOS-437 if the default OEMCP is not DOS-437
if (g_Encodings[g_DOSEncoding].uCodePage != 437) {
for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); ++i) {
for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); ++i) {
if (Encoding_IsValid(i) && (g_Encodings[i].uCodePage == 437)) {
g_DOSEncoding = i;
break;
@ -420,7 +189,7 @@ int Encoding_MapIniSetting(bool bLoad, int iSetting) {
case 7: return CPI_UNICODEBE;
case 8: return CPI_UTF7;
default: {
for (int i = CPI_UTF7 + 1; i < COUNTOF(g_Encodings); i++) {
for (int i = CPI_UTF7 + 1; i < Encoding_CountOf(); i++) {
if ((g_Encodings[i].uCodePage == (UINT)iSetting) && Encoding_IsValid(i))
return(i);
}
@ -515,7 +284,7 @@ int Encoding_MatchA(char *pchTest) {
}
*pchDst++ = ',';
*pchDst = 0;
for (int i = 0; i < COUNTOF(g_Encodings); i++) {
for (int i = 0; i < Encoding_CountOf(); i++) {
if (StrStrIA(g_Encodings[i].pszParseNames, chTest)) {
CPINFO cpi;
if ((g_Encodings[i].uFlags & NCP_INTERNAL) ||
@ -532,7 +301,7 @@ int Encoding_MatchA(char *pchTest) {
int Encoding_GetByCodePage(UINT cp) {
for (int i = 0; i < COUNTOF(g_Encodings); i++) {
for (int i = 0; i < Encoding_CountOf(); i++) {
if (cp == g_Encodings[i].uCodePage) {
return i;
}
@ -544,7 +313,7 @@ int Encoding_GetByCodePage(UINT cp) {
bool Encoding_IsValid(int iTestEncoding) {
CPINFO cpi;
if ((iTestEncoding >= 0) && (iTestEncoding < COUNTOF(g_Encodings))) {
if ((iTestEncoding >= 0) && (iTestEncoding < Encoding_CountOf())) {
if ((g_Encodings[iTestEncoding].uFlags & NCP_INTERNAL) ||
IsValidCodePage(g_Encodings[iTestEncoding].uCodePage) &&
GetCPInfo(g_Encodings[iTestEncoding].uCodePage, &cpi)) {
@ -573,18 +342,18 @@ void Encoding_AddToListView(HWND hwnd, int idSel, bool bRecodeOnly) {
LVITEM lvi;
WCHAR wchBuf[256] = { L'\0' };
PENCODINGENTRY pEE = LocalAlloc(LPTR, COUNTOF(g_Encodings) * sizeof(ENCODINGENTRY));
for (i = 0; i < COUNTOF(g_Encodings); i++) {
PENCODINGENTRY pEE = LocalAlloc(LPTR, Encoding_CountOf() * sizeof(ENCODINGENTRY));
for (i = 0; i < Encoding_CountOf(); i++) {
pEE[i].id = i;
GetLngString(g_Encodings[i].idsName, pEE[i].wch, COUNTOF(pEE[i].wch));
}
qsort(pEE, COUNTOF(g_Encodings), sizeof(ENCODINGENTRY), CmpEncoding);
qsort(pEE, Encoding_CountOf(), sizeof(ENCODINGENTRY), CmpEncoding);
ZeroMemory(&lvi, sizeof(LVITEM));
lvi.mask = LVIF_PARAM | LVIF_TEXT | LVIF_IMAGE;
lvi.pszText = wchBuf;
for (i = 0; i < COUNTOF(g_Encodings); i++) {
for (i = 0; i < Encoding_CountOf(); i++) {
int id = pEE[i].id;
if (!bRecodeOnly || (g_Encodings[id].uFlags & NCP_RECODE)) {
@ -659,12 +428,12 @@ void Encoding_AddToComboboxEx(HWND hwnd, int idSel, bool bRecodeOnly) {
COMBOBOXEXITEM cbei;
WCHAR wchBuf[256] = { L'\0' };
PENCODINGENTRY pEE = LocalAlloc(LPTR, COUNTOF(g_Encodings) * sizeof(ENCODINGENTRY));
for (i = 0; i < COUNTOF(g_Encodings); i++) {
PENCODINGENTRY pEE = LocalAlloc(LPTR, Encoding_CountOf() * sizeof(ENCODINGENTRY));
for (i = 0; i < Encoding_CountOf(); i++) {
pEE[i].id = i;
GetLngString(g_Encodings[i].idsName, pEE[i].wch, COUNTOF(pEE[i].wch));
}
qsort(pEE, COUNTOF(g_Encodings), sizeof(ENCODINGENTRY), CmpEncoding);
qsort(pEE, Encoding_CountOf(), sizeof(ENCODINGENTRY), CmpEncoding);
ZeroMemory(&cbei, sizeof(COMBOBOXEXITEM));
cbei.mask = CBEIF_TEXT | CBEIF_IMAGE | CBEIF_SELECTEDIMAGE | CBEIF_LPARAM;
@ -673,7 +442,7 @@ void Encoding_AddToComboboxEx(HWND hwnd, int idSel, bool bRecodeOnly) {
cbei.iImage = 0;
cbei.iSelectedImage = 0;
for (i = 0; i < COUNTOF(g_Encodings); i++) {
for (i = 0; i < Encoding_CountOf(); i++) {
int id = pEE[i].id;
if (!bRecodeOnly || (g_Encodings[id].uFlags & NCP_RECODE)) {

View File

@ -50,8 +50,9 @@ extern bool g_bForceCompEncDetection;
#define NCP_EXTERNAL_8BIT 512
#define NCP_RECODE 1024
#define CPI_GET -2
#define CPI_NONE -1
#define CED_NO_MAPPING (-3)
#define CPI_GET (-2)
#define CPI_NONE (-1)
#define CPI_ANSI_DEFAULT 0
#define CPI_OEM 1
#define CPI_UNICODEBOM 2
@ -69,11 +70,13 @@ extern bool g_bForceCompEncDetection;
#define Encoding_IsNONE(enc) ((enc) == CPI_NONE)
// One row of the g_Encodings table: describes a single supported encoding.
typedef struct _np2encoding {
// Bitwise OR of NCP_* flags (e.g. NCP_RECODE, NCP_EXTERNAL_8BIT).
UINT uFlags;
// Windows code page identifier; validated with IsValidCodePage()/GetCPInfo().
UINT uCodePage;
// Comma-separated alias list that Encoding_MatchA() searches with StrStrIA().
char* pszParseNames;
// String resource id of the display name, loaded via GetLngString().
int idsName;
// NOTE(review): presumably the corresponding Compact Encoding Detection (CED)
// encoding id - its use is not visible in this excerpt, confirm.
int iCEDEncoding;
// Buffer of 64 wide characters.
// NOTE(review): how and when it is filled is not visible in this excerpt.
WCHAR wchLabel[64];
} NP2ENCODING;
@ -130,7 +133,10 @@ size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length);
bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length);
// Google's "Compact Encoding Detection"
int Encoding_Analyze(const char* const text, const size_t len, bool* isReliable);
extern NP2ENCODING g_Encodings[];
int Encoding_CountOf();
void ChangeEncodingCodePage(int cpi, UINT newCP);
int Encoding_Analyze(const char* const text, const size_t len, const int encodingHint, bool* isReliable);
// --------------------------------------------------------------------------------------------------------------------------------

View File

@ -25,7 +25,7 @@
#define VC_EXTRALEAN 1
#include <windows.h>
#include "resource.h"
extern "C" {
#include "Encoding.h"
@ -33,265 +33,382 @@ extern "C" {
#include "compact_enc_det/compact_enc_det.h"
// Global settings...
//extern "C" g_Encodings;
//=============================================================================
#define ENC_PARSE_NAM_ANSI "ansi,system,ascii,"
#define ENC_PARSE_NAM_OEM "oem,oem,"
#define ENC_PARSE_NAM_UTF16LEBOM ""
#define ENC_PARSE_NAM_UTF16BEBOM ""
#define ENC_PARSE_NAM_UTF16LE "utf-16,utf16,unicode,"
#define ENC_PARSE_NAM_UTF16BE "utf-16be,utf16be,unicodebe,"
#define ENC_PARSE_NAM_UTF8 "utf-8,utf8,"
#define ENC_PARSE_NAM_UTF8SIG "utf-8,utf8,"
#define ENC_PARSE_NAM_UTF7 "utf-7,utf7,"
#define ENC_PARSE_NAM_DOS_720 "DOS-720,dos720,"
#define ENC_PARSE_NAM_ISO_8859_6 "iso-8859-6,iso88596,arabic,csisolatinarabic,ecma114,isoir127,"
#define ENC_PARSE_NAM_MAC_ARABIC "x-mac-arabic,xmacarabic,"
// Every alias must end with ',': Encoding_MatchA() appends ',' to the name it
// searches for, so the previously unterminated "cp1256" could never match.
#define ENC_PARSE_NAM_WIN_1256 "windows-1256,windows1256,cp1256,"
#define ENC_PARSE_NAM_DOS_775 "ibm775,ibm775,cp500,"
#define ENC_PARSE_NAM_ISO_8859_4 "iso-8859-4,iso88594,csisolatin4,isoir110,l4,latin4,"
#define ENC_PARSE_NAM_WIN_1257 "windows-1257,windows1257,"
#define ENC_PARSE_NAM_DOS_852 "ibm852,ibm852,cp852,"
#define ENC_PARSE_NAM_ISO_8859_2 "iso-8859-2,iso88592,csisolatin2,isoir101,latin2,l2,"
#define ENC_PARSE_NAM_MAC_CENTRAL_EUROP "x-mac-ce,xmacce,"
#define ENC_PARSE_NAM_WIN_1250 "windows-1250,windows1250,xcp1250,"
#define ENC_PARSE_NAM_GBK_2312 "gb2312,gb2312,chinese,cngb,csgb2312,csgb231280,gb231280,gbk,"
#define ENC_PARSE_NAM_MAC_ZH_CN "x-mac-chinesesimp,xmacchinesesimp,"
#define ENC_PARSE_NAM_BIG5 "big5,big5,cnbig5,csbig5,xxbig5,"
#define ENC_PARSE_NAM_MAC_ZH_TW "x-mac-chinesetrad,xmacchinesetrad,"
#define ENC_PARSE_NAM_MAC_CROATIAN "x-mac-croatian,xmaccroatian,"
#define ENC_PARSE_NAM_DOS_866 "cp866,cp866,ibm866,"
#define ENC_PARSE_NAM_ISO_8859_5 "iso-8859-5,iso88595,csisolatin5,csisolatincyrillic,cyrillic,isoir144,"
#define ENC_PARSE_NAM_KOI8_R "koi8-r,koi8r,cskoi8r,koi,koi8,"
#define ENC_PARSE_NAM_KOI8_U "koi8-u,koi8u,koi8ru,"
#define ENC_PARSE_NAM_MAC_CYRILLIC "x-mac-cyrillic,xmaccyrillic,"
#define ENC_PARSE_NAM_WIN_1251 "windows-1251,windows1251,xcp1251,"
#define ENC_PARSE_NAM_ISO_8859_13 "iso-8859-13,iso885913,"
#define ENC_PARSE_NAM_DOS_863 "ibm863,ibm863,"
#define ENC_PARSE_NAM_DOS_737 "ibm737,ibm737,"
#define ENC_PARSE_NAM_ISO_8859_7 "iso-8859-7,iso88597,csisolatingreek,ecma118,elot928,greek,greek8,isoir126,"
#define ENC_PARSE_NAM_MAC_GREEK "x-mac-greek,xmacgreek,"
#define ENC_PARSE_NAM_WIN_1253 "windows-1253,windows1253,"
#define ENC_PARSE_NAM_DOS_869 "ibm869,ibm869,"
#define ENC_PARSE_NAM_DOS_862 "DOS-862,dos862,"
#define ENC_PARSE_NAM_ISO_8859_8_I "iso-8859-8-i,iso88598i,logical,"
#define ENC_PARSE_NAM_ISO_8859_8 "iso-8859-8,iso88598,csisolatinhebrew,hebrew,isoir138,visual,"
#define ENC_PARSE_NAM_MAC_HEBREW "x-mac-hebrew,xmachebrew,"
#define ENC_PARSE_NAM_WIN_1255 "windows-1255,windows1255,"
#define ENC_PARSE_NAM_DOS_861 "ibm861,ibm861,"
#define ENC_PARSE_NAM_MAC_ICELANDIC "x-mac-icelandic,xmacicelandic,"
#define ENC_PARSE_NAM_MAC_JAPANESE "x-mac-japanese,xmacjapanese,"
#define ENC_PARSE_NAM_SHIFT_JIS "shift_jis,shiftjis,shiftjs,csshiftjis,cswindows31j,mskanji,xmscp932,xsjis,"
#define ENC_PARSE_NAM_MAC_KOREAN "x-mac-korean,xmackorean,"
#define ENC_PARSE_NAM_WIN_949 "windows-949,windows949,ksc56011987,csksc5601,euckr,isoir149,korean,ksc56011989"
#define ENC_PARSE_NAM_ISO_8859_3 "iso-8859-3,iso88593,latin3,isoir109,l3,"
#define ENC_PARSE_NAM_ISO_8859_15 "iso-8859-15,iso885915,latin9,l9,"
#define ENC_PARSE_NAM_DOS_865 "ibm865,ibm865,"
#define ENC_PARSE_NAM_DOS_437 "ibm437,ibm437,437,cp437,cspc8,codepage437,"
#define ENC_PARSE_NAM_DOS_858 "ibm858,ibm858,ibm00858,"
#define ENC_PARSE_NAM_DOS_860 "ibm860,ibm860,"
#define ENC_PARSE_NAM_MAC_ROMANIAN "x-mac-romanian,xmacromanian,"
#define ENC_PARSE_NAM_MAC_THAI "x-mac-thai,xmacthai,"
#define ENC_PARSE_NAM_WIN_874 "windows-874,windows874,dos874,iso885911,tis620,"
#define ENC_PARSE_NAM_DOS_857 "ibm857,ibm857,"
#define ENC_PARSE_NAM_ISO_8859_9 "iso-8859-9,iso88599,latin5,isoir148,l5,"
#define ENC_PARSE_NAM_MAC_TURKISH "x-mac-turkish,xmacturkish,"
#define ENC_PARSE_NAM_WIN_1254 "windows-1254,windows1254,"
#define ENC_PARSE_NAM_MAC_UKRAINIAN "x-mac-ukrainian,xmacukrainian,"
#define ENC_PARSE_NAM_WIN_1258 "windows-1258,windows-258,"
#define ENC_PARSE_NAM_DOS_850 "ibm850,ibm850,"
#define ENC_PARSE_NAM_ISO_8859_1 "iso-8859-1,iso88591,cp819,latin1,ibm819,isoir100,latin1,l1,"
#define ENC_PARSE_NAM_MAC_WESTERN_EUROP "macintosh,macintosh,"
#define ENC_PARSE_NAM_WIN_1252 "windows-1252,windows1252,cp367,cp819,ibm367,us,xansi,"
#define ENC_PARSE_NAM_IBM_EBCDIC_US "ebcdic-cp-us,ebcdiccpus,ebcdiccpca,ebcdiccpwt,ebcdiccpnl,ibm037,cp037,"
#define ENC_PARSE_NAM_IBM_EBCDIC_INT "x-ebcdic-international,xebcdicinternational,"
#define ENC_PARSE_NAM_IBM_EBCDIC_GR "x-EBCDIC-GreekModern,xebcdicgreekmodern,"
#define ENC_PARSE_NAM_IBM_EBCDIC_LAT_5 "CP1026,cp1026,csibm1026,ibm1026,"
#define ENC_PARSE_NAM_GB18030 "gb18030,gb18030,"
#define ENC_PARSE_NAM_EUC_JAPANESE "euc-jp,eucjp,xeuc,xeucjp,"
#define ENC_PARSE_NAM_EUC_KOREAN "euc-kr,euckr,cseuckr,"
#define ENC_PARSE_NAM_ISO_2022_CN "iso-2022-cn,iso2022cn,"
#define ENC_PARSE_NAM_HZ_GB2312 "hz-gb-2312,hzgb2312,hz,"
#define ENC_PARSE_NAM_ISO_2022_JP "iso-2022-jp,iso2022jp,"
#define ENC_PARSE_NAM_ISO_2022_KR "iso-2022-kr,iso2022kr,csiso2022kr,"
#define ENC_PARSE_NAM_X_CHINESE_CNS "x-Chinese-CNS,xchinesecns,"
//=============================================================================
// Global encoding table. The array index of a row is the encoding's "CPI";
// MapEncoding2CPI() hard-codes some of these indices, so rows must not be
// reordered without updating it.
// Columns: flags (NCP_*), code page, parse names, IDS_ENC_* id,
//          CED encoding id (or CED_NO_MAPPING), label buffer.
extern "C" NP2ENCODING g_Encodings[] = {
  /* 000 */{ NCP_ANSI | NCP_RECODE, CP_ACP, ENC_PARSE_NAM_ANSI, IDS_ENC_ANSI, CED_NO_MAPPING, L"" },
  /* 001 */{ NCP_OEM | NCP_RECODE, CP_OEMCP, ENC_PARSE_NAM_OEM, IDS_ENC_OEM, CED_NO_MAPPING, L"" },
  /* 002 */{ NCP_UNICODE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16LEBOM, IDS_ENC_UTF16LEBOM, CED_NO_MAPPING, L"" },
  /* 003 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_UNICODE_BOM, CP_UTF8, ENC_PARSE_NAM_UTF16BEBOM, IDS_ENC_UTF16BEBOM, CED_NO_MAPPING, L"" },
  /* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, UTF16LE, L"" },
  /* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, UTF16BE, L"" },
  /* 006 */{ NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, UTF8, L"" },
  /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, CED_NO_MAPPING, L"" },
  /* 008 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, UTF7, L"" },
  /* 009 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, CED_NO_MAPPING, L"" },
  /* 010 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, ISO_8859_6, L"" },
  /* 011 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10004, ENC_PARSE_NAM_MAC_ARABIC, IDS_ENC_MAC_ARABIC, CED_NO_MAPPING, L"" },
  /* 012 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1256, ENC_PARSE_NAM_WIN_1256, IDS_ENC_WIN_1256, MSFT_CP1256, L"" },
  /* 013 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 775, ENC_PARSE_NAM_DOS_775, IDS_ENC_DOS_775, CED_NO_MAPPING, L"" },
  /* 014 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28594, ENC_PARSE_NAM_ISO_8859_4, IDS_ENC_ISO_8859_4, ISO_8859_4, L"" },
  /* 015 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1257, ENC_PARSE_NAM_WIN_1257, IDS_ENC_WIN_1257, MSFT_CP1257, L"" },
  /* 016 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 852, ENC_PARSE_NAM_DOS_852, IDS_ENC_DOS_852, CZECH_CP852, L"" },
  /* 017 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28592, ENC_PARSE_NAM_ISO_8859_2, IDS_ENC_ISO_8859_2, ISO_8859_2, L"" },
  /* 018 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10029, ENC_PARSE_NAM_MAC_CENTRAL_EUROP, IDS_ENC_MAC_CENTRAL_EUROP, CED_NO_MAPPING, L"" },
  /* 019 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1250, ENC_PARSE_NAM_WIN_1250, IDS_ENC_WIN_1250, MSFT_CP1250, L"" },
  /* 020 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 936, ENC_PARSE_NAM_GBK_2312, IDS_ENC_GBK_2312, GBK, L"" },
  /* 021 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10008, ENC_PARSE_NAM_MAC_ZH_CN, IDS_ENC_MAC_ZH_CN, CED_NO_MAPPING, L"" },
  /* 022 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 950, ENC_PARSE_NAM_BIG5, IDS_ENC_BIG5, CHINESE_BIG5_CP950, L"" },
  /* 023 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10002, ENC_PARSE_NAM_MAC_ZH_TW, IDS_ENC_MAC_ZH_TW, CED_NO_MAPPING, L"" },
  /* 024 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10082, ENC_PARSE_NAM_MAC_CROATIAN, IDS_ENC_MAC_CROATIAN, CED_NO_MAPPING, L"" },
  /* 025 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 866, ENC_PARSE_NAM_DOS_866, IDS_ENC_DOS_866, RUSSIAN_CP866, L"" },
  /* 026 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28595, ENC_PARSE_NAM_ISO_8859_5, IDS_ENC_ISO_8859_5, ISO_8859_5, L"" },
  /* 027 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20866, ENC_PARSE_NAM_KOI8_R, IDS_ENC_KOI8_R, RUSSIAN_KOI8_R, L"" },
  /* 028 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 21866, ENC_PARSE_NAM_KOI8_U, IDS_ENC_KOI8_U, RUSSIAN_KOI8_RU, L"" },
  /* 029 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10007, ENC_PARSE_NAM_MAC_CYRILLIC, IDS_ENC_MAC_CYRILLIC, CED_NO_MAPPING, L"" },
  /* 030 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1251, ENC_PARSE_NAM_WIN_1251, IDS_ENC_WIN_1251, RUSSIAN_CP1251, L"" },
  /* 031 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28603, ENC_PARSE_NAM_ISO_8859_13, IDS_ENC_ISO_8859_13, ISO_8859_13, L"" },
  /* 032 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 863, ENC_PARSE_NAM_DOS_863, IDS_ENC_DOS_863, CED_NO_MAPPING, L"" },
  /* 033 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 737, ENC_PARSE_NAM_DOS_737, IDS_ENC_DOS_737, CED_NO_MAPPING, L"" },
  /* 034 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28597, ENC_PARSE_NAM_ISO_8859_7, IDS_ENC_ISO_8859_7, ISO_8859_7, L"" },
  /* 035 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10006, ENC_PARSE_NAM_MAC_GREEK, IDS_ENC_MAC_GREEK, CED_NO_MAPPING, L"" },
  /* 036 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1253, ENC_PARSE_NAM_WIN_1253, IDS_ENC_WIN_1253, MSFT_CP1253, L"" },
  /* 037 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 869, ENC_PARSE_NAM_DOS_869, IDS_ENC_DOS_869, CED_NO_MAPPING, L"" },
  /* 038 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 862, ENC_PARSE_NAM_DOS_862, IDS_ENC_DOS_862, CED_NO_MAPPING, L"" },
  /* 039 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 38598, ENC_PARSE_NAM_ISO_8859_8_I, IDS_ENC_ISO_8859_8_I, ISO_8859_8_I, L"" },
  /* 040 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28598, ENC_PARSE_NAM_ISO_8859_8, IDS_ENC_ISO_8859_8, ISO_8859_8, L"" },
  /* 041 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10005, ENC_PARSE_NAM_MAC_HEBREW, IDS_ENC_MAC_HEBREW, CED_NO_MAPPING, L"" },
  /* 042 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1255, ENC_PARSE_NAM_WIN_1255, IDS_ENC_WIN_1255, MSFT_CP1255, L"" },
  /* 043 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 861, ENC_PARSE_NAM_DOS_861, IDS_ENC_DOS_861, CED_NO_MAPPING, L"" },
  /* 044 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10079, ENC_PARSE_NAM_MAC_ICELANDIC, IDS_ENC_MAC_ICELANDIC, CED_NO_MAPPING, L"" },
  /* 045 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10001, ENC_PARSE_NAM_MAC_JAPANESE, IDS_ENC_MAC_JAPANESE, CED_NO_MAPPING, L"" },
  /* 046 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 932, ENC_PARSE_NAM_SHIFT_JIS, IDS_ENC_SHIFT_JIS, JAPANESE_CP932, L"" },
  /* 047 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10003, ENC_PARSE_NAM_MAC_KOREAN, IDS_ENC_MAC_KOREAN, CED_NO_MAPPING, L"" },
  /* 048 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 949, ENC_PARSE_NAM_WIN_949, IDS_ENC_WIN_949, CED_NO_MAPPING, L"" },
  /* 049 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28593, ENC_PARSE_NAM_ISO_8859_3, IDS_ENC_ISO_8859_3, ISO_8859_3, L"" },
  /* 050 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28605, ENC_PARSE_NAM_ISO_8859_15, IDS_ENC_ISO_8859_15, ISO_8859_15, L"" },
  /* 051 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 865, ENC_PARSE_NAM_DOS_865, IDS_ENC_DOS_865, CED_NO_MAPPING, L"" },
  /* 052 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 437, ENC_PARSE_NAM_DOS_437, IDS_ENC_DOS_437, CED_NO_MAPPING, L"" },
  /* 053 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 858, ENC_PARSE_NAM_DOS_858, IDS_ENC_DOS_858, CED_NO_MAPPING, L"" },
  /* 054 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 860, ENC_PARSE_NAM_DOS_860, IDS_ENC_DOS_860, CED_NO_MAPPING, L"" },
  /* 055 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10000, ENC_PARSE_NAM_MAC_WESTERN_EUROP, IDS_ENC_MAC_WESTERN_EUROP, MACINTOSH_ROMAN, L"" },
  /* 056 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10021, ENC_PARSE_NAM_MAC_THAI, IDS_ENC_MAC_THAI, CED_NO_MAPPING, L"" },
  /* 057 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 874, ENC_PARSE_NAM_WIN_874, IDS_ENC_WIN_874, MSFT_CP874, L"" },
  /* 058 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 857, ENC_PARSE_NAM_DOS_857, IDS_ENC_DOS_857, CED_NO_MAPPING, L"" },
  /* 059 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28599, ENC_PARSE_NAM_ISO_8859_9, IDS_ENC_ISO_8859_9, ISO_8859_9, L"" },
  /* 060 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10081, ENC_PARSE_NAM_MAC_TURKISH, IDS_ENC_MAC_TURKISH, CED_NO_MAPPING, L"" },
  /* 061 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1254, ENC_PARSE_NAM_WIN_1254, IDS_ENC_WIN_1254, MSFT_CP1254, L"" },
  /* 062 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10017, ENC_PARSE_NAM_MAC_UKRAINIAN, IDS_ENC_MAC_UKRAINIAN, CED_NO_MAPPING, L"" },
  /* 063 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1258, ENC_PARSE_NAM_WIN_1258, IDS_ENC_WIN_1258, CED_NO_MAPPING, L"" },
  /* 064 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 850, ENC_PARSE_NAM_DOS_850, IDS_ENC_DOS_850, CED_NO_MAPPING, L"" },
  /* 065 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28591, ENC_PARSE_NAM_ISO_8859_1, IDS_ENC_ISO_8859_1, ISO_8859_1, L"" },
  // Mac Romanian (code page 10010) is not Mac Roman: MACINTOSH_ROMAN belongs to row 055 only.
  /* 066 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 10010, ENC_PARSE_NAM_MAC_ROMANIAN, IDS_ENC_MAC_ROMANIAN, CED_NO_MAPPING, L"" },
  /* 067 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1252, ENC_PARSE_NAM_WIN_1252, IDS_ENC_WIN_1252, MSFT_CP1252, L"" },
  /* 068 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 37, ENC_PARSE_NAM_IBM_EBCDIC_US, IDS_ENC_IBM_EBCDIC_US, CED_NO_MAPPING, L"" },
  /* 069 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 500, ENC_PARSE_NAM_IBM_EBCDIC_INT, IDS_ENC_IBM_EBCDIC_INT, CED_NO_MAPPING, L"" },
  /* 070 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 875, ENC_PARSE_NAM_IBM_EBCDIC_GR, IDS_ENC_IBM_EBCDIC_GR, CED_NO_MAPPING, L"" },
  /* 071 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1026, ENC_PARSE_NAM_IBM_EBCDIC_LAT_5, IDS_ENC_IBM_EBCDIC_LAT_5, CED_NO_MAPPING, L"" },
  /* 072 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 54936, ENC_PARSE_NAM_GB18030, IDS_ENC_GB18030, GB18030, L"" }, // Chinese Simplified (GB18030)
  /* 073 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51932, ENC_PARSE_NAM_EUC_JAPANESE, IDS_ENC_EUC_JAPANESE, JAPANESE_EUC_JP, L"" }, // Japanese (EUC)
  /* 074 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 51949, ENC_PARSE_NAM_EUC_KOREAN, IDS_ENC_EUC_KOREAN, KOREAN_EUC_KR, L"" }, // Korean (EUC)
  /* 075 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50229, ENC_PARSE_NAM_ISO_2022_CN, IDS_ENC_ISO_2022_CN, ISO_2022_CN, L"" }, // Chinese Traditional (ISO-2022-CN)
  /* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312, IDS_ENC_HZ_GB2312, HZ_GB_2312, L"" }, // Chinese Simplified (HZ-GB2312)
  /* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP, IDS_ENC_ISO_2022_JP, KDDI_ISO_2022_JP, L"" }, // Japanese (JIS)
  /* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR, IDS_ENC_ISO_2022_KR, ISO_2022_KR, L"" }, // Korean (ISO-2022-KR)
  /* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, CHINESE_CNS, L"" } // Chinese Traditional (CNS)
  // --- disabled entries (old numbering, old column layout), kept for reference ---
  /* 073 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 870, "CP870,cp870,ebcdiccproece,ebcdiccpyu,csibm870,ibm870,", 00000, L"" }, // IBM EBCDIC (Multilingual Latin-2)
  /* 074 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1047, "IBM01047,ibm01047,", 00000, L"" }, // IBM EBCDIC (Open System Latin-1)
  /* 075 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1140, "x-ebcdic-cp-us-euro,xebcdiccpuseuro,", 00000, L"" }, // IBM EBCDIC (US-Canada-Euro)
  /* 076 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1141, "x-ebcdic-germany-euro,xebcdicgermanyeuro,", 00000, L"" }, // IBM EBCDIC (Germany-Euro)
  /* 077 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1142, "x-ebcdic-denmarknorway-euro,xebcdicdenmarknorwayeuro,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway-Euro)
  /* 078 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1143, "x-ebcdic-finlandsweden-euro,xebcdicfinlandswedeneuro,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden-Euro)
  /* 079 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1144, "x-ebcdic-italy-euro,xebcdicitalyeuro,", 00000, L"" }, // IBM EBCDIC (Italy-Euro)
  /* 080 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1145, "x-ebcdic-spain-euro,xebcdicspaineuro,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America-Euro)
  /* 081 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1146, "x-ebcdic-uk-euro,xebcdicukeuro,", 00000, L"" }, // IBM EBCDIC (UK-Euro)
  /* 082 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1147, "x-ebcdic-france-euro,xebcdicfranceeuro,", 00000, L"" }, // IBM EBCDIC (France-Euro)
  /* 083 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1148, "x-ebcdic-international-euro,xebcdicinternationaleuro,", 00000, L"" }, // IBM EBCDIC (International-Euro)
  /* 084 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1149, "x-ebcdic-icelandic-euro,xebcdicicelandiceuro,", 00000, L"" }, // IBM EBCDIC (Icelandic-Euro)
  /* 085 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 1361, "johab,johab,", 00000, L"" }, // Korean (Johab)
  /* 086 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20273, "x-EBCDIC-Germany,xebcdicgermany,", 00000, L"" }, // IBM EBCDIC (Germany)
  /* 087 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20277, "x-EBCDIC-DenmarkNorway,xebcdicdenmarknorway,ebcdiccpdk,ebcdiccpno,", 00000, L"" }, // IBM EBCDIC (Denmark-Norway)
  /* 088 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20278, "x-EBCDIC-FinlandSweden,xebcdicfinlandsweden,ebcdicpfi,ebcdiccpse,", 00000, L"" }, // IBM EBCDIC (Finland-Sweden)
  /* 089 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20280, "x-EBCDIC-Italy,xebcdicitaly,", 00000, L"" }, // IBM EBCDIC (Italy)
  /* 090 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20284, "x-EBCDIC-Spain,xebcdicspain,ebcdiccpes,", 00000, L"" }, // IBM EBCDIC (Spain-Latin America)
  /* 091 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20285, "x-EBCDIC-UK,xebcdicuk,ebcdiccpgb,", 00000, L"" }, // IBM EBCDIC (UK)
  /* 092 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20290, "x-EBCDIC-JapaneseKatakana,xebcdicjapanesekatakana,", 00000, L"" }, // IBM EBCDIC (Japanese Katakana)
  /* 093 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20297, "x-EBCDIC-France,xebcdicfrance,ebcdiccpfr,", 00000, L"" }, // IBM EBCDIC (France)
  /* 094 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20420, "x-EBCDIC-Arabic,xebcdicarabic,ebcdiccpar1,", 00000, L"" }, // IBM EBCDIC (Arabic)
  /* 095 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20423, "x-EBCDIC-Greek,xebcdicgreek,ebcdiccpgr,", 00000, L"" }, // IBM EBCDIC (Greek)
  /* 096 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20424, "x-EBCDIC-Hebrew,xebcdichebrew,ebcdiccphe,", 00000, L"" }, // IBM EBCDIC (Hebrew)
  /* 097 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20833, "x-EBCDIC-KoreanExtended,xebcdickoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean Extended)
  /* 098 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20838, "x-EBCDIC-Thai,xebcdicthai,ibmthai,csibmthai,", 00000, L"" }, // IBM EBCDIC (Thai)
  /* 099 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20871, "x-EBCDIC-Icelandic,xebcdicicelandic,ebcdiccpis,", 00000, L"" }, // IBM EBCDIC (Icelandic)
  /* 100 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20880, "x-EBCDIC-CyrillicRussian,xebcdiccyrillicrussian,ebcdiccyrillic,", 00000, L"" }, // IBM EBCDIC (Cyrillic Russian)
  /* 101 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20905, "x-EBCDIC-Turkish,xebcdicturkish,ebcdiccptr,", 00000, L"" }, // IBM EBCDIC (Turkish)
  /* 102 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20924, "IBM00924,ibm00924,ebcdiclatin9euro,", 00000, L"" }, // IBM EBCDIC (Open System-Euro Latin-1)
  /* 103 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 21025, "x-EBCDIC-CyrillicSerbianBulgarian,xebcdiccyrillicserbianbulgarian,", 00000, L"" }, // IBM EBCDIC (Cyrillic Serbian-Bulgarian)
  /* 104 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50930, "x-EBCDIC-JapaneseAndKana,xebcdicjapaneseandkana,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese Katakana)
  /* 105 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50931, "x-EBCDIC-JapaneseAndUSCanada,xebcdicjapaneseanduscanada,", 00000, L"" }, // IBM EBCDIC (Japanese and US-Canada)
  /* 106 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50933, "x-EBCDIC-KoreanAndKoreanExtended,xebcdickoreanandkoreanextended,", 00000, L"" }, // IBM EBCDIC (Korean and Korean Extended)
  /* 107 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50935, "x-EBCDIC-SimplifiedChinese,xebcdicsimplifiedchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Simplified)
  /* 108 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50937, "x-EBCDIC-TraditionalChinese,xebcdictraditionalchinese,", 00000, L"" }, // IBM EBCDIC (Chinese Traditional)
  /* 109 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50939, "x-EBCDIC-JapaneseAndJapaneseLatin,xebcdicjapaneseandjapaneselatin,", 00000, L"" }, // IBM EBCDIC (Japanese and Japanese-Latin)
  /* 110 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20105, "x-IA5,xia5,", 00000, L"" }, // Western European (IA5)
  /* 111 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20106, "x-IA5-German,xia5german,", 00000, L"" }, // German (IA5)
  /* 112 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20107, "x-IA5-Swedish,xia5swedish,", 00000, L"" }, // Swedish (IA5)
  /* 113 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20108, "x-IA5-Norwegian,xia5norwegian,", 00000, L"" }, // Norwegian (IA5)
  /* 114 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20936, "x-cp20936,xcp20936,", 00000, L"" }, // Chinese Simplified (GB2312)
  /* 115 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20932, "euc-jp,,", 00000, L"" }, // Japanese (JIS X 0208-1990 & 0212-1990)
  /* 117 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50221, "csISO2022JP,csiso2022jp,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana)
  /* 118 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50222, "_iso-2022-jp$SIO,iso2022jpSIO,", 00000, L"" }, // Japanese (JIS-Allow 1 byte Kana - SO/SI)
  /* 120 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 50227, "x-cp50227,xcp50227,", 00000, L"" }, // Chinese Simplified (ISO-2022)
  /* 123 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 20002, "x-Chinese-Eten,xchineseeten,", 00000, L"" }, // Chinese Traditional (Eten)
  /* 125 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 51936, "euc-cn,euccn,xeuccn,", 00000, L"" }, // Chinese Simplified (EUC)
  /* 128 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57002, "x-iscii-de,xisciide,", 00000, L"" }, // ISCII Devanagari
  /* 129 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57003, "x-iscii-be,xisciibe,", 00000, L"" }, // ISCII Bengali
  /* 130 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57004, "x-iscii-ta,xisciita,", 00000, L"" }, // ISCII Tamil
  /* 131 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57005, "x-iscii-te,xisciite,", 00000, L"" }, // ISCII Telugu
  /* 132 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57006, "x-iscii-as,xisciias,", 00000, L"" }, // ISCII Assamese
  /* 133 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57007, "x-iscii-or,xisciior,", 00000, L"" }, // ISCII Oriya
  /* 134 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57008, "x-iscii-ka,xisciika,", 00000, L"" }, // ISCII Kannada
  /* 135 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57009, "x-iscii-ma,xisciima,", 00000, L"" }, // ISCII Malayalam
  /* 136 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57010, "x-iscii-gu,xisciigu,", 00000, L"" }, // ISCII Gujarathi
  /* 137 *///{ NCP_EXTERNAL_8BIT|NCP_RECODE, 57011, "x-iscii-pa,xisciipa,", 00000, L"" }, // ISCII Panjabi
};
// Reports how many rows the global g_Encodings table holds.
extern "C" int Encoding_CountOf()
{
  int const iTableRows = ARRAYSIZE(g_Encodings);
  return iTableRows;
}
//=============================================================================
// ============================================================================
int __fastcall MapEncoding2CPI(const Encoding& encoding)
static int __fastcall MapCPI2Encoding(const int iNP3Encoding)
{
if ((iNP3Encoding < 0) || (iNP3Encoding >= Encoding_CountOf())) {
return UNKNOWN_ENCODING; // CPI_NONE, CPI_GET
}
int const iCED = g_Encodings[iNP3Encoding].iCEDEncoding;
return ((iCED != CED_NO_MAPPING) ? iCED : UNKNOWN_ENCODING);
}
// ============================================================================
// Translates a CED detection result into an index of g_Encodings (or a CPI_* constant).
//   encoding    : encoding reported by the detector.
//   pIsReliable : in/out reliability flag; cleared when the result is only a
//                 loose guess or when no mapping exists. Must not be null.
//   returns     : CPI_ANSI_DEFAULT for ISO-8859-1 / 7-bit ASCII, otherwise the
//                 first g_Encodings row whose iCEDEncoding equals 'encoding',
//                 otherwise a hand-picked fallback, otherwise CPI_NONE.
static int __fastcall MapEncoding2CPI(const Encoding& encoding, bool* pIsReliable)
{
  int iNP3Encoding = CPI_NONE;

  if ((encoding == ISO_8859_1) || (encoding == ASCII_7BIT)) {
    // ASCII -> ANSI default
    iNP3Encoding = CPI_ANSI_DEFAULT;
  }
  else {
    // direct lookup: first table row carrying this CED id wins
    for (int i = 0; i < Encoding_CountOf(); ++i) {
      if (encoding == g_Encodings[i].iCEDEncoding) {
        iNP3Encoding = i;
        break;
      }
    }
  }

  // not found, guess a mapping:
  // (the numeric values below are row indices of g_Encodings)
  if (iNP3Encoding == CPI_NONE)
  {
    switch (encoding) {
    case UNICODE:
      iNP3Encoding = CPI_UNICODE;
      break;

    case UTF8UTF8:
      iNP3Encoding = CPI_UTF8;
      break;

    case UTF32BE:
      iNP3Encoding = CPI_UTF32BE;
      break;

    case UTF32LE:
      iNP3Encoding = CPI_UTF32;
      break;

    case ISO_8859_10:
    case ISO_8859_11: // latin-thai
      // no table row; reliability flag is deliberately left untouched here
      iNP3Encoding = CPI_NONE;
      break;

    case BIG5_HKSCS:
    case CHINESE_BIG5:
      iNP3Encoding = 22;
      break;

    case CHINESE_EUC_CN:
    case CHINESE_EUC_DEC:
      // approximated by the GBK row, so do not report it as reliable
      *pIsReliable = false;
      // fall through
    case CHINESE_GB:
      iNP3Encoding = 20;
      break;

    case JAPANESE_SHIFT_JIS:
    case KDDI_SHIFT_JIS:
    case DOCOMO_SHIFT_JIS:
    case SOFTBANK_SHIFT_JIS:
      iNP3Encoding = 46;
      break;

    case JAPANESE_JIS:
    case SOFTBANK_ISO_2022_JP:
      iNP3Encoding = 77;
      break;

    case CZECH_CSN_369103:
    case TSCII:
    case TAMIL_MONO:
    case TAMIL_BI:
    case JAGRAN:
    case BHASKAR:
    case HTCHANAKYA:
    case BINARYENC:
    case TAM_ELANGO:
    case TAM_LTTMBARANI:
    case TAM_SHREE:
    case TAM_TBOOMIS:
    case TAM_TMNEWS:
    case TAM_WEBTAMIL:
    case UNKNOWN_ENCODING:
    default:
      iNP3Encoding = CPI_NONE;
      *pIsReliable = false;
      break;
    }
  }
  return iNP3Encoding;
}
// ============================================================================
extern "C" void ChangeEncodingCodePage(int cpi, UINT newCP)
{
int iCED = MapCPI2Encoding(cpi);
g_Encodings[cpi].uCodePage = newCP;
g_Encodings[cpi].iCEDEncoding = iCED;
}
// ============================================================================
extern "C" int Encoding_Analyze(const char* const text, const size_t len, bool* pIsReliable)
extern "C" int Encoding_Analyze(const char* const text, const size_t len, const int encodingHint, bool* pIsReliable)
{
int bytes_consumed;
Encoding encoding = CompactEncDet::DetectEncoding(
text, static_cast<int>(len),
nullptr, nullptr, nullptr,
UNKNOWN_ENCODING,
MapCPI2Encoding(encodingHint),
UNKNOWN_LANGUAGE,
CompactEncDet::WEB_CORPUS,
false,
&bytes_consumed,
pIsReliable);
return MapEncoding2CPI(encoding);
return MapEncoding2CPI(encoding, pIsReliable);
}
// ============================================================================