+ fix: renewed UTF-8 and Unicode detection

+ rev: reverting some new encodings (needs handling for not installed codepages)
2026-06-11 21:03:05 +08:00 · 2018-08-21 16:22:30 +02:00 · 2018-08-21 16:22:30 +02:00 · 4582aa487d
commit 4582aa487d
parent 26e32752c2
5 changed files with 171 additions and 192 deletions
--- a/src/Dialogs.c
+++ b/src/Dialogs.c
@ -127,7 +127,7 @@ int MsgBoxLng(int iType, UINT uIdMsg, ...)
  if (uIdMsg == IDS_MUI_ERR_LOADFILE || uIdMsg == IDS_MUI_ERR_SAVEFILE ||
    uIdMsg == IDS_MUI_CREATEINI_FAIL || uIdMsg == IDS_MUI_WRITEINI_FAIL ||
    uIdMsg == IDS_MUI_EXPORT_FAIL) {
-    LPVOID lpMsgBuf;
+    LPVOID lpMsgBuf = NULL;
    WCHAR wcht;
    FormatMessage(
      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
@ -137,10 +137,12 @@ int MsgBoxLng(int iType, UINT uIdMsg, ...)
      (LPTSTR)&lpMsgBuf,
      0,
      NULL);
-    StrTrim(lpMsgBuf, L" \a\b\f\n\r\t\v");
-    StringCchCat(szText, COUNTOF(szText), L"\n");
-    StringCchCat(szText, COUNTOF(szText), lpMsgBuf);
-    LocalFree(lpMsgBuf);
+    if (lpMsgBuf) {
+      StrTrim(lpMsgBuf, L" \a\b\f\n\r\t\v");
+      StringCchCat(szText, COUNTOF(szText), L"\n");
+      StringCchCat(szText, COUNTOF(szText), lpMsgBuf);
+      LocalFree(lpMsgBuf);
+    }
    wcht = *CharPrev(szText, StrEnd(szText));
    if (IsCharAlphaNumeric(wcht) || wcht == '"' || wcht == '\'')
      StringCchCat(szText, COUNTOF(szText), L".");
@ -2281,12 +2283,12 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
                EndDialog(hwnd,IDCANCEL);
              }
              else {
-                bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED) ? 1 : 0;
-                bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED) ? 1 : 0;
-                bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED) ? 1 : 0;
-                bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED) ? 1 : 0;
-                bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED) ? 1 : 0;
-                bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED) ? 1 : 0;
+                bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED);
+                bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED);
+                bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED);
+                bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
+                bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
+                bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
                EndDialog(hwnd,IDOK);
              }
            }
--- a/src/Edit.c
+++ b/src/Edit.c
@ -96,6 +96,7 @@ extern int g_iDefaultCharSet;
 extern bool bLoadASCIIasUTF8;
 extern bool bForceLoadASCIIasUTF8;
 extern bool bLoadNFOasOEM;
+extern bool bNoEncodingTags;

 extern bool g_bAccelWordNavigation;

@ -1056,13 +1057,10 @@ bool EditLoadFile(
  if (!Encoding_IsNONE(iForcedEncoding)) {
    iPreferedEncoding = iForcedEncoding;
  }
-  else if (Encoding_IsUNICODE(iAnalyzedEncoding) && !bSkipUTFDetection) {
-    iPreferedEncoding = iAnalyzedEncoding;
-  }
  else if (iFileEncWeak != CPI_NONE) {
    iPreferedEncoding = iFileEncWeak;
  }
-  else if (!Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable ) {
+  else if (!Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable) {
    iPreferedEncoding = iAnalyzedEncoding;
  } 
  else if (Encoding_IsNONE(iPreferedEncoding)) {
@ -1087,7 +1085,8 @@ bool EditLoadFile(
  // ===  UNICODE  ===
  else if (Encoding_IsUNICODE(iForcedEncoding) ||
    (Encoding_IsNONE(iForcedEncoding) && !bSkipUTFDetection && !bIsUTF8Sig
-      && (IsUnicode(lpData, cbData, &bBOM, &bReverse) || (Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable))
+      && (IsValidUnicode(lpData, cbData, &bBOM, &bReverse) 
+        || (Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable))
      )
    )
  {
@ -1143,16 +1142,16 @@ bool EditLoadFile(
    FileVars_Init(lpData,cbData,&fvCurFile);

    // ===  UTF-8  ===
-    if (Encoding_IsUTF8(iForcedEncoding) || 
-      (Encoding_IsNONE(iForcedEncoding) && !bSkipUTFDetection && !FileVars_IsNonUTF8(&fvCurFile)
-        && (bIsUTF8Sig
-          || FileVars_IsUTF8(&fvCurFile)
-          || (Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable)
-          || (!bNfoDizDetected && (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8))
-          )
-        && (IsUTF8(lpData, cbData) && !UTF8_ContainsInvalidChars(lpData, cbData))
-        )
-      )
+    bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !bNoEncodingTags);
+    bool const bForcedNonUTF8 = !Encoding_IsNONE(iForcedEncoding) && !Encoding_IsUTF8(iForcedEncoding);
+
+    bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
+    bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
+    bool const bSoftHintUTF8 = (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8);
+
+    bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !bNoEncodingTags);
+
+    if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8)))
    {
      EditSetNewText(hwnd,"",0);
      if (bIsUTF8Sig) {
@ -1181,7 +1180,7 @@ bool EditLoadFile(
      }

      if (((Encoding_GetCodePage(*iEncoding) != CP_UTF7) && Encoding_IsEXTERNAL_8BIT(*iEncoding)) ||
-          ((Encoding_GetCodePage(*iEncoding) == CP_UTF7) && IsUTF7(lpData,cbData))) {
+          ((Encoding_GetCodePage(*iEncoding) == CP_UTF7) && IsValidUTF7(lpData,cbData))) {

        UINT uCodePage = Encoding_GetCodePage(*iEncoding);

--- a/src/Encoding.c
+++ b/src/Encoding.c
@ -587,7 +587,7 @@ const char* Encoding_GetParseNames(int iEncoding) {
 // ============================================================================


-bool IsUnicode(const char* pBuffer, size_t cb, bool* lpbBOM, bool* lpbReverse) 
+bool IsValidUnicode(const char* pBuffer, size_t cb, bool* lpbBOM, bool* lpbReverse) 
 {
  if (!pBuffer || cb < 2) { return false; }

@ -630,7 +630,7 @@ bool IsUnicode(const char* pBuffer, size_t cb, bool* lpbBOM, bool* lpbReverse)
 // ============================================================================


-bool IsUTF7(const char* pTest, size_t nLength) {
+bool IsValidUTF7(const char* pTest, size_t nLength) {
  const char *pt = pTest;

  for (size_t i = 0; i < nLength; i++) {
@ -638,7 +638,6 @@ bool IsUTF7(const char* pTest, size_t nLength) {
      return false;
    pt++;
  }
-
  return true;
 }
 // ============================================================================
@ -648,7 +647,124 @@ bool IsUTF7(const char* pTest, size_t nLength) {
 //#define _OLD_UTF8_VALIDATOR_ 1
 #ifdef _OLD_UTF8_VALIDATOR_

-bool IsUTF8(const char* pTest, size_t nLength)
+// ============================================================================
+
+/* Byte length of a UTF-8 sequence, indexed by the upper 4 bits of its first byte.
+Sequences are at most 4 bytes long here, so the upper nibble is enough to classify.
+A 0 entry marks a byte that cannot start a sequence. First bytes 0xF8..0xFF share the
+last slot, so the users of this table reject them with a separate (byte <= 0xF7) test. */
+static const size_t utf8_lengths[16] =
+{
+  1,1,1,1,1,1,1,1,        /* 0000 to 0111 : 1 byte (plain ASCII) */
+  0,0,0,0,                /* 1000 to 1011 : not valid as a first byte */
+  2,2,                    /* 1100, 1101 : 2 bytes */
+  3,                      /* 1110 : 3 bytes */
+  4                       /* 1111 : 4 bytes */
+};
+
+// ----------------------------------------------------------------------------
+
+/*++
+Function :
+UTF8_mbslen_bytes [INTERNAL]
+
+Calculates the byte size of a NUL-terminated UTF-8 string.
+
+Parameters :
+char *utf8_string : string to examine, must be NUL-terminated
+
+Return value :
+size (in bytes) of the string, including the NUL terminator.
+Invalid bytes are counted as well; there is no error return.
+--*/
+size_t __fastcall UTF8_mbslen_bytes(LPCSTR utf8_string)
+{
+  size_t length = 0;
+  size_t code_size;
+  BYTE byte;
+
+  while (*utf8_string)
+  {
+    byte = (BYTE)*utf8_string;
+
+    if ((byte <= 0xF7) && (0 != (code_size = utf8_lengths[byte >> 4]))) {
+      /* walk the sequence byte by byte: a truncated sequence must not
+      carry the pointer past the NUL terminator (out-of-bounds read) */
+      do { length++; utf8_string++; } while ((--code_size > 0) && *utf8_string);
+    }
+    else {
+      /* we got an invalid byte value but need to count it,
+      it will be later ignored during the string conversion */
+      length++;
+      utf8_string++;
+    }
+  }
+  length++; /* include NUL terminator */
+  return length;
+}
+// ----------------------------------------------------------------------------
+
+/*++
+Function :
+UTF8_mbslen [INTERNAL]
+
+Calculates the number of UTF-16 code units needed for a UTF-8 buffer.
+
+Parameters :
+char *utf8_string : buffer to examine (length-bounded, no NUL terminator needed)
+size_t byte_length : size of the buffer in bytes
+
+Return value :
+number of UTF-16 code units (a 4-byte sequence counts as 2).
+Invalid or truncated bytes are skipped; there is no error return.
+--*/
+size_t __fastcall UTF8_mbslen(LPCSTR utf8_string, size_t byte_length)
+{
+  size_t wchar_length = 0;
+  size_t code_size;
+  BYTE byte;
+
+  while (byte_length > 0) {
+    byte = (BYTE)*utf8_string;
+
+    /* UTF-16 can't encode 5-byte and 6-byte sequences, so maximum value
+    for first byte is 11110111. Use lookup table to determine sequence
+    length based on upper 4 bits of first byte */
+    if ((byte <= 0xF7) && (0 != (code_size = utf8_lengths[byte >> 4])) && (code_size <= byte_length)) {
+      /* 1 sequence == 1 character */
+      wchar_length++;
+      /* ...except a 4-byte sequence, which is counted as 2 units */
+      if (code_size == 4)
+        wchar_length++;
+
+      utf8_string += code_size;   /* increment pointer */
+      byte_length -= code_size;   /* decrement counter, cannot wrap: code_size <= byte_length */
+    }
+    else {
+      /*
+      unlike UTF8_mbslen_bytes, we ignore the invalid characters.
+      we only report the number of valid characters we have encountered
+      to match the Windows behavior. a sequence cut off by the end of
+      the buffer is skipped the same way instead of overrunning it.
+      */
+      utf8_string++;
+      byte_length--;
+    }
+  }
+  return wchar_length;
+}
+// ----------------------------------------------------------------------------
+
+bool __fastcall UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
+{
+  LPCSTR const pText = UTF8StringStart(utf8_string); /* NOTE(review): byte count is compared with a UTF-16 unit count, so any multi-byte sequence differs - confirm intent */
+  return (UTF8_mbslen(pText, IsUTF8Signature(utf8_string) ? (byte_length - 3) : byte_length) != (UTF8_mbslen_bytes(pText) - 1));
+}
+
+// ----------------------------------------------------------------------------
+
+
+bool IsValidUTF8(const char* pTest, size_t nLength)
 {
  static int byte_class_table[256] = {
    /*       00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F  */
@ -705,121 +821,7 @@ bool IsUTF8(const char* pTest, size_t nLength)
      break;
  }

-  return (current == kSTART) ? true : false;
-}
-
-// ============================================================================
-
-/* byte length of UTF-8 sequence based on value of first byte.
-for UTF-16 (21-bit space), max. code length is 4, so we only need to look
-at 4 upper bits.
-*/
-static const size_t utf8_lengths[16] =
-{
-  1,1,1,1,1,1,1,1,        /* 0000 to 0111 : 1 byte (plain ASCII) */
-  0,0,0,0,                /* 1000 to 1011 : not valid */
-  2,2,                    /* 1100, 1101 : 2 bytes */
-  3,                      /* 1110 : 3 bytes */
-  4                       /* 1111 : 4 bytes */
-};
-
-// ----------------------------------------------------------------------------
-
-/*++
-Function :
-UTF8_mbslen_bytes [INTERNAL]
-
-Calculates the byte size of a NULL-terminated UTF-8 string.
-
-Parameters :
-char *utf8_string : string to examine
-
-Return value :
-size (in bytes) of a NULL-terminated UTF-8 string.
-1 if invalid NULL-terminated UTF-8 string
--*/
-size_t UTF8_mbslen_bytes(LPCSTR utf8_string)
-{
-  size_t length = 0;
-  size_t code_size;
-  BYTE byte;
-
-  while (*utf8_string)
-  {
-    byte = (BYTE)*utf8_string;
-
-    if ((byte <= 0xF7) && (0 != (code_size = utf8_lengths[byte >> 4]))) {
-      length += code_size;
-      utf8_string += code_size;
-    }
-    else {
-      /* we got an invalid byte value but need to count it,
-      it will be later ignored during the string conversion */
-      //WARN("invalid first byte value 0x%02X in UTF-8 sequence!\n",byte);
-      length++;
-      utf8_string++;
-    }
-  }
-  length++; /* include NULL terminator */
-  return length;
-}
-// ----------------------------------------------------------------------------
-
-/*++
-Function :
-UTF8_mbslen [INTERNAL]
-
-Calculates the character size of a NULL-terminated UTF-8 string.
-
-Parameters :
-char *utf8_string : string to examine
-int byte_length : byte size of string
-
-Return value :
-size (in characters) of a UTF-8 string.
-1 if invalid UTF-8 string
--*/
-size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length)
-{
-  size_t wchar_length = 0;
-  size_t code_size;
-  BYTE byte;
-
-  while (byte_length > 0) {
-    byte = (BYTE)*utf8_string;
-
-    /* UTF-16 can't encode 5-byte and 6-byte sequences, so maximum value
-    for first byte is 11110111. Use lookup table to determine sequence
-    length based on upper 4 bits of first byte */
-    if ((byte <= 0xF7) && (0 != (code_size = utf8_lengths[byte >> 4]))) {
-      /* 1 sequence == 1 character */
-      wchar_length++;
-
-      if (code_size == 4)
-        wchar_length++;
-
-      utf8_string += code_size;        /* increment pointer */
-      byte_length -= code_size;   /* decrement counter*/
-    }
-    else {
-      /*
-      unlike UTF8_mbslen_bytes, we ignore the invalid characters.
-      we only report the number of valid characters we have encountered
-      to match the Windows behavior.
-      */
-      //WARN("invalid byte 0x%02X in UTF-8 sequence, skipping it!\n", byte);
-      utf8_string++;
-      byte_length--;
-    }
-  }
-  return wchar_length;
-}
-// ----------------------------------------------------------------------------
-
-bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
-{
-  return ((UTF8_mbslen_bytes(UTF8StringStart(utf8_string)) - 1) !=
-    UTF8_mbslen(UTF8StringStart(utf8_string), IsUTF8Signature(utf8_string) ? (byte_length - 3) : byte_length));
+  return (current == kSTART) && !UTF8_ContainsInvalidChars(pTest, nLength);
 }


@ -831,17 +833,13 @@ bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
 // Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
 // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

-
-enum {
-  UTF8_ACCEPT = 0,
-  UTF8_REJECT = 12,
-  UTF8_NOTEST = 113
-};
-
-static UINT s_State = UTF8_NOTEST;
-
-bool IsUTF8(const char* pTest, size_t nLength)
+bool IsValidUTF8(const char* pTest, size_t nLength)
 {
+  enum {
+    UTF8_ACCEPT = 0,
+    UTF8_REJECT = 12
+  };
+
  static const unsigned char utf8_dfa[] = {
    // The first part of the table maps bytes to character classes that
    // to reduce the size of the transition table and create bitmasks.
@ -866,32 +864,16 @@ bool IsUTF8(const char* pTest, size_t nLength)
  const unsigned char *pt = (const unsigned char *)pTest;
  const unsigned char *end = pt + nLength;

-  s_State = UTF8_ACCEPT;
+  UINT state = UTF8_ACCEPT;
  while (pt < end && *pt) {
-    s_State = utf8_dfa[256 + s_State + utf8_dfa[*pt++]];
-    if (s_State == UTF8_REJECT) {
+    state = utf8_dfa[256 + state + utf8_dfa[*pt++]];
+    if (state == UTF8_REJECT) {
      return false;
    }
  }
-  return (s_State == UTF8_ACCEPT);
+  return (state == UTF8_ACCEPT);
 }

-// ----------------------------------------------------------------------------
-
-bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
-{
-  bool result = true;
-  if (s_State != UTF8_NOTEST) {
-    result = (s_State == UTF8_REJECT);
-  }
-  else {
-    result = IsUTF8(utf8_string, byte_length);
-  }
-  s_State = UTF8_NOTEST; // reset: old way, call IsUTF8() before 
-  return result;
-}
-
-
 // ----------------------------------------------------------------------------

 #endif
--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -128,14 +128,9 @@ const char* Encoding_GetParseNames(int);
 #define Has_UTF16_LE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFEFF)
 #define Has_UTF16_BE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFFFE) /* reverse */

-bool IsUnicode(const char*, size_t, bool*, bool*);
-bool IsUTF8(const char*, size_t);
-bool IsUTF7(const char*, size_t);
-
-
-size_t UTF8_mbslen_bytes(LPCSTR utf8_string);
-size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length);
-bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length);
+bool IsValidUnicode(const char*, size_t, bool*, bool*);
+bool IsValidUTF7(const char*, size_t);
+bool IsValidUTF8(const char*, size_t);

 // Google's "Compact Encoding Detection" 
 extern NP2ENCODING g_Encodings[];
--- a/src/EncodingCED.cpp
+++ b/src/EncodingCED.cpp
@ -117,9 +117,9 @@ extern "C" {
 #define ENC_PARSE_NAM_ISO_2022_JP          "ISO-2022-jp,iso2022jp,"
 #define ENC_PARSE_NAM_ISO_2022_KR          "ISO-2022-kr,iso2022kr,csiso2022kr,"
 #define ENC_PARSE_NAM_X_CHINESE_CNS        "x-Chinese-CNS,xchinesecns,"
-#define ENC_PARSE_NAM_JOHAB                "johab,johab,"
-#define ENC_PARSE_NAM_ISO_8859_10          "ISO-8859-10,iso885910,Windows-28600,Windows28600,"
-#define ENC_PARSE_NAM_BIG5_HKSCS           "big5hkscs,cnbig5hkscs,xxbig5hkscs,"
+//#define ENC_PARSE_NAM_JOHAB                "johab,johab,"
+//#define ENC_PARSE_NAM_ISO_8859_10          "ISO-8859-10,iso885910,Windows-28600,Windows28600,"
+//#define ENC_PARSE_NAM_BIG5_HKSCS           "big5hkscs,cnbig5hkscs,xxbig5hkscs,"
 //=============================================================================


@ -204,10 +204,10 @@ extern "C" NP2ENCODING g_Encodings[] = {
  /* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312,         IDS_ENC_HZ_GB2312,         HZ_GB_2312,         L"" }, // Chinese Simplified (HZ-GB2312)
  /* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP,       IDS_ENC_ISO_2022_JP,       KDDI_ISO_2022_JP,   L"" }, // Japanese (JIS)
  /* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR,       IDS_ENC_ISO_2022_KR,       ISO_2022_KR,        L"" }, // Korean (ISO-2022-KR)
-  /* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS,     IDS_ENC_X_CHINESE_CNS,     CHINESE_CNS,        L"" }, // Chinese Traditional (CNS)
-  /* 080 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1361,  ENC_PARSE_NAM_JOHAB,             IDS_ENC_JOHAB,             CED_NO_MAPPING,     L"" }, // Korean (Johab)
-  /* 081 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28600, ENC_PARSE_NAM_ISO_8859_10,       IDS_ENC_ISO_8859_10,       ISO_8859_10,        L"" }, // Nordic (ISO 8859-10)
-  /* 082 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 951,   ENC_PARSE_NAM_BIG5_HKSCS,        IDS_ENC_BIG5_HKSCS,        BIG5_HKSCS,         L"" }  // Chinese (Hong Kong Supplementary Character Set)
+  /* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS,     IDS_ENC_X_CHINESE_CNS,     CHINESE_CNS,        L"" } // Chinese Traditional (CNS)
+  ///* 080 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1361,  ENC_PARSE_NAM_JOHAB,             IDS_ENC_JOHAB,             CED_NO_MAPPING,     L"" }, // Korean (Johab)
+  ///* 081 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28600, ENC_PARSE_NAM_ISO_8859_10,       IDS_ENC_ISO_8859_10,       ISO_8859_10,        L"" }, // Nordic (ISO 8859-10)
+  ///* 082 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 951,   ENC_PARSE_NAM_BIG5_HKSCS,        IDS_ENC_BIG5_HKSCS,        BIG5_HKSCS,         L"" }  // Chinese (Hong Kong Supplementary Character Set)

  
 #if 0
@ -322,6 +322,7 @@ static int __fastcall FindCodePage(const Encoding& encoding)
    iCodePage = 1250;
    break;
  case ISO_8859_4:
+  case ISO_8859_10:
    iCodePage = 1257;
    break;
  case ISO_8859_5:
@ -410,7 +411,7 @@ static int __fastcall MapEncoding2CPI(const char* const text, const size_t len,
    {
      bool bBOM;
      bool bReverse;
-      if (IsUnicode(text, len, &bBOM, &bReverse)) {
+      if (IsValidUnicode(text, len, &bBOM, &bReverse)) {
        iNP3Encoding = bBOM ? (bReverse ? CPI_UNICODEBEBOM : CPI_UNICODEBOM) : (bReverse ? CPI_UNICODEBE : CPI_UNICODE);
      }
    }