+ fix: integration of "tellenc" encoding detection ideas

2026-06-14 21:09:05 +08:00 · 2018-03-02 16:13:34 +01:00 · 2018-03-02 16:13:34 +01:00 · 98c919dfcf
commit 98c919dfcf
parent 504c9d9404
6 changed files with 459 additions and 452 deletions
--- a/src/Edit.c
+++ b/src/Edit.c
@ -1064,9 +1064,19 @@ BOOL EditLoadFile(
  }

  const int iForcedEncoding = Encoding_SrcCmdLn(CPI_GET);
-  const int iFileEncWeak = (Encoding_SrcWeak(CPI_GET) != CPI_NONE) ? Encoding_SrcWeak(CPI_GET) : CPI_ANSI_DEFAULT;
-  const int iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding : (bUseDefaultForFileEncoding ? g_iDefaultNewFileEncoding : iFileEncWeak);
-    //@@@ Encoding_IsINTERNAL(iFileEncWeak) ? g_iDefaultNewFileEncoding : iFileEncWeak;
+  const int iFileEncWeak = Encoding_SrcWeak(CPI_GET);
+  const int iAnalyzedEncoding = !bSkipEncodingDetection ? Encoding_Analyze(lpData, cbData) : CPI_NONE;
+
+  // choose best encoding guess
+  int iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding : (bUseDefaultForFileEncoding ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT);
+
+  if (iForcedEncoding != CPI_NONE)
+    iPreferedEncoding = iForcedEncoding;
+  else if (iFileEncWeak != CPI_NONE)
+    iPreferedEncoding = iFileEncWeak;
+  else if (iAnalyzedEncoding != CPI_NONE)
+    iPreferedEncoding = iAnalyzedEncoding;
+

  BOOL bBOM = FALSE;
  BOOL bReverse = FALSE;
@ -1088,10 +1098,11 @@ BOOL EditLoadFile(
    SendMessage(hwnd,SCI_SETEOLMODE,iLineEndings[g_iDefaultEOLMode],0);
    GlobalFree(lpData);
  }
-  else if (!bSkipEncodingDetection && 
-      (iForcedEncoding == CPI_NONE    || iForcedEncoding == CPI_UNICODE   || iForcedEncoding == CPI_UNICODEBE) &&
-      (iForcedEncoding == CPI_UNICODE || iForcedEncoding == CPI_UNICODEBE || IsUnicode(lpData,cbData,&bBOM,&bReverse)) &&
-      (iForcedEncoding == CPI_UNICODE || iForcedEncoding == CPI_UNICODEBE || !IsUTF8Signature(lpData))) // check for UTF-8 signature
+  // ===  UNICODE  ===
+  else if (!bSkipEncodingDetection &&  //TODO: use Encoding_IsUNICODE(iAnalyzedEncoding) here ???
+      (Encoding_IsUNICODE(iForcedEncoding) || (iForcedEncoding == CPI_NONE)) &&
+      (Encoding_IsUNICODE(iForcedEncoding) || IsUnicode(lpData,cbData,&bBOM,&bReverse)) &&
+      (Encoding_IsUNICODE(iForcedEncoding) || !IsUTF8Signature(lpData))) // check for UTF-8 signature
  {
    char* lpDataUTF8;

@ -1147,19 +1158,20 @@ BOOL EditLoadFile(
    }
  }

-  else {
+  else { // ===  ALL OTHERS  ===
+
    FileVars_Init(lpData,cbData,&fvCurFile);
-    if (!bSkipEncodingDetection && (iForcedEncoding == CPI_NONE || iForcedEncoding == CPI_UTF8 || iForcedEncoding == CPI_UTF8SIGN) &&
-            ((IsUTF8Signature(lpData) ||
-              FileVars_IsUTF8(&fvCurFile) ||
-              (iForcedEncoding == CPI_UTF8 || iForcedEncoding == CPI_UTF8SIGN) ||
-              (!bPreferOEM && bLoadASCIIasUTF8) ||  // from menu "Reload As UTF-8"
-              (IsUTF8(lpData,cbData) &&
-              (((UTF8_mbslen_bytes(UTF8StringStart(lpData)) - 1 !=
-                UTF8_mbslen(UTF8StringStart(lpData),IsUTF8Signature(lpData) ? cbData-3 : cbData)) ||
-                (!bPreferOEM && (
-                  Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8))))))) && !(FileVars_IsNonUTF8(&fvCurFile) &&
-                  (iForcedEncoding != CPI_UTF8 && iForcedEncoding != CPI_UTF8SIGN)))
+
+    // ===  UTF-8  ===
+    if (!bSkipEncodingDetection && (Encoding_IsNONE(iForcedEncoding) || Encoding_IsUTF8(iForcedEncoding)) &&
+      ((IsUTF8Signature(lpData) ||
+        FileVars_IsUTF8(&fvCurFile) ||
+        (Encoding_IsUTF8(iForcedEncoding) ||
+        (!bPreferOEM && bLoadASCIIasUTF8) ||  // from menu "Reload As UTF-8"
+         (IsUTF8(lpData,cbData) &&
+         (((UTF8_mbslen_bytes(UTF8StringStart(lpData)) - 1 != UTF8_mbslen(UTF8StringStart(lpData),IsUTF8Signature(lpData) ? cbData-3 : cbData)) ||
+          (!bPreferOEM && (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8))))))) && 
+       !(FileVars_IsNonUTF8(&fvCurFile) && !Encoding_IsUTF8(iForcedEncoding))))
    {
      Encoding_SciSetCodePage(hwnd,CPI_UTF8);
      EditSetNewText(hwnd,"",0);
@ -1176,13 +1188,13 @@ BOOL EditLoadFile(
      GlobalFree(lpData);
    }

-    else {
+    else { // ===  ALL OTHER  ===

-      if (iForcedEncoding != CPI_NONE)
+      if (!Encoding_IsNONE(iForcedEncoding))
        *iEncoding = iForcedEncoding;
      else {
        *iEncoding = FileVars_GetEncoding(&fvCurFile);
-        if (*iEncoding == CPI_NONE) {
+        if (Encoding_IsNONE(*iEncoding)) {
          if (fvCurFile.mask & FV_ENCODING)
            *iEncoding = CPI_ANSI_DEFAULT;
          else {
--- a/src/Encoding.c
+++ b/src/Encoding.c
@ -3,11 +3,11 @@
 *                                                                             *
 * Notepad3                                                                    *
 *                                                                             *
-* Encoding.c                                                                   *
-*   General helper functions                                                  *
+* Encoding.c                                                                  *
+*   Handling and Helpers for File Encoding                                    *
 *   Based on code from Notepad2, (c) Florian Balmer 1996-2011                 *
-*	Parts taken from SciTE, (c) Neil Hodgson                                    *
-*	MinimizeToTray, (c) 2000 Matthew Ellis                                      *
+*	                                                                            *
+*	                                                                            *
 *                                                                             *
 *                                                  (c) Rizonesoft 2015-2018   *
 *                                                    https://rizonesoft.com   *
@ -28,7 +28,7 @@

 #include <windows.h>
 #include <commctrl.h>
-#include <uxtheme.h>
+#include <stdlib.h>

 #include "../uthash/utarray.h"

@ -219,7 +219,7 @@ int Encoding_SrcCmdLn(int iSrcEncoding) {
    if (Encoding_IsValid(iSrcEncoding))
      SourceEncoding = iSrcEncoding;
    else
-      SourceEncoding = CPI_UTF8;
+      SourceEncoding = CPI_ANSI_DEFAULT;
  }
  else if (iSrcEncoding == CPI_NONE) {
    SourceEncoding = CPI_NONE;
@ -260,9 +260,53 @@ BOOL Encoding_HasChanged(int iOriginalEncoding) {
 // ============================================================================
 // ============================================================================

+/*
+* Mostly taken from "tellenc"
+* Program to detect the encoding of text.  It currently supports ASCII,
+* UTF-8, UTF-16/32 (little-endian or big-endian), Latin1, Windows-1252,
+* CP437, GB2312, GBK, Big5, and SJIS, among others.
+*
+* Copyright (C) 2006-2016 Wu Yongwei <wuyongwei@gmail.com>
+*
+* This software is provided 'as-is', without any express or implied
+* warranty.  In no event will the authors be held liable for any
+* damages arising from the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute
+* it freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must
+*    not claim that you wrote the original software.  If you use this
+*    software in a product, an acknowledgment in the product
+*    documentation would be appreciated but is not required.
+* 2. Altered source versions must be plainly marked as such, and must
+*    not be misrepresented as being the original software.
+* 3. This notice may not be removed or altered from any source
+*    distribution.
+*
+*
+* The latest version of this software should be available at:
+*      <URL:https://github.com/adah1972/tellenc>
+*
+*/
+
+
 typedef unsigned short uint16_t;
 typedef unsigned int   uint32_t;

+
+typedef enum _UTF8_ValidationState  // per-byte class in utf8_char_table; also the running state in Encoding_Analyze
+{
+  UTF8_INVALID,  // byte that makes the buffer fail UTF-8 validation (0x00, 0xc0-0xc1, 0xf5-0xff in the table)
+  UTF8_1,        // single-byte character (0x01-0x7f); as a state: no multi-byte sequence is pending
+  UTF8_2,        // lead byte of a 2-byte sequence (0xc2-0xdf)
+  UTF8_3,        // lead byte of a 3-byte sequence (0xe0-0xef)
+  UTF8_4,        // lead byte of a 4-byte sequence (0xf0-0xf4)
+  UTF8_TAIL      // continuation byte (0x80-0xbf); the validator decrements its state once per tail byte, so the order UTF8_1..UTF8_4 matters
+} UTF8_ValidationState;
+
+
 typedef struct {
  int         encID;
  uint16_t    dbyte;
@ -284,6 +328,13 @@ static freq_analysis_data_t freq_analysis_data[] =
 { 19, 0xfa9d, "windows-1250" },         // "úť" (Slovak)
 { 19, 0x9e69, "windows-1250" },         // "ži" (Slovenian)
 { 19, 0xe869, "windows-1250" },         // "či" (Slovenian)
+  
+{ 30, 0xe820, "windows-1251" },         // "и " (Cyrillic)
+{ 30, 0xe3ee, "windows-1251" },         // "го" (Cyrillic)
+{ 30, 0xeaee, "windows-1251" },         // "ко" (Cyrillic)
+{ 30, 0xf1ea, "windows-1251" },         // "ск" (Cyrillic)
+{ 30, 0xf1f2, "windows-1251" },         // "ст" (Cyrillic)
+
 { 67, 0xe020, "windows-1252" },         // "à " (French)
 { 67, 0xe920, "windows-1252" },         // "é " (French)
 { 67, 0xe963, "windows-1252" },         // "éc" (French)
@ -304,43 +355,43 @@ static freq_analysis_data_t freq_analysis_data[] =
 { 52, 0xc4c4, "cp437" },                // "──"
 { 52, 0xcdcd, "cp437" },                // "══"
 { 52, 0xdbdb, "cp437" },                // "██"
-{ 72, 0xa1a1, "gbk" },                  // "　"
-{ 72, 0xa1a2, "gbk" },                  // "、"
-{ 72, 0xa1a3, "gbk" },                  // "。"
-{ 72, 0xa1a4, "gbk" },                  // "·"
-{ 72, 0xa1b6, "gbk" },                  // "《"
-{ 72, 0xa1b7, "gbk" },                  // "》"
-{ 72, 0xa3ac, "gbk" },                  // "，"
-{ 72, 0xa3ba, "gbk" },                  // "："
-{ 72, 0xb5c4, "gbk" },                  // "的"
-{ 72, 0xc1cb, "gbk" },                  // "了"
-{ 72, 0xd2bb, "gbk" },                  // "一"
-{ 72, 0xcac7, "gbk" },                  // "是"
-{ 72, 0xb2bb, "gbk" },                  // "不"
-{ 72, 0xb8f6, "gbk" },                  // "个"
-{ 72, 0xc8cb, "gbk" },                  // "人"
-{ 72, 0xd5e2, "gbk" },                  // "这"
-{ 72, 0xd3d0, "gbk" },                  // "有"
-{ 72, 0xced2, "gbk" },                  // "我"
-{ 72, 0xc4e3, "gbk" },                  // "你"
-{ 72, 0xcbfb, "gbk" },                  // "他"
-{ 72, 0xcbfd, "gbk" },                  // "她"
-{ 72, 0xc9cf, "gbk" },                  // "上"
-{ 72, 0xbfb4, "gbk" },                  // "看"
-{ 72, 0xd6ae, "gbk" },                  // "之"
-{ 72, 0xbbb9, "gbk" },                  // "还"
-{ 72, 0xbfc9, "gbk" },                  // "可"
-{ 72, 0xbaf3, "gbk" },                  // "后"
-{ 72, 0xd6d0, "gbk" },                  // "中"
-{ 72, 0xd0d0, "gbk" },                  // "行"
-{ 72, 0xb1d2, "gbk" },                  // "币"
-{ 72, 0xb3f6, "gbk" },                  // "出"
-{ 72, 0xb7d1, "gbk" },                  // "费"
-{ 72, 0xb8d0, "gbk" },                  // "感"
-{ 72, 0xbef5, "gbk" },                  // "觉"
-{ 72, 0xc4ea, "gbk" },                  // "年"
-{ 72, 0xd4c2, "gbk" },                  // "月"
-{ 72, 0xc8d5, "gbk" },                  // "日"
+{ 20, 0xa1a1, "gbk" },                  // "　"
+{ 20, 0xa1a2, "gbk" },                  // "、"
+{ 20, 0xa1a3, "gbk" },                  // "。"
+{ 20, 0xa1a4, "gbk" },                  // "·"
+{ 20, 0xa1b6, "gbk" },                  // "《"
+{ 20, 0xa1b7, "gbk" },                  // "》"
+{ 20, 0xa3ac, "gbk" },                  // "，"
+{ 20, 0xa3ba, "gbk" },                  // "："
+{ 20, 0xb5c4, "gbk" },                  // "的"
+{ 20, 0xc1cb, "gbk" },                  // "了"
+{ 20, 0xd2bb, "gbk" },                  // "一"
+{ 20, 0xcac7, "gbk" },                  // "是"
+{ 20, 0xb2bb, "gbk" },                  // "不"
+{ 20, 0xb8f6, "gbk" },                  // "个"
+{ 20, 0xc8cb, "gbk" },                  // "人"
+{ 20, 0xd5e2, "gbk" },                  // "这"
+{ 20, 0xd3d0, "gbk" },                  // "有"
+{ 20, 0xced2, "gbk" },                  // "我"
+{ 20, 0xc4e3, "gbk" },                  // "你"
+{ 20, 0xcbfb, "gbk" },                  // "他"
+{ 20, 0xcbfd, "gbk" },                  // "她"
+{ 20, 0xc9cf, "gbk" },                  // "上"
+{ 20, 0xbfb4, "gbk" },                  // "看"
+{ 20, 0xd6ae, "gbk" },                  // "之"
+{ 20, 0xbbb9, "gbk" },                  // "还"
+{ 20, 0xbfc9, "gbk" },                  // "可"
+{ 20, 0xbaf3, "gbk" },                  // "后"
+{ 20, 0xd6d0, "gbk" },                  // "中"
+{ 20, 0xd0d0, "gbk" },                  // "行"
+{ 20, 0xb1d2, "gbk" },                  // "币"
+{ 20, 0xb3f6, "gbk" },                  // "出"
+{ 20, 0xb7d1, "gbk" },                  // "费"
+{ 20, 0xb8d0, "gbk" },                  // "感"
+{ 20, 0xbef5, "gbk" },                  // "觉"
+{ 20, 0xc4ea, "gbk" },                  // "年"
+{ 20, 0xd4c2, "gbk" },                  // "月"
+{ 20, 0xc8d5, "gbk" },                  // "日"
 { 22, 0xa140, "big5" },                 // "　"
 { 22, 0xa141, "big5" },                 // "，"
 { 22, 0xa143, "big5" },                 // "。"
@ -416,19 +467,16 @@ static freq_analysis_data_t freq_analysis_data[] =
 };
 // ============================================================================

-typedef struct _char_count_t {
-  uint16_t first;
-  uint32_t second;
-} char_count_t;
-
-//typedef pair<uint16_t, uint32_t>  char_count_t;
-//typedef map<uint16_t, uint32_t>   char_count_map_t;
-//typedef vector<char_count_t>      char_count_vec_t;
+#define MAX_CHAR 256

+typedef struct _dbyte_cnt_t {  // one counter entry: a 16-bit key plus its number of occurrences
+  uint16_t dblByte;  // key; Encoding_Analyze stores (previous byte << 8) + current byte here
+  uint32_t count;    // how many times the key has been seen
+} dbyte_cnt_t;

 int __fastcall check_freq_dbyte(uint16_t dbyte)
 {
-  for (size_t i = 0; i < sizeof freq_analysis_data / sizeof(freq_analysis_data_t); ++i) {
+  for (size_t i = 0; i < (sizeof freq_analysis_data / sizeof(freq_analysis_data_t)); ++i) {
    if (dbyte == freq_analysis_data[i].dbyte) {
      return freq_analysis_data[i].encID;
    }
@ -437,18 +485,20 @@ int __fastcall check_freq_dbyte(uint16_t dbyte)
 }
 // ============================================================================

-
-int __fastcall search_freq_dbytes(const UT_array* dbyte_char_cnt)
+// --------------------------------------------------------------
+// arg dbyte_cnt_map must be sorted (high count first)
+//
+int __fastcall search_freq_dbytes(const UT_array* dbyte_cnt_map)
 {
-  size_t max_comp_idx = 10;
-  if (max_comp_idx > utarray_len(dbyte_char_cnt)) {
-    max_comp_idx = utarray_len(dbyte_char_cnt);
-  }
-  for (size_t i = 0; i < max_comp_idx; ++i) {
+  size_t max_comp_cnt = 10;
+  size_t cnt = 0;

-    const char_count_t* ccnt = (char_count_t*)utarray_eltptr(dbyte_char_cnt, i);
+  for (dbyte_cnt_t* p = (dbyte_cnt_t*)utarray_front(dbyte_cnt_map);
+       (p != NULL) && (++cnt <= max_comp_cnt);
+       p = (dbyte_cnt_t*)utarray_next(dbyte_cnt_map, p)) {
+
+    const int enc = check_freq_dbyte(p->dblByte);

-    const int enc = check_freq_dbyte(ccnt->first);
    if (enc > CPI_NONE) {
      return enc;
    }
@ -458,35 +508,300 @@ int __fastcall search_freq_dbytes(const UT_array* dbyte_char_cnt)
 // ============================================================================


+static UTF8_ValidationState utf8_char_table[MAX_CHAR];

-int Encoding_TellEncoding(const unsigned char* const buffer, const size_t len)
+// Fills utf8_char_table with the UTF-8 role of every possible byte value.
+void init_utf8_validation_char_table()
+{
+  for (int byte = 0; byte <= 0xff; ++byte) {
+    UTF8_ValidationState role;
+
+    if (byte == 0x00) {
+      role = UTF8_INVALID;   // NUL
+    }
+    else if (byte <= 0x7f) {
+      role = UTF8_1;         // single-byte character
+    }
+    else if (byte <= 0xbf) {
+      role = UTF8_TAIL;      // continuation byte
+    }
+    else if (byte <= 0xc1) {
+      role = UTF8_INVALID;   // 0xc0, 0xc1
+    }
+    else if (byte <= 0xdf) {
+      role = UTF8_2;         // lead byte, 2-byte sequence
+    }
+    else if (byte <= 0xef) {
+      role = UTF8_3;         // lead byte, 3-byte sequence
+    }
+    else if (byte <= 0xf4) {
+      role = UTF8_4;         // lead byte, 4-byte sequence
+    }
+    else {
+      role = UTF8_INVALID;   // 0xf5 .. 0xff
+    }
+
+    utf8_char_table[byte] = role;
+  }
+}
+// ============================================================================
+
+
+// Prepares a MAX_CHAR-sized counter array: slot i is keyed with byte value i
+// and starts with a count of zero.
+void __fastcall init_sbyte_char_count(dbyte_cnt_t sbyte_char_cnt[])
+{
+  size_t value = 0;
+  while (value < MAX_CHAR) {
+    dbyte_cnt_t* const slot = &sbyte_char_cnt[value];
+    slot->dblByte = (uint16_t)value;
+    slot->count = 0;
+    ++value;
+  }
+}
+// ============================================================================
+
+static const unsigned char NON_TEXT_CHARS[] = { 0, 26, 127, 255 };
+
+// Returns true when ch is one of the byte values listed in NON_TEXT_CHARS
+// (0, 26, 127, 255), false otherwise.
+__forceinline bool is_non_text(char ch)
+{
+  // Compare as unsigned: where plain char is signed, the byte 0xFF arrives
+  // here as -1 and would never equal the table entry 255.
+  const unsigned char uch = (unsigned char)ch;
+
+  for (size_t i = 0; i < sizeof(NON_TEXT_CHARS); ++i) {
+    if (uch == NON_TEXT_CHARS[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+// ============================================================================
+
+
+// Linear scan of dbyte_cnt_map for the entry whose dblByte equals dbyte.
+// Yields a pointer to that entry, or NULL when no entry matches.
+__forceinline dbyte_cnt_t* find_dbyte_count(const UT_array* const dbyte_cnt_map, const uint16_t dbyte)
+{
+  dbyte_cnt_t* entry = (dbyte_cnt_t*)utarray_front(dbyte_cnt_map);
+
+  while ((entry != NULL) && (entry->dblByte != dbyte)) {
+    entry = (dbyte_cnt_t*)utarray_next(dbyte_cnt_map, entry);
+  }
+  return entry;
+}
+// ============================================================================
+
+
+// qsort-style comparator for dbyte_cnt_t items: smallest count first.
+// Returns <0, 0 or >0 as lhs sorts before, equal to, or after rhs.
+static int ascending_count(const void *lhs, const void *rhs)
+{
+  const uint32_t lcnt = ((const dbyte_cnt_t*)lhs)->count;
+  const uint32_t rcnt = ((const dbyte_cnt_t*)rhs)->count;
+  // Compare instead of subtracting: an unsigned difference converted to int
+  // reports the wrong sign once the counts differ by more than INT_MAX.
+  return (lcnt > rcnt) - (lcnt < rcnt); // ascending order
+}
+
+// qsort-style comparator for dbyte_cnt_t items: largest count first.
+// Returns <0, 0 or >0 as lhs sorts before, equal to, or after rhs.
+static int descending_count(const void *lhs, const void *rhs)
+{
+  const uint32_t lcnt = ((const dbyte_cnt_t*)lhs)->count;
+  const uint32_t rcnt = ((const dbyte_cnt_t*)rhs)->count;
+  // Compare instead of subtracting: an unsigned difference converted to int
+  // reports the wrong sign once the counts differ by more than INT_MAX.
+  return (rcnt > lcnt) - (rcnt < lcnt); // descending order
+}
+
+// ============================================================================
+
+//typedef pair<uint16_t, uint32_t>  char_count_t;
+//typedef map<uint16_t, uint32_t>   char_count_map_t;
+//typedef vector<char_count_t>      char_count_vec_t;
+
+static const char NUL = '\0';
+static const char DOS_EOF = '\x1A';
+static const int EVEN = 0;
+static const int ODD = 1;
+
+static size_t nul_count_byte[2];
+static size_t nul_count_word[2];
+
+
+// Heuristic encoding guess for a raw byte buffer (adapted from "tellenc").
+//
+//   buffer  bytes to inspect
+//   len     number of bytes in buffer
+//
+// Returns an encoding ID: a UTF-16/UCS-4 ID when the NUL distribution
+// suggests one, CPI_ANSI_DEFAULT when no byte pair starting with a byte
+// >= 0x80 was seen, CPI_UTF8 when every byte passed the UTF-8 state machine,
+// otherwise the result of the double-byte frequency lookup. CPI_NONE means
+// no guess could be made.
+int Encoding_Analyze(const char* const buffer, const size_t len)
 {
  int iEncoding = CPI_NONE;
-  UT_icd char_count_icd = { sizeof(char_count_t), NULL, NULL, NULL };
-  UT_array* char_count_vector = NULL;
+  bool is_binary = false;
+  bool is_valid_utf8 = true;
+  bool is_valid_latin1 = true;
+  uint32_t dbyte_cnt = 0;
+  uint32_t dbyte_hihi_cnt = 0;
+
+  // The NUL counters are file-scope statics; clear them so that a previously
+  // analyzed buffer cannot skew the UTF-16/32 heuristics for this one.
+  nul_count_byte[EVEN] = nul_count_byte[ODD] = 0;
+  nul_count_word[EVEN] = nul_count_word[ODD] = 0;

-  utarray_new(char_count_vector, &char_count_icd);
-  utarray_reserve(char_count_vector, 256);
+  UT_icd dbyte_count_icd = { sizeof(dbyte_cnt_t), NULL, NULL, NULL };
+  UT_array* dbyte_count_map = NULL;

-  ///...
+  utarray_new(dbyte_count_map, &dbyte_count_icd);
+  utarray_reserve(dbyte_count_map, MAX_CHAR);

-  utarray_clear(char_count_vector);
-  utarray_free(char_count_vector);
+  //~dbyte_cnt_t sbyte_char_count[MAX_CHAR];
+  //~init_sbyte_char_count(sbyte_char_count);
+
+  // last_ch holds the previous byte when it was >= 0x80, EOF otherwise
+  int last_ch = EOF;
+  UTF8_ValidationState utf8_valid_state = UTF8_1;
+
+  for (size_t pos = 0; pos < len; ++pos) {
+
+    const unsigned char ch = buffer[pos];
+    //~ ++(sbyte_char_count[ch].count);
+
+    // Check for binary data (including UTF-16/32)
+    if (is_non_text(ch)) {
+      // a DOS EOF marker as the very last byte does not make the data binary
+      if (!is_binary && !(ch == DOS_EOF && pos == len - 1)) {
+        is_binary = true;
+      }
+      if (ch == NUL) {
+        // Count for NULs in even- and odd-number bytes
+        nul_count_byte[pos & 1]++;
+        if (pos & 1) {
+          if (buffer[pos - 1] == NUL) {
+            // Count for NULs in even- and odd-number words
+            nul_count_word[(pos / 2) & 1]++;
+          }
+        }
+      }
+    }
+
+    // Check for UTF-8 validity
+    if (is_valid_utf8) {
+      switch (utf8_char_table[ch]) {
+      case UTF8_INVALID:
+        is_valid_utf8 = false;
+        break;
+      case UTF8_1:
+        if (utf8_valid_state != UTF8_1) {
+          is_valid_utf8 = false;
+        }
+        break;
+      case UTF8_2:
+        if (utf8_valid_state != UTF8_1) {
+          is_valid_utf8 = false;
+        }
+        else {
+          utf8_valid_state = UTF8_2;
+        }
+        break;
+      case UTF8_3:
+        if (utf8_valid_state != UTF8_1) {
+          is_valid_utf8 = false;
+        }
+        else {
+          utf8_valid_state = UTF8_3;
+        }
+        break;
+      case UTF8_4:
+        if (utf8_valid_state != UTF8_1) {
+          is_valid_utf8 = false;
+        }
+        else {
+          utf8_valid_state = UTF8_4;
+        }
+        break;
+      case UTF8_TAIL:
+        // each continuation byte steps the state down towards UTF8_1
+        if (utf8_valid_state > UTF8_1) {
+          utf8_valid_state--;
+        }
+        else {
+          is_valid_utf8 = false;
+        }
+        break;
+      }
+    }
+
+    // Check whether non-Latin1 characters appear
+    if (is_valid_latin1) {
+      if (ch >= 0x80 && ch < 0xa0) {
+        is_valid_latin1 = false;
+      }
+    }
+
+    // Construct double-bytes and count
+    if (last_ch != EOF)
+    {
+      dbyte_cnt_t dbyte_item = { 0, 1 };
+      dbyte_item.dblByte = (uint16_t)((last_ch << 8) + ch);
+
+      dbyte_cnt_t* item = find_dbyte_count(dbyte_count_map, dbyte_item.dblByte);
+      if (item == NULL)
+        utarray_push_back(dbyte_count_map, &dbyte_item);
+      else
+        ++(item->count);
+
+      dbyte_cnt++;
+      if ((last_ch > 0xa0) && (ch > 0xa0)) {
+        ++dbyte_hihi_cnt;
+      }
+      //last_ch = EOF;
+    }
+
+    if (ch >= 0x80)
+      last_ch = ch;
+    else
+      last_ch = EOF;
+
+  } // for
+
+  if (!is_valid_utf8 && is_binary) {
+    // Heuristics for UTF-16/32
+    // (the "== 0 ||" tests short-circuit before each division)
+    if (nul_count_byte[EVEN] > 4 &&
+      (nul_count_byte[ODD] == 0 ||
+       nul_count_byte[EVEN] / nul_count_byte[ODD] > 20)) {
+      iEncoding = CPI_UNICODEBE;
+    }
+    else if (nul_count_byte[ODD] > 4 &&
+      (nul_count_byte[EVEN] == 0 ||
+       nul_count_byte[ODD] / nul_count_byte[EVEN] > 20)) {
+      iEncoding = CPI_UNICODE;
+    }
+    else if (nul_count_word[EVEN] > 4 &&
+      (nul_count_word[ODD] == 0 ||
+       nul_count_word[EVEN] / nul_count_word[ODD] > 20)) {
+      iEncoding = CPI_UCS4BE;   // utf-32 is not a built-in encoding for Notepad3
+    }
+    else if (nul_count_word[ODD] > 4 &&
+      (nul_count_word[EVEN] == 0 ||
+       nul_count_word[ODD] / nul_count_word[EVEN] > 20)) {
+      iEncoding = CPI_UCS4; // utf-32le is not a built-in encoding for Notepad3
+    }
+  }
+  else if (dbyte_cnt == 0) {
+    // No characters outside the scope of ASCII
+    iEncoding = CPI_ANSI_DEFAULT;
+  }
+  else if (is_valid_utf8) {
+    // Only valid UTF-8 sequences
+    iEncoding = CPI_UTF8;
+  }
+
+  if (iEncoding == CPI_NONE)  // still unknown ?
+  {
+    // Get the character counts in descending order
+    //~qsort((void*)sbyte_char_count, MAX_CHAR, sizeof(dbyte_cnt_t), descending_count);
+
+    // Get the double-byte counts in descending order
+    utarray_sort(dbyte_count_map, descending_count);
+
+    const int probEncoding = search_freq_dbytes(dbyte_count_map);
+
+    if (probEncoding != CPI_NONE) {
+      iEncoding = probEncoding;
+    }
+    // percentage of high/high pairs; computed in 64 bits so that "* 100"
+    // cannot wrap, and with "+ 1" in the divisor to avoid dividing by zero
+    else if (((dbyte_hihi_cnt * 100ULL) / (dbyte_cnt + 1ULL)) < 5) {
+      // mostly a low-byte follows a high-byte
+      iEncoding = CPI_ANSI_DEFAULT;
+    }
+  }
+
+  utarray_clear(dbyte_count_map);
+  utarray_free(dbyte_count_map);

-  UNUSED(buffer);
-  UNUSED(len);
  return iEncoding;
 }
 // ============================================================================


-
+// ============================================================================
+// ============================================================================
+//
+// END  OF  "TELLENC"  PART
+//
 // ============================================================================
 // ============================================================================

-void Encoding_InitDefaults() 
+
+void Encoding_InitDefaults()

 {
+  // init tellenc code page detection
+  init_utf8_validation_char_table();
+
  const UINT uCodePageMBCS[20] = {
    42, // (Symbol)
    50220,50221,50222,50225,50227,50229, // (Chinese, Japanese, Korean) 
@ -871,85 +1186,86 @@ BOOL Encoding_GetFromComboboxEx(HWND hwnd, int *pidEncoding) {


 UINT Encoding_GetCodePage(int iEncoding) {
-  return g_Encodings[iEncoding].uCodePage;
+  // Negative IDs cannot index g_Encodings; answer CP_ACP for them.
+  if (iEncoding < 0)
+    return CP_ACP;
+  return g_Encodings[iEncoding].uCodePage;
 }
 // ============================================================================

 BOOL Encoding_IsDefault(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_DEFAULT);
+  // NCP_DEFAULT bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_DEFAULT);
 }
 // ============================================================================

 BOOL Encoding_IsANSI(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_ANSI);
+  // NCP_ANSI bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_ANSI);
 }
 // ============================================================================

 BOOL Encoding_IsOEM(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_OEM);
+  // NCP_OEM bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_OEM);
 }
 // ============================================================================

 BOOL Encoding_IsUTF8(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_UTF8);
+  // NCP_UTF8 bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_UTF8);
 }
 // ============================================================================

 BOOL Encoding_IsUTF8_SIGN(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_UTF8_SIGN);
+  // NCP_UTF8_SIGN bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_UTF8_SIGN);
 }
 // ============================================================================

 BOOL Encoding_IsMBCS(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_MBCS);
+  // NCP_MBCS bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_MBCS);
 }
 // ============================================================================

 BOOL Encoding_IsUNICODE(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE);
+  // NCP_UNICODE bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE);
 }
 // ============================================================================

 BOOL Encoding_IsUNICODE_BOM(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE_BOM);
+  // NCP_UNICODE_BOM bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE_BOM);
 }
 // ============================================================================

 BOOL Encoding_IsUNICODE_REVERSE(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE_REVERSE);
+  // NCP_UNICODE_REVERSE bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_UNICODE_REVERSE);
 }
 // ============================================================================


 BOOL Encoding_IsINTERNAL(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_INTERNAL);
+  // NCP_INTERNAL bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_INTERNAL);
 }
 // ============================================================================

 BOOL Encoding_IsEXTERNAL_8BIT(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_EXTERNAL_8BIT);
+  // NCP_EXTERNAL_8BIT bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_EXTERNAL_8BIT);
 }
 // ============================================================================

 BOOL Encoding_IsRECODE(int iEncoding) {
-  return (g_Encodings[iEncoding].uFlags & NCP_RECODE);
+  // NCP_RECODE bit of the table entry; negative IDs have no entry -> FALSE.
+  if (iEncoding < 0)
+    return FALSE;
+  return (g_Encodings[iEncoding].uFlags & NCP_RECODE);
 }
 // ============================================================================


 void Encoding_SetDefaultFlag(int iEncoding) {
-  g_Encodings[iEncoding].uFlags |= NCP_DEFAULT;
+  // Negative IDs have no g_Encodings entry; there is nothing to mark.
+  if (iEncoding < 0)
+    return;
+  g_Encodings[iEncoding].uFlags |= NCP_DEFAULT;
 }
 // ============================================================================


 const WCHAR* Encoding_GetLabel(int iEncoding) {
-  return g_Encodings[iEncoding].wchLabel;
+  // Label of the table entry; negative IDs have no entry -> NULL.
+  if (iEncoding < 0)
+    return NULL;
+  return g_Encodings[iEncoding].wchLabel;
 }
 // ============================================================================

 const char* Encoding_GetParseNames(int iEncoding) {
-  return g_Encodings[iEncoding].pszParseNames;
+  // Parse names of the table entry; negative IDs have no entry -> NULL.
+  if (iEncoding < 0)
+    return NULL;
+  return g_Encodings[iEncoding].pszParseNames;
 }
 // ============================================================================

@ -1251,324 +1567,3 @@ INT UTF8_mbslen(LPCSTR source, INT byte_length)
  return wchar_length;
 }
 // ============================================================================
-
-
-
-
-/*
-* Copyright (C) 2006-2016 Wu Yongwei <wuyongwei@gmail.com>
-*
-* This software is provided 'as-is', without any express or implied
-* warranty.  In no event will the authors be held liable for any
-* damages arising from the use of this software.
-*
-* Permission is granted to anyone to use this software for any purpose,
-* including commercial applications, and to alter it and redistribute
-* it freely, subject to the following restrictions:
-*
-* 1. The origin of this software must not be misrepresented; you must
-*    not claim that you wrote the original software.  If you use this
-*    software in a product, an acknowledgement in the product
-*    documentation would be appreciated but is not required.
-* 2. Altered source versions must be plainly marked as such, and must
-*    not be misrepresented as being the original software.
-* 3. This notice may not be removed or altered from any source
-*    distribution.
-*
-*
-* The latest version of this software should be available at:
-*      <URL:https://github.com/adah1972/tellenc>
-*
-*/
-
-/**
-* @file    TellEnc.c
-*
-* Program to detect the encoding of text.  It currently supports ASCII,
-* UTF-8, UTF-16/32 (little-endian or big-endian), Latin1, Windows-1252,
-* CP437, GB2312, GBK, Big5, and SJIS, among others.
-*
-* @version 1.22, 2016/07/26
-* @author  Wu Yongwei
-*/
-
-#define MAX_CHAR 256
-
-
-
-
-
-
-static const unsigned char NON_TEXT_CHARS[] = { 0, 26, 127, 255 };
-static const char NUL = '\0';
-static const char DOS_EOF = '\x1A';
-static const int EVEN = 0;
-static const int ODD  = 1;
-
-
-// ============================================================================
-
-
-static size_t nul_count_byte[2];
-static size_t nul_count_word[2];
-
-static bool is_binary = false;
-static bool is_valid_utf8 = true;
-static bool is_valid_latin1 = true;
-static uint32_t dbyte_cnt = 0;
-static uint32_t dbyte_hihi_cnt = 0;
-
-
-// ============================================================================
-// ============================================================================
-
-
-static inline bool is_non_text(char ch)
-{
-  for (size_t i = 0; i < sizeof(NON_TEXT_CHARS); ++i) {
-    if (ch == NON_TEXT_CHARS[i]) {
-      return true;
-    }
-  }
-  return false;
-}
-// ============================================================================
-
-
-
-
-static void init_sbyte_char_count(char_count_t sbyte_char_cnt[])
-{
-  for (size_t i = 0; i < MAX_CHAR; ++i) {
-    sbyte_char_cnt[i].first = (uint16_t)i;
-    sbyte_char_cnt[i].second = 0;
-  }
-}
-// ============================================================================
-
-
-
-
-
-
-#if FALSE
-
-
-typedef struct _pattern_t {
-  const char* name;
-  const char* pattern;
-  size_t pattern_len;
-} pattern_t;
-
-static const char* check_ucs_bom(const unsigned char* const buffer, const size_t len)
-{
-  const pattern_t patterns[] = {
-      { "ucs-4",     "\x00\x00\xFE\xFF",  4 },
-      { "ucs-4le",   "\xFF\xFE\x00\x00",  4 },
-      { "utf-8",     "\xEF\xBB\xBF",      3 },
-      { "utf-16",    "\xFE\xFF",          2 },
-      { "utf-16le",  "\xFF\xFE",          2 },
-      { NULL,        NULL,                0 }
-  };
-  for (size_t i = 0; patterns[i].name; ++i) {
-    const pattern_t* item = &(patterns[i]);
-    if (len >= item->pattern_len &&  memcmp(buffer, item->pattern, item->pattern_len) == 0) {
-      return item->name;
-    }
-  }
-  return NULL;
-}
-// ============================================================================
-
-
-
-
-
-
-const char* tellenc(const unsigned char* const buffer, const size_t len)
-{
-    if (len == 0) {
-        return "unknown";
-    }
-
-    const char* result = check_ucs_bom(buffer, len);
-    if (result) {
-        return result;
-    }
-
-    char_count_t sbyte_char_cnt[MAX_CHAR];
-    char_count_map_t dbyte_char_cnt_map;
-    init_sbyte_char_count(sbyte_char_cnt);
-
-    unsigned char ch;
-    int last_ch = EOF;
-    int utf8_state = UTF8_1;
-    for (size_t i = 0; i < len; ++i) {
-        ch = buffer[i];
-        sbyte_char_cnt[ch].second++;
-
-        // Check for binary data (including UTF-16/32)
-        if (is_non_text(ch)) {
-            if (!is_binary && !(ch == DOS_EOF && i == len - 1)) {
-                is_binary = true;
-            }
-            if (ch == NUL) {
-                // Count for NULs in even- and odd-number bytes
-                nul_count_byte[i & 1]++;
-                if (i & 1) {
-                    if (buffer[i - 1] == NUL) {
-                        // Count for NULs in even- and odd-number words
-                        nul_count_word[(i / 2) & 1]++;
-                    }
-                }
-            }
-        }
-
-        // Check for UTF-8 validity
-        if (is_valid_utf8) {
-            switch (utf8_char_table[ch]) {
-            case UTF8_INVALID:
-                is_valid_utf8 = false;
-                break;
-            case UTF8_1:
-                if (utf8_state != UTF8_1) {
-                    is_valid_utf8 = false;
-                }
-                break;
-            case UTF8_2:
-                if (utf8_state != UTF8_1) {
-                    is_valid_utf8 = false;
-                } else {
-                    utf8_state = UTF8_2;
-                }
-                break;
-            case UTF8_3:
-                if (utf8_state != UTF8_1) {
-                    is_valid_utf8 = false;
-                } else {
-                    utf8_state = UTF8_3;
-                }
-                break;
-            case UTF8_4:
-                if (utf8_state != UTF8_1) {
-                    is_valid_utf8 = false;
-                } else {
-                    utf8_state = UTF8_4;
-                }
-                break;
-            case UTF8_TAIL:
-                if (utf8_state > UTF8_1) {
-                    utf8_state--;
-                } else {
-                    is_valid_utf8 = false;
-                }
-                break;
-            }
-        }
-
-        // Check whether non-Latin1 characters appear
-        if (is_valid_latin1) {
-            if (ch >= 0x80 && ch < 0xa0) {
-                is_valid_latin1 = false;
-            }
-        }
-
-        // Construct double-bytes and count
-        if (last_ch != EOF) {
-            uint16_t dbyte_char = (last_ch << 8) + ch;
-            dbyte_char_cnt_map[dbyte_char]++;
-            dbyte_cnt++;
-            if (last_ch > 0xa0 && ch > 0xa0) {
-                dbyte_hihi_cnt++;
-            }
-            last_ch = EOF;
-        } else if (ch >= 0x80) {
-            last_ch = ch;
-        }
-    }
-
-    // Get the character counts in descending order
-    sort(sbyte_char_cnt, sbyte_char_cnt + MAX_CHAR, greater_char_count());
-
-    // Get the double-byte counts in descending order
-    char_count_vec_t dbyte_char_cnt;
-    for (char_count_map_t::iterator it = dbyte_char_cnt_map.begin();
-            it != dbyte_char_cnt_map.end(); ++it) {
-        dbyte_char_cnt.push_back(*it);
-    }
-    sort(dbyte_char_cnt.begin(),
-         dbyte_char_cnt.end(),
-         greater_char_count());
-
-    if (!is_valid_utf8 && is_binary) {
-        // Heuristics for UTF-16/32
-        if        (nul_count_byte[EVEN] > 4 &&
-                   (nul_count_byte[ODD] == 0 ||
-                    nul_count_byte[EVEN] / nul_count_byte[ODD] > 20)) {
-            return "utf-16";
-        } else if (nul_count_byte[ODD] > 4 &&
-                   (nul_count_byte[EVEN] == 0 ||
-                    nul_count_byte[ODD] / nul_count_byte[EVEN] > 20)) {
-            return "utf-16le";
-        } else if (nul_count_word[EVEN] > 4 &&
-                   (nul_count_word[ODD] == 0 ||
-                    nul_count_word[EVEN] / nul_count_word[ODD] > 20)) {
-            return "ucs-4";   // utf-32 is not a built-in encoding for Vim
-        } else if (nul_count_word[ODD] > 4 &&
-                   (nul_count_word[EVEN] == 0 ||
-                    nul_count_word[ODD] / nul_count_word[EVEN] > 20)) {
-            return "ucs-4le"; // utf-32le is not a built-in encoding for Vim
-        } else {
-            return "binary";
-        }
-    } else if (dbyte_cnt == 0) {
-        // No characters outside the scope of ASCII
-        return "ascii";
-    } else if (is_valid_utf8) {
-        // Only valid UTF-8 sequences
-        return "utf-8";
-    } else if (const char* enc = search_freq_dbytes(dbyte_char_cnt)) {
-        return enc;
-    } else if (dbyte_hihi_cnt * 100 / dbyte_cnt < 5) {
-        // Mostly a low-byte follows a high-byte
-        return "windows-1252";
-    }
-    return NULL;
-}
-// ============================================================================
-
-
-#endif
-const char* tellenc(const unsigned char* const buffer, const size_t len) { UNUSED(buffer); UNUSED(len);  return NULL; }
-
-
-const char* tellenc_simplify(const char* const buffer, const size_t len)
-{
-  const char* enc = tellenc((const unsigned char*)buffer, len);
-    if (enc) {
-        if (strcmp(enc, "windows-1252") == 0 && is_valid_latin1) {
-            // Latin1 is subset of Windows-1252
-            return "latin1";
-        } else if (strcmp(enc, "gbk") == 0 && dbyte_hihi_cnt == dbyte_cnt) {
-            // Special case for GB2312: no high-byte followed by a low-byte
-            return "gb2312";
-        }
-    }
-    return enc;
-}
-// ============================================================================
-
-
-
-static bool bInitDone = false;
-
-int GetBufferEncoding(const char* const buffer, const size_t len)
-{
-  const char* enc = tellenc_simplify(buffer, len);
-
-  if (enc)
-    return 1;
-
-  return 0; // unknown
-}
-// ============================================================================
--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -65,6 +65,13 @@ extern int g_DOSEncoding;
 #define CPI_UTF8SIGN           7
 #define CPI_UTF7               8

+#define CPI_UTF32       CPI_NONE // invalid
+#define CPI_UTF32BE     CPI_NONE // invalid
+#define CPI_UCS4       CPI_UTF32 // invalid (alias of CPI_UTF32, i.e. CPI_NONE)
+#define CPI_UCS4BE   CPI_UTF32BE // invalid (alias of CPI_UTF32BE, i.e. CPI_NONE)
+
+#define Encoding_IsNONE(enc) ((enc) == CPI_NONE)
+
 #define IDS_ENCODINGNAME0  61000
 #define IDS_EOLMODENAME0   62000

@ -130,9 +137,7 @@ BOOL IsUTF7(const char*, int);
 INT UTF8_mbslen_bytes(LPCSTR utf8_string);
 INT UTF8_mbslen(LPCSTR source, INT byte_length);

-
-int Encoding_GetEncoding(const char* const, const size_t);
-
+int Encoding_Analyze(const char* const, const size_t);

 // --------------------------------------------------------------------------------------------------------------------------------

--- a/src/Helpers.c
+++ b/src/Helpers.c
@ -27,10 +27,8 @@
 #define VC_EXTRALEAN 1

 #include <windows.h>
+//#include <uxtheme.h>
 #include <shlobj.h>
-#include <shlwapi.h>
-#include <stdio.h>
-#include <string.h>
 //#include <pathcch.h>
 #include "scintilla.h"
 #include "resource.h"
--- a/src/Helpers.h
+++ b/src/Helpers.h
@ -16,11 +16,11 @@
 #ifndef _NP3_HELPERS_H_
 #define _NP3_HELPERS_H_

-#include <VersionHelpers.h>
 #define STRSAFE_NO_CB_FUNCTIONS
-#undef STRSAFE_NO_DEPRECATE      // don't allow deprecated functions
+#define STRSAFE_NO_DEPRECATE      // keep the legacy string functions usable (strsafe.h will not deprecate them)
 #include <strsafe.h>
 #include <shlwapi.h>
+#include <VersionHelpers.h>

 #include "typedefs.h"

@ -39,7 +39,6 @@ extern WCHAR g_wchIniFile[MAX_PATH];
 #define COUNTOF(ar) ARRAYSIZE(ar)   //#define COUNTOF(ar) (sizeof(ar)/sizeof(ar[0]))
 #define CSTRLEN(s)  (COUNTOF(s)-1)

-
 __forceinline void swapi(int* a, int* b) { int t = *a;  *a = *b;  *b = t; }
 __forceinline void swapos(DocPos* a, DocPos* b) { DocPos t = *a;  *a = *b;  *b = t; }

@ -53,8 +52,7 @@ __forceinline void swapos(DocPos* a, DocPos* b) { DocPos t = *a;  *a = *b;  *b =
  WritePrivateProfileString(lpSection,lpName,(lpString),g_wchIniFile)
 #define IniDeleteSection(lpSection) \
  WritePrivateProfileSection(lpSection,NULL,g_wchIniFile)
-__inline BOOL IniSetInt(LPCWSTR lpSection, LPCWSTR lpName, int i)
-{
+__inline BOOL IniSetInt(LPCWSTR lpSection, LPCWSTR lpName, int i) {
  WCHAR tch[32] = { L'\0' }; StringCchPrintf(tch, COUNTOF(tch), L"%i", i); return IniSetString(lpSection, lpName, tch);
 }
 #define IniSetBool(lpSection,lpName,nValue) \
@ -73,8 +71,7 @@ BOOL IniSectionSetString(LPWSTR,LPCWSTR,LPCWSTR);
 __inline BOOL IniSectionSetInt(LPWSTR lpCachedIniSection,LPCWSTR lpName,int i) {
  WCHAR tch[32]={L'\0'}; StringCchPrintf(tch,COUNTOF(tch),L"%i",i); return IniSectionSetString(lpCachedIniSection,lpName,tch);
 }
-__inline BOOL IniSectionSetBool(LPWSTR lpCachedIniSection, LPCWSTR lpName, BOOL b)
-{
+__inline BOOL IniSectionSetBool(LPWSTR lpCachedIniSection, LPCWSTR lpName, BOOL b) {
  return IniSectionSetInt(lpCachedIniSection, lpName, (b ? 1 : 0));
 }

--- a/src/Notepad3.rc
+++ b/src/Notepad3.rc
@ -751,16 +751,16 @@ FONT 8, "MS Shell Dlg", 400, 0, 0x1
 BEGIN
    LTEXT           "&Default encoding (new file):",IDC_STATIC,7,7,90,8
    CONTROL         "",IDC_ENCODINGLIST,"ComboBoxEx32",CBS_DROPDOWNLIST | WS_CLIPSIBLINGS | WS_VSCROLL | WS_TABSTOP,7,20,167,128
-    CONTROL         "Fallback on detection failure.", IDC_USEASREADINGFALLBACK,
-                    "Button", BS_AUTOCHECKBOX | WS_TABSTOP, 7, 40, 108, 10
-    CONTROL         "Skip automatic &Unicode detection.",IDC_NOUNICODEDETECTION,
-                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,51,124,10
+    CONTROL         "Use as &fallback on detection failure.",IDC_USEASREADINGFALLBACK,
+                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,40,155,10
+    CONTROL         "Skip &encoding detection.",IDC_NOUNICODEDETECTION,
+                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,51,122,10
    CONTROL         "Open 7-bit &ASCII files in UTF-8 mode.",IDC_ASCIIASUTF8,
                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,62,136,10
    CONTROL         "Open 8-bit *.&nfo/diz files in DOS-437 mode.",IDC_NFOASOEM,
-                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,73,167,10
-    CONTROL         "Don't parse encoding &tags.",IDC_ENCODINGFROMFILEVARS,
-                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,84,102,10
+                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,73,155,10
+    CONTROL         "Don't parse encoding file &tags.",IDC_ENCODINGFROMFILEVARS,
+                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,84,126,10
    DEFPUSHBUTTON   "OK",IDOK,68,101,50,14
    PUSHBUTTON      "Cancel",IDCANCEL,124,101,50,14
 END