+ upd: current corrections for Compact Encoding Detection (CED by Google)

+ fix: problem with UTF-8 (no BOM) encoding vs. ANSI encoding detection + cln: code cleanup
2026-06-11 21:03:05 +08:00 · 2019-02-01 13:21:15 +01:00 · 2019-02-01 13:21:15 +01:00 · e83fbcc747
commit e83fbcc747
parent 2eee188627
14 changed files with 112 additions and 126 deletions
--- a/ced/ced/CMakeLists.txt
+++ b/ced/ced/CMakeLists.txt
@ -73,7 +73,7 @@ add_library(ced ${CED_LIBRARY_SOURCES})
 #add_definitions(-DHTML5_MODE)

 set(GTEST_INCLUDE_DIR "gtest/googletest/include")
-set(GTEST_LIB_DIR "${CMAKE_SOURCE_DIR}/gtest/googlemock/gtest")
+set(GTEST_LIB_DIR "${CMAKE_SOURCE_DIR}/gtest/lib")

 set(CED_UNITTEST_SOURCES
    compact_enc_det/compact_enc_det_unittest.cc
--- a/ced/ced/compact_enc_det/compact_enc_det.cc
+++ b/ced/ced/compact_enc_det/compact_enc_det.cc
@ -1,4 +1,4 @@
-// Copyright 2016 Google Inc.
+// Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -363,6 +363,9 @@ static const char* kWhatSetName[] = {"Ascii", "Other"};
 // regions (ISO-2022-xx, HZ)
 enum StateSoSi {SOSI_NONE, SOSI_ERROR, SOSI_ONEBYTE, SOSI_TWOBYTE};

+#define UTF8_ARR_CNT 6
+#define BYTE32_ARR_CNT 8
+
 typedef struct {
  const uint8* initial_src;       // For calculating byte offsets
  const uint8* limit_src;         // Range of input source
@ -386,30 +389,30 @@ typedef struct {
  bool do_latin_trigrams;           // True if we actually are scoring trigrams

  // Miscellaneous state variables for difficult encodings
-  int binary_quadrants_count;     // Number of four bigram quadrants seen:
-                                  //  0xxxxxxx0xxxxxxx 0xxxxxxx1xxxxxx
-                                  //  1xxxxxxx0xxxxxxx 1xxxxxxx1xxxxxx
-  int binary_8x4_count;           // Number of 8x4 buckets seen:
-  uint32 binary_quadrants_seen;   // Bit[i] set if bigram i.......i....... seen
-  uint32 binary_8x4_seen;         // Bit[i] set if bigram iii.....ii...... seen
-  int utf7_starts;                // Count of possible UTF-7 beginnings seen
-  int prior_utf7_offset;          // Source consumed by prior UTF-7 string
-  int next_utf8_ministate;        // Mini state for UTF-8 sequences
-  int utf8_minicount[6];          // Number of correct 2- 3- 4-byte seq, errors
-  int next_utf8utf8_ministate;    // Mini state for UTF8UTF8 sequences
-  int utf8utf8_odd_byte;          // UTF8UTF8 seq has odd number of bytes
-  int utf8utf8_minicount[6];      // Number of correct 2- 3- 4-byte seq, errors
+  int binary_quadrants_count;           // Number of four bigram quadrants seen:
+                                        //  0xxxxxxx0xxxxxxx 0xxxxxxx1xxxxxx
+                                        //  1xxxxxxx0xxxxxxx 1xxxxxxx1xxxxxx
+  int binary_8x4_count;                 // Number of 8x4 buckets seen:
+  uint32 binary_quadrants_seen;         // Bit[i] set if bigram i.......i....... seen
+  uint32 binary_8x4_seen;               // Bit[i] set if bigram iii.....ii...... seen
+  int utf7_starts;                      // Count of possible UTF-7 beginnings seen
+  int prior_utf7_offset;                // Source consumed by prior UTF-7 string
+  int next_utf8_ministate;              // Mini state for UTF-8 sequences
+  int utf8_minicount[UTF8_ARR_CNT];     // Number of correct 2- 3- 4-byte seq, errors
+  int next_utf8utf8_ministate;          // Mini state for UTF8UTF8 sequences
+  int utf8utf8_odd_byte;                // UTF8UTF8 seq has odd number of bytes
+  int utf8utf8_minicount[UTF8_ARR_CNT]; // Number of correct 2- 3- 4-byte seq, errors
  StateSoSi next_2022_state;            // Mini state for 2022 sequences
  StateSoSi next_hz_state;              // Mini state for HZ sequences
  bool next_eucjp_oddphase;             // Mini state for EUC-JP sequences
-  int byte32_count[8];            // Count of top 3 bits of byte1 of bigram
-                                  // 0x1x 2x3x 4x5x 6x7x 8x9x AxBx CxDx ExFx
-  uint32 active_special;          // Bits showing which special cases are active
-
-  Encoding tld_hint;              // Top TLD encoding or UNKNOWN
-  Encoding http_hint;             // What the document says about itself or
-  Encoding meta_hint;             // UNKNOWN_ENCODING. BOM is initial byte
-  Encoding bom_hint;              // order mark for UTF-xx
+  int byte32_count[BYTE32_ARR_CNT];     // Count of top 3 bits of byte1 of bigram
+                                        // 0x1x 2x3x 4x5x 6x7x 8x9x AxBx CxDx ExFx
+  uint32 active_special;                // Bits showing which special cases are active
+                                        
+  Encoding tld_hint;                    // Top TLD encoding or UNKNOWN
+  Encoding http_hint;                   // What the document says about itself or
+  Encoding meta_hint;                   // UNKNOWN_ENCODING. BOM is initial byte
+  Encoding bom_hint;                    // order mark for UTF-xx

  // small cache of previous interesting bigrams
  int next_prior_bigram;
@ -1371,9 +1374,10 @@ void PsMark(const uint8* src, int len, const uint8* isrc, int weightshift) {
 // Unfortunately, we have to skip back N lines since source was printed for
 // up to 8 bigrams before we get here. Match on src+1 to handle 0/31 better
 void PsHighlight(const uint8* src, const uint8* isrc, int trigram_val, int n) {
-  auto offset = static_cast<int>((src + 1) - isrc);
+  auto offset = src ? static_cast<int>((src + 1) - isrc) :
+                      static_cast<int>((const uint8*)1 - isrc);
  int offset32 = (offset % pssourcewidth);    // mod len bytes
-  offset -= offset32;                     // round down to multiple of len bytes
+  offset -= offset32;                         // round down to multiple of len bytes

  for (int i = 1; i <= 16; ++i) {
    if (do_src_offset[(next_do_src_line - i) & 0x0f] == offset) {
@ -1413,14 +1417,17 @@ void InitDetectEncodingState(DetectEncodingState* destatep) {
  destatep->utf7_starts = 0;
  destatep->prior_utf7_offset = 0;
  destatep->next_utf8_ministate = 0;
-  for (int & i : destatep->utf8_minicount) {i = 0;}
+  //for (int & i : destatep->utf8_minicount) {i = 0;}
+  std::fill(destatep->utf8_minicount, destatep->utf8_minicount + UTF8_ARR_CNT, 0);
  destatep->next_utf8utf8_ministate = 0;
  destatep->utf8utf8_odd_byte = 0;
-  for (int & i : destatep->utf8utf8_minicount) {i = 0;}
+  //for (int & i : destatep->utf8utf8_minicount) {i = 0;}
+  std::fill(destatep->utf8utf8_minicount, destatep->utf8utf8_minicount + UTF8_ARR_CNT, 0);
  destatep->next_2022_state = SOSI_NONE;
  destatep->next_hz_state = SOSI_NONE;
  destatep->next_eucjp_oddphase = false;
-  for (int & i : destatep->byte32_count) {i = 0;}
+  //for (int & i : destatep->byte32_count) {i = 0;}
+  std::fill(destatep->byte32_count, destatep->byte32_count + BYTE32_ARR_CNT, 0);
  destatep->active_special = 0xffffffff;
  destatep->tld_hint = UNKNOWN_ENCODING;
  destatep->http_hint = UNKNOWN_ENCODING;
@ -1910,9 +1917,7 @@ int ApplyDefaultHint(const CompactEncDet::TextCorpusType corpus_type,

  if (FLAGS_demo_nodefault) {
    // Demo, make initial probs all zero
-    for (int & i : destatep->enc_prob) {
-      i = 0;
-    }
+    std::fill(destatep->enc_prob, destatep->enc_prob + NUM_RANKEDENCODING, 0);
  }

  if (destatep->debug_data != nullptr) {
@ -2098,7 +2103,8 @@ void ApplyHints(const char* url_hint,
  if (hint_count == 0) {
    destatep->looking_for_latin_trigrams = true;    // Default needs trigrams
    destatep->declared_enc_2 = destatep->declared_enc_1;
-    hint_count += ApplyDefaultHint(corpus_type, destatep);
+    //~hint_count += ApplyDefaultHint(corpus_type, destatep);
+    ApplyDefaultHint(corpus_type, destatep);
  }


@ -2214,10 +2220,10 @@ void InitialBytesBoost(const uint8* src,
                       DetectEncodingState* destatep) {
  if (text_length < 4) {return;}

-  char32 pair01 = (src[0] << 8) | src[1];
-  char32 pair23 = (src[2] << 8) | src[3];
-  char32 quad0123 = (pair01 << 16) | pair23;
-
+  uint32 pair01 = (src[0] << 8) | src[1];
+  uint32 pair23 = (src[2] << 8) | src[3];
+  uint32 quad0123 = (pair01 << 16) | pair23;
+  
  bool utf_16_indication = false;
  bool utf_32_indication = false;
  int best_enc = -1;
@ -2539,7 +2545,7 @@ void UTF7BoostWhack(DetectEncodingState* destatep, int next_pair, uint8 byte2) {
      int nmod8 = n & 7;
      if ((n == 3) || (n == 6)) {
        // short but legal -- treat as neutral
-      } else if ((nmod8 == 0) | (nmod8 == 3) | (nmod8 == 6)) {
+      } else if ((nmod8 == 0) || (nmod8 == 3) || (nmod8 == 6)) {
        // Good length. Check for good Unicode.
        if (GoodUnicodeFromBase64(start, start + n)) {
          // Good length and Unicode, boost
@ -4893,7 +4899,6 @@ Encoding Rescan(Encoding enc,
  auto scanned_bytes = static_cast<int>(src - isrc);
  auto unscanned_bytes = static_cast<int>(srctextlimit - src);
  auto text_length = static_cast<int>(srctextlimit - isrc);
-  bool empty_rescan = true;

  // See if enough bytes left to bother doing rescan
  if (kMinRescanLength < unscanned_bytes) {
@ -4963,7 +4968,7 @@ Encoding Rescan(Encoding enc,
                             &mid_second_best_enc);
    destatep->reliable = mid_is_reliable;

-    empty_rescan = (mid_enc == ASCII_7BIT);
+    bool const empty_rescan = (mid_enc == ASCII_7BIT);

    // Not the right decision if, e.g. enc=Greek, mid=ASCII7, one=KSC
    // hence the !empty_rescan term
--- a/ced/ced/compact_enc_det/compact_enc_det.h
+++ b/ced/ced/compact_enc_det/compact_enc_det.h
@ -67,9 +67,9 @@ namespace CompactEncDet {
  Encoding DetectEncoding(
      const char* text, int text_length, const char* url_hint,
      const char* http_charset_hint, const char* meta_charset_hint,
-      int encoding_hint,
-      Language language_hint,  // User interface lang
-      TextCorpusType corpus_type, bool ignore_7bit_mail_encodings,
+      const int encoding_hint,
+      const Language language_hint,  // User interface lang
+      const TextCorpusType corpus_type, bool ignore_7bit_mail_encodings,
      int* bytes_consumed, bool* is_reliable);

  // Support functions for unit test program
--- a/ced/ced/compact_enc_det/compact_enc_det_hint_code.cc
+++ b/ced/ced/compact_enc_det/compact_enc_det_hint_code.cc
@ -109,7 +109,7 @@ string MakeChar44(const string& str) {
  string res("________");     // eight underscores
  int l_ptr = 0;
  size_t d_ptr = 0;
-  for (char ch : str) {
+  for (auto ch : str) {
    auto uc = static_cast<uint8>(ch);
    if (kIsAlpha[uc]) {
      if (l_ptr < 4) {                  // Else ignore
@ -138,7 +138,7 @@ string MakeChar44(const string& str) {
 string MakeChar4(const string& str) {
  string res("____");     // four underscores
  int l_ptr = 0;
-  for (char ch : str) {
+  for (auto ch : str) {
    auto uc = static_cast<uint8>(ch);
    if (kIsAlpha[uc] | kIsDigit[uc]) {
      if (l_ptr < 4) {                  // Else ignore
@ -156,7 +156,7 @@ string MakeChar4(const string& str) {
 string MakeChar8(const string& str) {
   string res("________");     // eight underscores
  int l_ptr = 0;
-  for (char ch : str) {
+  for (auto ch : str) {
    auto uc = static_cast<uint8>(ch);
    if (kIsAlpha[uc] | kIsDigit[uc]) {
      if (l_ptr < 8) {                  // Else ignore
--- a/ced/ced/util/encodings/encodings.h
+++ b/ced/ced/util/encodings/encodings.h
@ -101,7 +101,7 @@ bool IsValidEncoding(Encoding enc);
 // good to consider the full matrix of all pairs of encodings and to fish out
 // all compatible pairs.
 //
-bool IsEncEncCompatible(Encoding from, Encoding to);
+bool IsEncEncCompatible(const Encoding from, const Encoding to);

 // To be a superset of 7-bit Ascii means that bytes 0...127 in the given
 // encoding represent the same characters as they do in ISO_8859_1.
@ -225,7 +225,7 @@ bool IsJapaneseCellPhoneCarrierSpecificEncoding(Encoding enc);
 // Given the encoding, returns its standard name.
 // Return invalid_encoding_name() if the encoding is invalid.
 //
-const char* EncodingName(Encoding enc);
+const char* EncodingName(const Encoding enc);

 //
 // MimeEncodingName
@ -235,7 +235,7 @@ const char* EncodingName(Encoding enc);
 //
 // This name is suitable for using in HTTP headers, HTML tags,
 // and as the "charset" parameter of a MIME Content-Type.
-const char* MimeEncodingName(Encoding enc);
+const char* MimeEncodingName(const Encoding enc);


 // The maximum length of an encoding name
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@ -142,7 +142,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
            }
            InvalidateRect(hDlg, NULL, TRUE);
          }
-          return(true);
+          return true;
        break;

        case IDOK:
@ -164,7 +164,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
            unicodeStringCpy(fileKey, unicodeFileKey, sizeof(fileKey));
            unicodeStringCpy(masterKey, unicodeMasterKey, sizeof(masterKey));
            EndDialog(hDlg, IDOK);
-            return(true);
+            return true;
        }

        break;
@ -264,7 +264,7 @@ INT_PTR CALLBACK GetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
              SendDlgItemMessage(hDlg, IDC_PWD_EDIT3, EM_SETPASSWORDCHAR, (WPARAM)wDot, 0);
            }
            InvalidateRect(hDlg, NULL, TRUE);
-            return(true);
+            return true;
            break;
          }
        case IDOK:
@ -287,7 +287,7 @@ INT_PTR CALLBACK GetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
              }
              EndDialog(hDlg, IDOK);
          }
-          return(true);
+          return true;
          break;

        case IDCANCEL:
@ -527,7 +527,7 @@ bool EncryptAndWriteFile(HWND hwnd, HANDLE hFile, BYTE *data, DWORD size, DWORD
            // write the PREAMBLE, punt if that failed
            if (!WriteFile(hFile, precodedata, precode_size, &PREAMBLE_written, NULL)) {
                *written = PREAMBLE_written;
-                return(false);
+                return false;
            }
        }

--- a/src/Dialogs.c
+++ b/src/Dialogs.c
@ -1272,9 +1272,9 @@ bool FavoritesDlg(HWND hwnd,LPWSTR lpstrFile)
                             hwnd,FavoritesDlgProc,(LPARAM)&dliFavorite))
  {
    StringCchCopyN(lpstrFile,MAX_PATH,dliFavorite.szFileName,MAX_PATH);
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


@ -2421,9 +2421,9 @@ bool SelectDefEncodingDlg(HWND hwnd,int *pidREncoding)

  if (iResult == IDOK) {
    *pidREncoding = dd.idEncoding;
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


@ -2588,9 +2588,9 @@ bool SelectEncodingDlg(HWND hwnd,int *pidREncoding)

  if (iResult == IDOK) {
    *pidREncoding = dd.idEncoding;
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


@ -2621,9 +2621,9 @@ bool RecodeDlg(HWND hwnd,int *pidREncoding)

  if (iResult == IDOK) {
    *pidREncoding = dd.idEncoding;
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


--- a/src/Dlapi.c
+++ b/src/Dlapi.c
@ -857,9 +857,9 @@ bool DirList_GetLongPathName(HWND hwnd,LPWSTR lpszLongPath,int length)
  if (SHGetPathFromIDList(lpdl->pidl,tch))
  {
    StringCchCopy(lpszLongPath,length,tch);
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


@ -885,7 +885,7 @@ bool DirList_SelectItem(HWND hwnd,LPCWSTR lpszDisplayName,LPCWSTR lpszFullPath)
  int i = -1;

  if (!lpszFullPath || !StringCchLen(lpszFullPath, MAX_PATH)) {
-    return(false);
+    return false;
  }

  GetShortPathName(lpszFullPath,szShortPath,MAX_PATH);
@ -912,12 +912,12 @@ bool DirList_SelectItem(HWND hwnd,LPCWSTR lpszDisplayName,LPCWSTR lpszFullPath)
      ListView_SetItemState(hwnd,i,LVIS_FLAGS,LVIS_FLAGS);
      ListView_EnsureVisible(hwnd,i,false);

-      return(true);
+      return true;
    }

  }

-  return(false);
+  return false;

 }

@ -979,7 +979,7 @@ bool DirList_MatchFilter(LPSHELLFOLDER lpsf,LPCITEMIDLIST pidl,PDL_FILTER pdlf)

  // All the directories are added
  if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
-    return(true);
+    return true;

  // Check if exclude *.* after directories have been added
  if (pdlf->nCount == 0 && pdlf->bExcludeFilter)
@ -993,9 +993,9 @@ bool DirList_MatchFilter(LPSHELLFOLDER lpsf,LPCITEMIDLIST pidl,PDL_FILTER pdlf)
      if (bMatchSpec)
      {
        if (!pdlf->bExcludeFilter) {
-          return(true);
+          return true;
        }
-        return(false);
+        return false;
      }
    }
  }
--- a/src/Edit.c
+++ b/src/Edit.c
@ -390,10 +390,10 @@ void EditSetNewText(HWND hwnd,char* lpstrText,DWORD cbText)
 bool EditConvertText(HWND hwnd, int encSource, int encDest, bool bSetSavePoint)
 {
  if (encSource == encDest)
-    return(true);
+    return true;

  if (!(Encoding_IsValid(encSource) && Encoding_IsValid(encDest)))
-    return(false);
+    return false;

  DocPos const length = SciCall_GetTextLength();

@ -435,7 +435,7 @@ bool EditConvertText(HWND hwnd, int encSource, int encDest, bool bSetSavePoint)

    FreeMem(pchText);
  }
-  return(true);
+  return true;
 }


@ -1178,22 +1178,18 @@ bool EditLoadFile(
    FileVars_Init(lpData,cbData,&Globals.fvCurFile);

    // ===  UTF-8  ===
-    bool const bForcedUTF8 = Encoding_IsUTF8(iForcedEncoding);
-    bool const bHardRulesUTF8 = bForcedUTF8 || (FileVars_IsUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
-    bool const bForcedNonUTF8 = bIsForced && !bForcedUTF8;
-
    bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
+    bool const bForcedUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
    bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
-    bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint
+    bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint

-    bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
+    bool const bRejectUTF8 = !bValidUTF8 || (!bIsUTF8Sig && bSkipUTFDetection);

-    //if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8)))
-    if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
+    if (bForcedUTF8 || (!bRejectUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
    {
      EditSetNewText(hwnd,"",0);
      if (bIsUTF8Sig) {
-        EditSetNewText(hwnd,UTF8StringStart(lpData),cbData-3);
+        EditSetNewText(hwnd,UTF8StringStart(lpData),cbData - 3);
        status->iEncoding = CPI_UTF8SIGN;
        EditDetectEOLMode(UTF8StringStart(lpData), cbData - 3, status);
      }
@ -8021,7 +8017,7 @@ bool FileVars_Apply(HWND hwnd,LPFILEVARS lpfv) {

  Globals.iWrapCol = 0;

-  return(true);
+  return true;
 }


@ -8063,19 +8059,19 @@ bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue) {

    int itok = sscanf_s(tch,"%i",piValue);
    if (itok == 1)
-      return(true);
+      return true;

    if (tch[0] == 't') {
      *piValue = 1;
-      return(true);
+      return true;
    }

    if (tch[0] == 'n' || tch[0] == 'f') {
      *piValue = 0;
-      return(true);
+      return true;
    }
  }
-  return(false);
+  return false;
 }


@ -8121,9 +8117,9 @@ bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue)

    StringCchCopyNA(pszValue,cchValue,tch,COUNTOF(tch));

-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }


@ -8135,24 +8131,9 @@ bool FileVars_IsUTF8(LPFILEVARS lpfv) {
  if (lpfv->mask & FV_ENCODING) {
    if (StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf-8",CSTRLEN("utf-8")) == 0 ||
        StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf8", CSTRLEN("utf8")) == 0)
-      return(true);
+      return true;
  }
-  return(false);
-}
-
-
-//=============================================================================
-//
-//  FileVars_IsNonUTF8()
-//
-bool FileVars_IsNonUTF8(LPFILEVARS lpfv) {
-  if (lpfv->mask & FV_ENCODING) {
-    if (StringCchLenA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding)) &&
-        StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf-8", CSTRLEN("utf-8")) != 0 &&
-        StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf8", CSTRLEN("utf8")) != 0)
-      return(true);
-  }
-  return(false);
+  return false;
 }


@ -8168,12 +8149,13 @@ bool FileVars_IsValidEncoding(LPFILEVARS lpfv) {
    if ((Encoding_IsINTERNAL(lpfv->iEncoding)) ||
         (IsValidCodePage(Encoding_GetCodePage(lpfv->iEncoding)) &&
          GetCPInfo(Encoding_GetCodePage(lpfv->iEncoding),&cpi))) {
-      return(true);
+      return true;
    }
  }
-  return(false);
+  return false;
 }

+
 //=============================================================================
 //
 //  FileVars_GetEncoding()
@ -8182,7 +8164,7 @@ int FileVars_GetEncoding(LPFILEVARS lpfv) {
  if (lpfv->mask & FV_ENCODING) {
    return(lpfv->iEncoding);
  }
-  return(-1);
+  return CPI_NONE;
 }


--- a/src/Edit.h
+++ b/src/Edit.h
@ -132,7 +132,6 @@ bool FileVars_Apply(HWND hwnd,LPFILEVARS lpfv);
 bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue);
 bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue);
 bool FileVars_IsUTF8(LPFILEVARS lpfv);
-bool FileVars_IsNonUTF8(LPFILEVARS lpfv);
 bool FileVars_IsValidEncoding(LPFILEVARS lpfv);
 int  FileVars_GetEncoding(LPFILEVARS lpfv);

--- a/src/Encoding.c
+++ b/src/Encoding.c
@ -329,10 +329,10 @@ bool Encoding_IsValid(int iTestEncoding) {
    if ((g_Encodings[iTestEncoding].uFlags & NCP_INTERNAL) ||
      (IsValidCodePage(g_Encodings[iTestEncoding].uCodePage) &&
       GetCPInfo(g_Encodings[iTestEncoding].uCodePage, &cpi))) {
-      return(true);
+      return true;
    }
  }
-  return(false);
+  return false;
 }
 // ============================================================================

@ -428,7 +428,7 @@ bool Encoding_GetFromListView(HWND hwnd, int *pidEncoding) {

    return (true);
  }
-  return(false);
+  return false;
 }
 // ============================================================================

@ -506,7 +506,7 @@ bool Encoding_GetFromComboboxEx(HWND hwnd, int *pidEncoding) {

    return (true);
  }
-  return(false);
+  return false;
 }
 // ============================================================================

--- a/src/Helpers.c
+++ b/src/Helpers.c
@ -455,7 +455,7 @@ bool IsElevated() {
  HANDLE hToken = NULL;

  if (!IsVista())
-    return(false);
+    return false;

  if (OpenProcessToken(GetCurrentProcess(),TOKEN_QUERY,&hToken)) {

@ -1009,7 +1009,7 @@ bool PathCreateDeskLnk(LPCWSTR pszDocument)

  // Try to construct a valid filename...
  if (!SHGetNewLinkInfo(pszDocument,tchLinkDir,tchLnkFileName,&fMustCopy,SHGNLI_PREFIXNAME))
-    return(false);
+    return false;

  if (SUCCEEDED(CoCreateInstance(&CLSID_ShellLink,NULL,
                                 CLSCTX_INPROC_SERVER,
--- a/src/Notepad3.c
+++ b/src/Notepad3.c
@ -9942,7 +9942,7 @@ bool ActivatePrevInst()
  COPYDATASTRUCT cds;

  if ((Flags.fNoReuseWindow && !Flags.fSingleFileInstance) || s_flagStartAsTrayIcon || s_flagNewFromClipboard || s_flagPasteBoard)
-    return(false);
+    return false;

  if (Flags.fSingleFileInstance && s_lpFileArg) 
  {
@ -10009,18 +10009,18 @@ bool ActivatePrevInst()
        SendMessage(hwnd,WM_COPYDATA,(WPARAM)NULL,(LPARAM)&cds);
        FreeMem(params);

-        return(true);
+        return true;
      }
      // IsWindowEnabled()
      if (IDYES == MsgBoxLng(MBYESNOWARN, IDS_MUI_ERR_PREVWINDISABLED)) {
-        return(false);
+        return false;
      }
-      return(true);
+      return true;
    }
  }

  if (Flags.fNoReuseWindow) {
-    return(false);
+    return false;
  }

  hwnd = NULL;
@ -10102,12 +10102,12 @@ bool ActivatePrevInst()
        FreeMem(params);    params = NULL;
        FreeMem(s_lpFileArg); s_lpFileArg = NULL;
      }
-      return(true);
+      return true;
    }
    // IsWindowEnabled()
    return ((IDYES == MsgBoxLng(MBYESNOWARN, IDS_MUI_ERR_PREVWINDISABLED)) ? false : true);
  }
-  return(false);
+  return false;
 }


--- a/src/Styles.c
+++ b/src/Styles.c
@ -453,10 +453,10 @@ bool Style_Import(HWND hwnd)
        }
      }
      FreeMem(pIniSection);
-      return(true);
+      return true;
    }
  }
-  return(false);
+  return false;
 }

 //=============================================================================
@ -504,9 +504,9 @@ bool Style_Export(HWND hwnd)
    if (dwError != ERROR_SUCCESS) {
      MsgBoxLng(MBINFO,IDS_MUI_EXPORT_FAIL,szFile);
    }
-    return(true);
+    return true;
  }
-  return(false);
+  return false;
 }