mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+ upd: current corrections for Compact Encoding Detection (CED by Google)
+ fix: problem with UTF-8 (no BOM) encoding vs. ANSI encoding detection + cln: code cleanup
This commit is contained in:
parent
2eee188627
commit
e83fbcc747
@ -73,7 +73,7 @@ add_library(ced ${CED_LIBRARY_SOURCES})
|
||||
#add_definitions(-DHTML5_MODE)
|
||||
|
||||
set(GTEST_INCLUDE_DIR "gtest/googletest/include")
|
||||
set(GTEST_LIB_DIR "${CMAKE_SOURCE_DIR}/gtest/googlemock/gtest")
|
||||
set(GTEST_LIB_DIR "${CMAKE_SOURCE_DIR}/gtest/lib")
|
||||
|
||||
set(CED_UNITTEST_SOURCES
|
||||
compact_enc_det/compact_enc_det_unittest.cc
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2016 Google Inc.
|
||||
// Copyright 2016 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -363,6 +363,9 @@ static const char* kWhatSetName[] = {"Ascii", "Other"};
|
||||
// regions (ISO-2022-xx, HZ)
|
||||
// NOTE(review): used as the type of next_2022_state and next_hz_state
// ("mini state" for 2022 / HZ sequences); per-value meaning presumably
// none / error / one-byte region / two-byte region -- confirm.
enum StateSoSi {SOSI_NONE, SOSI_ERROR, SOSI_ONEBYTE, SOSI_TWOBYTE};
|
||||
|
||||
#define UTF8_ARR_CNT 6  // Element count of utf8_minicount[] and utf8utf8_minicount[]
|
||||
#define BYTE32_ARR_CNT 8  // Element count of byte32_count[] (indexed by top 3 bits of byte1)
|
||||
|
||||
typedef struct {
|
||||
const uint8* initial_src; // For calculating byte offsets
|
||||
const uint8* limit_src; // Range of input source
|
||||
@ -386,30 +389,30 @@ typedef struct {
|
||||
bool do_latin_trigrams; // True if we actually are scoring trigrams
|
||||
|
||||
// Miscellaneous state variables for difficult encodings
|
||||
int binary_quadrants_count; // Number of four bigram quadrants seen:
|
||||
// 0xxxxxxx0xxxxxxx 0xxxxxxx1xxxxxxx
|
||||
// 1xxxxxxx0xxxxxxx 1xxxxxxx1xxxxxxx
|
||||
int binary_8x4_count; // Number of 8x4 buckets seen:
|
||||
uint32 binary_quadrants_seen; // Bit[i] set if bigram i.......i....... seen
|
||||
uint32 binary_8x4_seen; // Bit[i] set if bigram iii.....ii...... seen
|
||||
int utf7_starts; // Count of possible UTF-7 beginnings seen
|
||||
int prior_utf7_offset; // Source consumed by prior UTF-7 string
|
||||
int next_utf8_ministate; // Mini state for UTF-8 sequences
|
||||
int utf8_minicount[6]; // Number of correct 2- 3- 4-byte seq, errors
|
||||
int next_utf8utf8_ministate; // Mini state for UTF8UTF8 sequences
|
||||
int utf8utf8_odd_byte; // UTF8UTF8 seq has odd number of bytes
|
||||
int utf8utf8_minicount[6]; // Number of correct 2- 3- 4-byte seq, errors
|
||||
int binary_quadrants_count; // Number of four bigram quadrants seen:
|
||||
// 0xxxxxxx0xxxxxxx 0xxxxxxx1xxxxxxx
|
||||
// 1xxxxxxx0xxxxxxx 1xxxxxxx1xxxxxxx
|
||||
int binary_8x4_count; // Number of 8x4 buckets seen:
|
||||
uint32 binary_quadrants_seen; // Bit[i] set if bigram i.......i....... seen
|
||||
uint32 binary_8x4_seen; // Bit[i] set if bigram iii.....ii...... seen
|
||||
int utf7_starts; // Count of possible UTF-7 beginnings seen
|
||||
int prior_utf7_offset; // Source consumed by prior UTF-7 string
|
||||
int next_utf8_ministate; // Mini state for UTF-8 sequences
|
||||
int utf8_minicount[UTF8_ARR_CNT]; // Number of correct 2- 3- 4-byte seq, errors
|
||||
int next_utf8utf8_ministate; // Mini state for UTF8UTF8 sequences
|
||||
int utf8utf8_odd_byte; // UTF8UTF8 seq has odd number of bytes
|
||||
int utf8utf8_minicount[UTF8_ARR_CNT]; // Number of correct 2- 3- 4-byte seq, errors
|
||||
StateSoSi next_2022_state; // Mini state for 2022 sequences
|
||||
StateSoSi next_hz_state; // Mini state for HZ sequences
|
||||
bool next_eucjp_oddphase; // Mini state for EUC-JP sequences
|
||||
int byte32_count[8]; // Count of top 3 bits of byte1 of bigram
|
||||
// 0x1x 2x3x 4x5x 6x7x 8x9x AxBx CxDx ExFx
|
||||
uint32 active_special; // Bits showing which special cases are active
|
||||
|
||||
Encoding tld_hint; // Top TLD encoding or UNKNOWN
|
||||
Encoding http_hint; // What the document says about itself or
|
||||
Encoding meta_hint; // UNKNOWN_ENCODING. BOM is initial byte
|
||||
Encoding bom_hint; // order mark for UTF-xx
|
||||
int byte32_count[BYTE32_ARR_CNT]; // Count of top 3 bits of byte1 of bigram
|
||||
// 0x1x 2x3x 4x5x 6x7x 8x9x AxBx CxDx ExFx
|
||||
uint32 active_special; // Bits showing which special cases are active
|
||||
|
||||
Encoding tld_hint; // Top TLD encoding or UNKNOWN
|
||||
Encoding http_hint; // What the document says about itself or
|
||||
Encoding meta_hint; // UNKNOWN_ENCODING. BOM is initial byte
|
||||
Encoding bom_hint; // order mark for UTF-xx
|
||||
|
||||
// small cache of previous interesting bigrams
|
||||
int next_prior_bigram;
|
||||
@ -1371,9 +1374,10 @@ void PsMark(const uint8* src, int len, const uint8* isrc, int weightshift) {
|
||||
// Unfortunately, we have to skip back N lines since source was printed for
|
||||
// up to 8 bigrams before we get here. Match on src+1 to handle 0/31 better
|
||||
void PsHighlight(const uint8* src, const uint8* isrc, int trigram_val, int n) {
|
||||
auto offset = static_cast<int>((src + 1) - isrc);
|
||||
auto offset = src ? static_cast<int>((src + 1) - isrc) :
|
||||
static_cast<int>((const uint8*)1 - isrc);
|
||||
int offset32 = (offset % pssourcewidth); // mod len bytes
|
||||
offset -= offset32; // round down to multiple of len bytes
|
||||
offset -= offset32; // round down to multiple of len bytes
|
||||
|
||||
for (int i = 1; i <= 16; ++i) {
|
||||
if (do_src_offset[(next_do_src_line - i) & 0x0f] == offset) {
|
||||
@ -1413,14 +1417,17 @@ void InitDetectEncodingState(DetectEncodingState* destatep) {
|
||||
destatep->utf7_starts = 0;
|
||||
destatep->prior_utf7_offset = 0;
|
||||
destatep->next_utf8_ministate = 0;
|
||||
for (int & i : destatep->utf8_minicount) {i = 0;}
|
||||
//for (int & i : destatep->utf8_minicount) {i = 0;}
|
||||
std::fill(destatep->utf8_minicount, destatep->utf8_minicount + UTF8_ARR_CNT, 0);
|
||||
destatep->next_utf8utf8_ministate = 0;
|
||||
destatep->utf8utf8_odd_byte = 0;
|
||||
for (int & i : destatep->utf8utf8_minicount) {i = 0;}
|
||||
//for (int & i : destatep->utf8utf8_minicount) {i = 0;}
|
||||
std::fill(destatep->utf8utf8_minicount, destatep->utf8utf8_minicount + UTF8_ARR_CNT, 0);
|
||||
destatep->next_2022_state = SOSI_NONE;
|
||||
destatep->next_hz_state = SOSI_NONE;
|
||||
destatep->next_eucjp_oddphase = false;
|
||||
for (int & i : destatep->byte32_count) {i = 0;}
|
||||
//for (int & i : destatep->byte32_count) {i = 0;}
|
||||
std::fill(destatep->byte32_count, destatep->byte32_count + BYTE32_ARR_CNT, 0);
|
||||
destatep->active_special = 0xffffffff;
|
||||
destatep->tld_hint = UNKNOWN_ENCODING;
|
||||
destatep->http_hint = UNKNOWN_ENCODING;
|
||||
@ -1910,9 +1917,7 @@ int ApplyDefaultHint(const CompactEncDet::TextCorpusType corpus_type,
|
||||
|
||||
if (FLAGS_demo_nodefault) {
|
||||
// Demo, make initial probs all zero
|
||||
for (int & i : destatep->enc_prob) {
|
||||
i = 0;
|
||||
}
|
||||
std::fill(destatep->enc_prob, destatep->enc_prob + NUM_RANKEDENCODING, 0);
|
||||
}
|
||||
|
||||
if (destatep->debug_data != nullptr) {
|
||||
@ -2098,7 +2103,8 @@ void ApplyHints(const char* url_hint,
|
||||
if (hint_count == 0) {
|
||||
destatep->looking_for_latin_trigrams = true; // Default needs trigrams
|
||||
destatep->declared_enc_2 = destatep->declared_enc_1;
|
||||
hint_count += ApplyDefaultHint(corpus_type, destatep);
|
||||
//~hint_count += ApplyDefaultHint(corpus_type, destatep);
|
||||
ApplyDefaultHint(corpus_type, destatep);
|
||||
}
|
||||
|
||||
|
||||
@ -2214,10 +2220,10 @@ void InitialBytesBoost(const uint8* src,
|
||||
DetectEncodingState* destatep) {
|
||||
if (text_length < 4) {return;}
|
||||
|
||||
char32 pair01 = (src[0] << 8) | src[1];
|
||||
char32 pair23 = (src[2] << 8) | src[3];
|
||||
char32 quad0123 = (pair01 << 16) | pair23;
|
||||
|
||||
uint32 pair01 = (src[0] << 8) | src[1];
|
||||
uint32 pair23 = (src[2] << 8) | src[3];
|
||||
uint32 quad0123 = (pair01 << 16) | pair23;
|
||||
|
||||
bool utf_16_indication = false;
|
||||
bool utf_32_indication = false;
|
||||
int best_enc = -1;
|
||||
@ -2539,7 +2545,7 @@ void UTF7BoostWhack(DetectEncodingState* destatep, int next_pair, uint8 byte2) {
|
||||
int nmod8 = n & 7;
|
||||
if ((n == 3) || (n == 6)) {
|
||||
// short but legal -- treat as neutral
|
||||
} else if ((nmod8 == 0) | (nmod8 == 3) | (nmod8 == 6)) {
|
||||
} else if ((nmod8 == 0) || (nmod8 == 3) || (nmod8 == 6)) {
|
||||
// Good length. Check for good Unicode.
|
||||
if (GoodUnicodeFromBase64(start, start + n)) {
|
||||
// Good length and Unicode, boost
|
||||
@ -4893,7 +4899,6 @@ Encoding Rescan(Encoding enc,
|
||||
auto scanned_bytes = static_cast<int>(src - isrc);
|
||||
auto unscanned_bytes = static_cast<int>(srctextlimit - src);
|
||||
auto text_length = static_cast<int>(srctextlimit - isrc);
|
||||
bool empty_rescan = true;
|
||||
|
||||
// See if enough bytes left to bother doing rescan
|
||||
if (kMinRescanLength < unscanned_bytes) {
|
||||
@ -4963,7 +4968,7 @@ Encoding Rescan(Encoding enc,
|
||||
&mid_second_best_enc);
|
||||
destatep->reliable = mid_is_reliable;
|
||||
|
||||
empty_rescan = (mid_enc == ASCII_7BIT);
|
||||
bool const empty_rescan = (mid_enc == ASCII_7BIT);
|
||||
|
||||
// Not the right decision if, e.g. enc=Greek, mid=ASCII7, one=KSC
|
||||
// hence the !empty_rescan term
|
||||
|
||||
@ -67,9 +67,9 @@ namespace CompactEncDet {
|
||||
Encoding DetectEncoding(
|
||||
const char* text, int text_length, const char* url_hint,
|
||||
const char* http_charset_hint, const char* meta_charset_hint,
|
||||
int encoding_hint,
|
||||
Language language_hint, // User interface lang
|
||||
TextCorpusType corpus_type, bool ignore_7bit_mail_encodings,
|
||||
const int encoding_hint,
|
||||
const Language language_hint, // User interface lang
|
||||
const TextCorpusType corpus_type, bool ignore_7bit_mail_encodings,
|
||||
int* bytes_consumed, bool* is_reliable);
|
||||
|
||||
// Support functions for unit test program
|
||||
|
||||
@ -109,7 +109,7 @@ string MakeChar44(const string& str) {
|
||||
string res("________"); // eight underscores
|
||||
int l_ptr = 0;
|
||||
size_t d_ptr = 0;
|
||||
for (char ch : str) {
|
||||
for (auto ch : str) {
|
||||
auto uc = static_cast<uint8>(ch);
|
||||
if (kIsAlpha[uc]) {
|
||||
if (l_ptr < 4) { // Else ignore
|
||||
@ -138,7 +138,7 @@ string MakeChar44(const string& str) {
|
||||
string MakeChar4(const string& str) {
|
||||
string res("____"); // four underscores
|
||||
int l_ptr = 0;
|
||||
for (char ch : str) {
|
||||
for (auto ch : str) {
|
||||
auto uc = static_cast<uint8>(ch);
|
||||
if (kIsAlpha[uc] | kIsDigit[uc]) {
|
||||
if (l_ptr < 4) { // Else ignore
|
||||
@ -156,7 +156,7 @@ string MakeChar4(const string& str) {
|
||||
string MakeChar8(const string& str) {
|
||||
string res("________"); // eight underscores
|
||||
int l_ptr = 0;
|
||||
for (char ch : str) {
|
||||
for (auto ch : str) {
|
||||
auto uc = static_cast<uint8>(ch);
|
||||
if (kIsAlpha[uc] | kIsDigit[uc]) {
|
||||
if (l_ptr < 8) { // Else ignore
|
||||
|
||||
@ -101,7 +101,7 @@ bool IsValidEncoding(Encoding enc);
|
||||
// good to consider the full matrix of all pairs of encodings and to fish out
|
||||
// all compatible pairs.
|
||||
//
|
||||
bool IsEncEncCompatible(Encoding from, Encoding to);
|
||||
bool IsEncEncCompatible(const Encoding from, const Encoding to);
|
||||
|
||||
// To be a superset of 7-bit Ascii means that bytes 0...127 in the given
|
||||
// encoding represent the same characters as they do in ISO_8859_1.
|
||||
@ -225,7 +225,7 @@ bool IsJapaneseCellPhoneCarrierSpecificEncoding(Encoding enc);
|
||||
// Given the encoding, returns its standard name.
|
||||
// Return invalid_encoding_name() if the encoding is invalid.
|
||||
//
|
||||
const char* EncodingName(Encoding enc);
|
||||
const char* EncodingName(const Encoding enc);
|
||||
|
||||
//
|
||||
// MimeEncodingName
|
||||
@ -235,7 +235,7 @@ const char* EncodingName(Encoding enc);
|
||||
//
|
||||
// This name is suitable for using in HTTP headers, HTML tags,
|
||||
// and as the "charset" parameter of a MIME Content-Type.
|
||||
const char* MimeEncodingName(Encoding enc);
|
||||
const char* MimeEncodingName(const Encoding enc);
|
||||
|
||||
|
||||
// The maximum length of an encoding name
|
||||
|
||||
@ -142,7 +142,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
|
||||
}
|
||||
InvalidateRect(hDlg, NULL, TRUE);
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
break;
|
||||
|
||||
case IDOK:
|
||||
@ -164,7 +164,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
|
||||
unicodeStringCpy(fileKey, unicodeFileKey, sizeof(fileKey));
|
||||
unicodeStringCpy(masterKey, unicodeMasterKey, sizeof(masterKey));
|
||||
EndDialog(hDlg, IDOK);
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
break;
|
||||
@ -264,7 +264,7 @@ INT_PTR CALLBACK GetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
|
||||
SendDlgItemMessage(hDlg, IDC_PWD_EDIT3, EM_SETPASSWORDCHAR, (WPARAM)wDot, 0);
|
||||
}
|
||||
InvalidateRect(hDlg, NULL, TRUE);
|
||||
return(true);
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case IDOK:
|
||||
@ -287,7 +287,7 @@ INT_PTR CALLBACK GetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
|
||||
}
|
||||
EndDialog(hDlg, IDOK);
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
break;
|
||||
|
||||
case IDCANCEL:
|
||||
@ -527,7 +527,7 @@ bool EncryptAndWriteFile(HWND hwnd, HANDLE hFile, BYTE *data, DWORD size, DWORD
|
||||
// write the PREAMBLE, punt if that failed
|
||||
if (!WriteFile(hFile, precodedata, precode_size, &PREAMBLE_written, NULL)) {
|
||||
*written = PREAMBLE_written;
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1272,9 +1272,9 @@ bool FavoritesDlg(HWND hwnd,LPWSTR lpstrFile)
|
||||
hwnd,FavoritesDlgProc,(LPARAM)&dliFavorite))
|
||||
{
|
||||
StringCchCopyN(lpstrFile,MAX_PATH,dliFavorite.szFileName,MAX_PATH);
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -2421,9 +2421,9 @@ bool SelectDefEncodingDlg(HWND hwnd,int *pidREncoding)
|
||||
|
||||
if (iResult == IDOK) {
|
||||
*pidREncoding = dd.idEncoding;
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -2588,9 +2588,9 @@ bool SelectEncodingDlg(HWND hwnd,int *pidREncoding)
|
||||
|
||||
if (iResult == IDOK) {
|
||||
*pidREncoding = dd.idEncoding;
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -2621,9 +2621,9 @@ bool RecodeDlg(HWND hwnd,int *pidREncoding)
|
||||
|
||||
if (iResult == IDOK) {
|
||||
*pidREncoding = dd.idEncoding;
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
16
src/Dlapi.c
16
src/Dlapi.c
@ -857,9 +857,9 @@ bool DirList_GetLongPathName(HWND hwnd,LPWSTR lpszLongPath,int length)
|
||||
if (SHGetPathFromIDList(lpdl->pidl,tch))
|
||||
{
|
||||
StringCchCopy(lpszLongPath,length,tch);
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -885,7 +885,7 @@ bool DirList_SelectItem(HWND hwnd,LPCWSTR lpszDisplayName,LPCWSTR lpszFullPath)
|
||||
int i = -1;
|
||||
|
||||
if (!lpszFullPath || !StringCchLen(lpszFullPath, MAX_PATH)) {
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
GetShortPathName(lpszFullPath,szShortPath,MAX_PATH);
|
||||
@ -912,12 +912,12 @@ bool DirList_SelectItem(HWND hwnd,LPCWSTR lpszDisplayName,LPCWSTR lpszFullPath)
|
||||
ListView_SetItemState(hwnd,i,LVIS_FLAGS,LVIS_FLAGS);
|
||||
ListView_EnsureVisible(hwnd,i,false);
|
||||
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(false);
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
@ -979,7 +979,7 @@ bool DirList_MatchFilter(LPSHELLFOLDER lpsf,LPCITEMIDLIST pidl,PDL_FILTER pdlf)
|
||||
|
||||
// All the directories are added
|
||||
if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
|
||||
return(true);
|
||||
return true;
|
||||
|
||||
// Check if exclude *.* after directories have been added
|
||||
if (pdlf->nCount == 0 && pdlf->bExcludeFilter)
|
||||
@ -993,9 +993,9 @@ bool DirList_MatchFilter(LPSHELLFOLDER lpsf,LPCITEMIDLIST pidl,PDL_FILTER pdlf)
|
||||
if (bMatchSpec)
|
||||
{
|
||||
if (!pdlf->bExcludeFilter) {
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
60
src/Edit.c
60
src/Edit.c
@ -390,10 +390,10 @@ void EditSetNewText(HWND hwnd,char* lpstrText,DWORD cbText)
|
||||
bool EditConvertText(HWND hwnd, int encSource, int encDest, bool bSetSavePoint)
|
||||
{
|
||||
if (encSource == encDest)
|
||||
return(true);
|
||||
return true;
|
||||
|
||||
if (!(Encoding_IsValid(encSource) && Encoding_IsValid(encDest)))
|
||||
return(false);
|
||||
return false;
|
||||
|
||||
DocPos const length = SciCall_GetTextLength();
|
||||
|
||||
@ -435,7 +435,7 @@ bool EditConvertText(HWND hwnd, int encSource, int encDest, bool bSetSavePoint)
|
||||
|
||||
FreeMem(pchText);
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -1178,22 +1178,18 @@ bool EditLoadFile(
|
||||
FileVars_Init(lpData,cbData,&Globals.fvCurFile);
|
||||
|
||||
// === UTF-8 ===
|
||||
bool const bForcedUTF8 = Encoding_IsUTF8(iForcedEncoding);
|
||||
bool const bHardRulesUTF8 = bForcedUTF8 || (FileVars_IsUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
|
||||
bool const bForcedNonUTF8 = bIsForced && !bForcedUTF8;
|
||||
|
||||
bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
|
||||
bool const bForcedUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
|
||||
bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
|
||||
bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint
|
||||
bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && Encoding_IsUTF8(iPreferedEncoding); // non-reliable analysis = soft-hint
|
||||
|
||||
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&Globals.fvCurFile) && !Settings.NoEncodingTags);
|
||||
bool const bRejectUTF8 = !bValidUTF8 || (!bIsUTF8Sig && bSkipUTFDetection);
|
||||
|
||||
//if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8)))
|
||||
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
|
||||
if (bForcedUTF8 || (!bRejectUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
|
||||
{
|
||||
EditSetNewText(hwnd,"",0);
|
||||
if (bIsUTF8Sig) {
|
||||
EditSetNewText(hwnd,UTF8StringStart(lpData),cbData-3);
|
||||
EditSetNewText(hwnd,UTF8StringStart(lpData),cbData - 3);
|
||||
status->iEncoding = CPI_UTF8SIGN;
|
||||
EditDetectEOLMode(UTF8StringStart(lpData), cbData - 3, status);
|
||||
}
|
||||
@ -8021,7 +8017,7 @@ bool FileVars_Apply(HWND hwnd,LPFILEVARS lpfv) {
|
||||
|
||||
Globals.iWrapCol = 0;
|
||||
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -8063,19 +8059,19 @@ bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue) {
|
||||
|
||||
int itok = sscanf_s(tch,"%i",piValue);
|
||||
if (itok == 1)
|
||||
return(true);
|
||||
return true;
|
||||
|
||||
if (tch[0] == 't') {
|
||||
*piValue = 1;
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tch[0] == 'n' || tch[0] == 'f') {
|
||||
*piValue = 0;
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -8121,9 +8117,9 @@ bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue)
|
||||
|
||||
StringCchCopyNA(pszValue,cchValue,tch,COUNTOF(tch));
|
||||
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -8135,24 +8131,9 @@ bool FileVars_IsUTF8(LPFILEVARS lpfv) {
|
||||
if (lpfv->mask & FV_ENCODING) {
|
||||
if (StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf-8",CSTRLEN("utf-8")) == 0 ||
|
||||
StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf8", CSTRLEN("utf8")) == 0)
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
}
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//
|
||||
// FileVars_IsNonUTF8()
|
||||
//
|
||||
bool FileVars_IsNonUTF8(LPFILEVARS lpfv) {
|
||||
if (lpfv->mask & FV_ENCODING) {
|
||||
if (StringCchLenA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding)) &&
|
||||
StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf-8", CSTRLEN("utf-8")) != 0 &&
|
||||
StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf8", CSTRLEN("utf8")) != 0)
|
||||
return(true);
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -8168,12 +8149,13 @@ bool FileVars_IsValidEncoding(LPFILEVARS lpfv) {
|
||||
if ((Encoding_IsINTERNAL(lpfv->iEncoding)) ||
|
||||
(IsValidCodePage(Encoding_GetCodePage(lpfv->iEncoding)) &&
|
||||
GetCPInfo(Encoding_GetCodePage(lpfv->iEncoding),&cpi))) {
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//
|
||||
// FileVars_GetEncoding()
|
||||
@ -8182,7 +8164,7 @@ int FileVars_GetEncoding(LPFILEVARS lpfv) {
|
||||
if (lpfv->mask & FV_ENCODING) {
|
||||
return(lpfv->iEncoding);
|
||||
}
|
||||
return(-1);
|
||||
return CPI_NONE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -132,7 +132,6 @@ bool FileVars_Apply(HWND hwnd,LPFILEVARS lpfv);
|
||||
bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue);
|
||||
bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue);
|
||||
bool FileVars_IsUTF8(LPFILEVARS lpfv);
|
||||
bool FileVars_IsNonUTF8(LPFILEVARS lpfv);
|
||||
bool FileVars_IsValidEncoding(LPFILEVARS lpfv);
|
||||
int FileVars_GetEncoding(LPFILEVARS lpfv);
|
||||
|
||||
|
||||
@ -329,10 +329,10 @@ bool Encoding_IsValid(int iTestEncoding) {
|
||||
if ((g_Encodings[iTestEncoding].uFlags & NCP_INTERNAL) ||
|
||||
(IsValidCodePage(g_Encodings[iTestEncoding].uCodePage) &&
|
||||
GetCPInfo(g_Encodings[iTestEncoding].uCodePage, &cpi))) {
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
// ============================================================================
|
||||
|
||||
@ -428,7 +428,7 @@ bool Encoding_GetFromListView(HWND hwnd, int *pidEncoding) {
|
||||
|
||||
return (true);
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
// ============================================================================
|
||||
|
||||
@ -506,7 +506,7 @@ bool Encoding_GetFromComboboxEx(HWND hwnd, int *pidEncoding) {
|
||||
|
||||
return (true);
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
// ============================================================================
|
||||
|
||||
|
||||
@ -455,7 +455,7 @@ bool IsElevated() {
|
||||
HANDLE hToken = NULL;
|
||||
|
||||
if (!IsVista())
|
||||
return(false);
|
||||
return false;
|
||||
|
||||
if (OpenProcessToken(GetCurrentProcess(),TOKEN_QUERY,&hToken)) {
|
||||
|
||||
@ -1009,7 +1009,7 @@ bool PathCreateDeskLnk(LPCWSTR pszDocument)
|
||||
|
||||
// Try to construct a valid filename...
|
||||
if (!SHGetNewLinkInfo(pszDocument,tchLinkDir,tchLnkFileName,&fMustCopy,SHGNLI_PREFIXNAME))
|
||||
return(false);
|
||||
return false;
|
||||
|
||||
if (SUCCEEDED(CoCreateInstance(&CLSID_ShellLink,NULL,
|
||||
CLSCTX_INPROC_SERVER,
|
||||
|
||||
@ -9942,7 +9942,7 @@ bool ActivatePrevInst()
|
||||
COPYDATASTRUCT cds;
|
||||
|
||||
if ((Flags.fNoReuseWindow && !Flags.fSingleFileInstance) || s_flagStartAsTrayIcon || s_flagNewFromClipboard || s_flagPasteBoard)
|
||||
return(false);
|
||||
return false;
|
||||
|
||||
if (Flags.fSingleFileInstance && s_lpFileArg)
|
||||
{
|
||||
@ -10009,18 +10009,18 @@ bool ActivatePrevInst()
|
||||
SendMessage(hwnd,WM_COPYDATA,(WPARAM)NULL,(LPARAM)&cds);
|
||||
FreeMem(params);
|
||||
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
// IsWindowEnabled()
|
||||
if (IDYES == MsgBoxLng(MBYESNOWARN, IDS_MUI_ERR_PREVWINDISABLED)) {
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (Flags.fNoReuseWindow) {
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
hwnd = NULL;
|
||||
@ -10102,12 +10102,12 @@ bool ActivatePrevInst()
|
||||
FreeMem(params); params = NULL;
|
||||
FreeMem(s_lpFileArg); s_lpFileArg = NULL;
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
// IsWindowEnabled()
|
||||
return ((IDYES == MsgBoxLng(MBYESNOWARN, IDS_MUI_ERR_PREVWINDISABLED)) ? false : true);
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -453,10 +453,10 @@ bool Style_Import(HWND hwnd)
|
||||
}
|
||||
}
|
||||
FreeMem(pIniSection);
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
@ -504,9 +504,9 @@ bool Style_Export(HWND hwnd)
|
||||
if (dwError != ERROR_SUCCESS) {
|
||||
MsgBoxLng(MBINFO,IDS_MUI_EXPORT_FAIL,szFile);
|
||||
}
|
||||
return(true);
|
||||
return true;
|
||||
}
|
||||
return(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user