Merge pull request #1786 from RaiKoHoff/DevNewFeatures

Refactoring: Encoding Detection on File Load
This commit is contained in:
Rainer Kottenhoff 2019-11-18 08:30:42 +01:00 committed by GitHub
commit c2bb4a2b88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 664 additions and 657 deletions

View File

@ -975,8 +975,7 @@ bool EditLoadFile(
Globals.dwLastError = GetLastError();
if (hFile == INVALID_HANDLE_VALUE) {
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
Encoding_Forced(CPI_NONE);
return false;
}
@ -993,8 +992,7 @@ bool EditLoadFile(
// refuse to handle file
InfoBoxLng(MB_ICONERROR, NULL, IDS_MUI_ERR_FILE_TOO_LARGE, (liFileSize.QuadPart / 1024LL / 1024LL));
CloseHandle(hFile);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
Encoding_Forced(CPI_NONE);
status->bFileTooBig = true;
}
return false;
@ -1009,8 +1007,7 @@ bool EditLoadFile(
if ((dwFileSizeLimit != 0LL) && ((dwFileSizeLimit * 1024LL * 1024LL) < dwFileSize)) {
if (InfoBoxLng(MB_YESNO, L"MsgFileSizeWarning", IDS_MUI_WARN_LOAD_BIG_FILE) != IDYES) {
CloseHandle(hFile);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
Encoding_Forced(CPI_NONE);
status->bFileTooBig = true;
return false;
}
@ -1022,8 +1019,7 @@ bool EditLoadFile(
INT_PTR const answer = InfoBoxLng(MB_YESNO, L"MsgFileUnknownExt", IDS_MUI_WARN_UNKNOWN_EXT, PathFindFileName(pszFile));
if (!((IDOK == answer) || (IDYES == answer))) {
CloseHandle(hFile);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
Encoding_Forced(CPI_NONE);
status->bUnknownExt = true;
return false;
}
@ -1034,8 +1030,7 @@ bool EditLoadFile(
{
Globals.dwLastError = GetLastError();
CloseHandle(hFile);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
Encoding_Forced(CPI_NONE);
status->bFileTooBig = true;
return false;
}
@ -1045,12 +1040,24 @@ bool EditLoadFile(
Globals.dwLastError = GetLastError();
CloseHandle(hFile);
if (cbData == 0) {
FileVars_Init(NULL, 0, &Globals.fvCurFile);
status->iEOLMode = Settings.DefaultEOLMode;
status->iEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
EditSetNewText(hwnd, "", 0, bClearUndoHistory);
SciCall_SetEOLMode(Settings.DefaultEOLMode);
Encoding_Forced(CPI_NONE);
FreeMem(lpData);
return true;
}
bool bReadSuccess = ((readFlag & DECRYPT_FATAL_ERROR) || (readFlag & DECRYPT_FREAD_FAILED)) ? false : true;
// ((readFlag == DECRYPT_SUCCESS) || (readFlag & DECRYPT_NO_ENCRYPTION)) => true;
if ((readFlag & DECRYPT_CANCELED_NO_PASS) || (readFlag & DECRYPT_WRONG_PASS))
{
bReadSuccess = (InfoBoxLng(MB_OKCANCEL, L"MsgNoOrWrongPassphrase", IDS_MUI_NOPASS) == IDOK);
if (!bReadSuccess) {
Encoding_Forced(CPI_NONE);
FreeMem(lpData);
return true;
}
@ -1058,222 +1065,100 @@ bool EditLoadFile(
status->bEncryptedRaw = true;
}
}
if (!bReadSuccess) {
Encoding_Forced(CPI_NONE);
FreeMem(lpData);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
return false;
}
// --------------------------------------------------------------------------
// Encoding Detection
ENC_DET_T encDetection = Encoding_DetectEncoding(pszFile, lpData, cbData, bSkipUTFDetection, bSkipANSICPDetection, bForceEncDetection);
#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetection.forcedEncoding))
// --------------------------------------------------------------------------
// assume current code-page or default encoding (if forced)
cpi_enc_t const iAnalyzeFallback = Settings.UseDefaultForFileEncoding ? Settings.DefaultEncoding : CPI_ANSI_DEFAULT;
// --- 1st check for force encodings ---
LPCWSTR lpszExt = PathFindExtension(pszFile);
bool const bNfoDizDetected = (lpszExt && !(StringCchCompareXI(lpszExt, L".nfo") && StringCchCompareXI(lpszExt, L".diz")));
cpi_enc_t iForcedEncoding = Globals.bForceReLoadAsUTF8 ? CPI_UTF8 :
((Settings.LoadNFOasOEM && bNfoDizDetected) ? Globals.DOSEncoding : Encoding_SrcCmdLn(CPI_GET));
#define IS_ENC_ENFORCED() (!Encoding_IsNONE(iForcedEncoding))
// --- 2nd Use Encoding Analysis if applicable
size_t const cbNbytes4Analysis = (cbData < 200000L) ? cbData : 200000L;
float confidence = 0.0f;
cpi_enc_t iAnalyzedEncoding = iAnalyzeFallback;
if (!IS_ENC_ENFORCED() || bForceEncDetection)
{
iAnalyzedEncoding = Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &confidence, iAnalyzeFallback);
if (Flags.bDevDebugMode) {
if (Flags.bDevDebugMode) {
#if 1
SetAdditionalTitleInfo(Encoding_GetTitleInfoW());
SetAdditionalTitleInfo(Encoding_GetTitleInfoW());
#else
DocPos const iPos = SciCall_PositionFromLine(SciCall_GetFirstVisibleLine());
int const iXOff = SciCall_GetXOffset();
SciCall_SetXOffset(0);
SciCall_CallTipShow(iPos, Encoding_GetTitleInfoA());
SciCall_SetXOffset(iXOff);
Globals.CallTipType = CT_ENC_INFO;
DocPos const iPos = SciCall_PositionFromLine(SciCall_GetFirstVisibleLine());
int const iXOff = SciCall_GetXOffset();
SciCall_SetXOffset(0);
SciCall_CallTipShow(iPos, Encoding_GetTitleInfoA());
SciCall_SetXOffset(iXOff);
Globals.CallTipType = CT_ENC_INFO;
#endif
if (IS_ENC_ENFORCED()) {
WCHAR wchBuf[128] = { L'\0' };
StringCchPrintf(wchBuf, COUNTOF(wchBuf), L"ForcedEncoding='%s'", g_Encodings[encDetection.forcedEncoding].wchLabel);
SetAdditionalTitleInfo(wchBuf);
}
if (bForceEncDetection && !Encoding_IsNONE(iAnalyzedEncoding)) {
iForcedEncoding = (iAnalyzedEncoding == CPI_ASCII_7BIT) ? CPI_ANSI_DEFAULT : iAnalyzedEncoding; // no bIsReliable check (forced unreliable detection)
if (!Encoding_IsNONE(encDetection.fileVarEncoding) && FileVars_IsValidEncoding(&Globals.fvCurFile)) {
WCHAR wchBuf[128] = { L'\0' };
StringCchPrintf(wchBuf, COUNTOF(wchBuf), L" - FilEncTag='%s'",
g_Encodings[FileVars_GetEncoding(&Globals.fvCurFile)].wchLabel);
AppendAdditionalTitleInfo(wchBuf);
}
WCHAR wcBuf[128] = { L'\0' };
StringCchPrintf(wcBuf, ARRAYSIZE(wcBuf), L" - OS-CP='%s'", g_Encodings[CPI_ANSI_DEFAULT].wchLabel);
AppendAdditionalTitleInfo(wcBuf);
}
if (Flags.bDevDebugMode && IS_ENC_ENFORCED()) {
WCHAR wchBuf[128] = { L'\0' };
StringCchPrintf(wchBuf, COUNTOF(wchBuf), L"ForcedEncoding='%s'", g_Encodings[iForcedEncoding].wchLabel);
SetAdditionalTitleInfo(wchBuf);
}
// ------------------------------------------------------
if (!IS_ENC_ENFORCED())
{
bool const bIsUnicode = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUNICODE(iAnalyzedEncoding);
if (iAnalyzedEncoding == CPI_NONE)
{
iAnalyzedEncoding = iAnalyzeFallback;
confidence = Settings2.AnalyzeReliableConfidenceLevel;
}
else if (iAnalyzedEncoding == CPI_ASCII_7BIT) {
iAnalyzedEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
confidence = 1.0;
}
else {
if ((bSkipUTFDetection && bIsUnicode) || (bSkipANSICPDetection && !bIsUnicode)) {
iAnalyzedEncoding = CPI_NONE;
confidence = 0.0;
}
}
}
else {
iAnalyzedEncoding = iForcedEncoding;
confidence = 1.0;
}
bool const bIsReliable = (confidence >= Settings2.AnalyzeReliableConfidenceLevel);
// --------------------------------------------------------------------------
// === UNICODE ( UTF-16LE / UTF-16BE ) ===
// --------------------------------------------------------------------------
// --- 3rd Unicode Checks
bool const bIsUnicodeDetected = !IS_ENC_ENFORCED() && Encoding_IsUNICODE(encDetection.unicodeAnalysis);
bool const bIsUnicodeForced = Encoding_IsUNICODE(iForcedEncoding);
// choose best encoding guess
cpi_enc_t const iFileEncWeak = Encoding_SrcWeak(CPI_GET);
// set Preferred Encoding
cpi_enc_t iPreferredEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
if (IS_ENC_ENFORCED()) {
iPreferredEncoding = iForcedEncoding;
}
else if (!Encoding_IsNONE(iFileEncWeak)) {
iPreferredEncoding = iFileEncWeak;
}
else if (!Encoding_IsNONE(iAnalyzedEncoding) && (bIsReliable || !Settings.UseReliableCEDonly)) {
iPreferredEncoding = iAnalyzedEncoding;
}
else if (Encoding_IsNONE(iPreferredEncoding)) {
iPreferredEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
}
// --------------------------------------------------------------------------
bool const bIsUTF8Sig = ((cbData >= 3) ? IsUTF8Signature(lpData) : false);
bool bBOM = false;
bool bReverse = false;
bool const bIsUnicodeAnalyzed = ((Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable) && !IS_ENC_ENFORCED() && !bSkipUTFDetection && !bIsUTF8Sig);
cpi_enc_t const encUnicode = bSkipUTFDetection ? CPI_NONE : GetUnicodeEncoding(lpData, cbData, &bBOM, &bReverse);
if (cbData == 0) {
FileVars_Init(NULL, 0, &Globals.fvCurFile);
status->iEOLMode = Settings.DefaultEOLMode;
status->iEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : iPreferredEncoding;
EditSetNewText(hwnd, "", 0, bClearUndoHistory);
SciCall_SetEOLMode(Settings.DefaultEOLMode);
FreeMem(lpData);
}
else if (bIsUnicodeForced || (!IS_ENC_ENFORCED() && (bIsUnicodeAnalyzed || !Encoding_IsNONE(encUnicode))))
if (Encoding_IsUNICODE(encDetection.Encoding) || bIsUnicodeDetected)
{
// === UNICODE ===
if (Encoding_IsNONE(encUnicode))
{
bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);
// ----------------------------------------------------------------------
status->iEncoding = encDetection.bHasBOM ? (encDetection.bIsReverse ? CPI_UNICODEBEBOM : CPI_UNICODEBOM) :
(encDetection.bIsReverse ? CPI_UNICODEBE : CPI_UNICODE);
// ----------------------------------------------------------------------
if ((iForcedEncoding == CPI_UNICODE) || bBOM_LE) {
bBOM = bBOM_LE;
bReverse = false;
}
else if ((iForcedEncoding == CPI_UNICODEBE) || bBOM_BE) {
bBOM = bBOM_BE;
bReverse = true;
}
}
if (encDetection.bIsReverse) { SwabEx(lpData, lpData, cbData); }
if (bReverse)
{
SwabEx(lpData, lpData, cbData);
status->iEncoding = (bBOM ? CPI_UNICODEBEBOM : CPI_UNICODEBE);
}
else {
status->iEncoding = (bBOM ? CPI_UNICODEBOM : CPI_UNICODE);
}
char* const lpDataUTF8 = AllocMem((cbData * 3) + 2, HEAP_ZERO_MEMORY);
char* lpDataUTF8 = AllocMem((cbData * 3) + 2, HEAP_ZERO_MEMORY);
ptrdiff_t convCnt = WideCharToMultiByteEx(Encoding_SciCP, 0, (bBOM) ? (LPWSTR)lpData + 1 : (LPWSTR)lpData,
(bBOM) ? (cbData / sizeof(WCHAR)) : (cbData / sizeof(WCHAR) + 1), lpDataUTF8, SizeOfMem(lpDataUTF8), NULL, NULL);
ptrdiff_t convCnt = WideCharToMultiByteEx(Encoding_SciCP, 0, (encDetection.bHasBOM ? (LPWSTR)lpData + 1 : (LPWSTR)lpData),
(encDetection.bHasBOM ? (cbData / sizeof(WCHAR)) : (cbData / sizeof(WCHAR) + 1)), lpDataUTF8, SizeOfMem(lpDataUTF8), NULL, NULL);
if (convCnt == 0) {
convCnt = WideCharToMultiByteEx(CP_ACP, 0, (encDetection.bHasBOM ? (LPWSTR)lpData + 1 : (LPWSTR)lpData),
-1, lpDataUTF8, SizeOfMem(lpDataUTF8), NULL, NULL);
status->bUnicodeErr = true;
convCnt = WideCharToMultiByteEx(CP_ACP, 0, (bBOM) ? (LPWSTR)lpData + 1 : (LPWSTR)lpData,
(-1), lpDataUTF8, SizeOfMem(lpDataUTF8), NULL, NULL);
}
if (convCnt != 0) {
FreeMem(lpData);
FileVars_Init(lpDataUTF8, convCnt - 1, &Globals.fvCurFile);
EditSetNewText(hwnd, lpDataUTF8, convCnt - 1, bClearUndoHistory);
EditDetectEOLMode(lpDataUTF8, convCnt - 1, status);
FreeMem(lpDataUTF8);
}
else {
FreeMem(lpDataUTF8);
FreeMem(lpData);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
return false;
}
FileVars_Init(lpDataUTF8, convCnt - 1, &Globals.fvCurFile);
EditSetNewText(hwnd, lpDataUTF8, convCnt - 1, bClearUndoHistory);
EditDetectEOLMode(lpDataUTF8, convCnt - 1, status);
FreeMem(lpDataUTF8);
}
else // === ALL OTHERS ===
{
// ----------------------------------------------------------------------
status->iEncoding = encDetection.Encoding;
// ----------------------------------------------------------------------
else { // === ALL OTHERS ===
// force file vars ?
FileVars_Init(lpData, cbData, &Globals.fvCurFile);
cpi_enc_t const iFileVarEncoding = (FileVars_IsValidEncoding(&Globals.fvCurFile) && !Settings.NoEncodingTags) ?
FileVars_GetEncoding(&Globals.fvCurFile) : CPI_NONE;
if (!IS_ENC_ENFORCED() && !Encoding_IsNONE(iFileVarEncoding)) {
iForcedEncoding = (Globals.fvCurFile.mask & FV_ENCODING) ? iFileVarEncoding : iForcedEncoding;
iPreferredEncoding = IS_ENC_ENFORCED() ? iForcedEncoding : iPreferredEncoding;
}
if (Flags.bDevDebugMode) {
if (!Encoding_IsNONE(iFileVarEncoding) && FileVars_IsValidEncoding(&Globals.fvCurFile)) {
WCHAR wchBuf[128] = { L'\0' };
StringCchPrintf(wchBuf, COUNTOF(wchBuf), L" - FilEncTag='%s'",
g_Encodings[FileVars_GetEncoding(&Globals.fvCurFile)].wchLabel);
AppendAdditionalTitleInfo(wchBuf);
}
}
UINT const uCodePage = Encoding_GetCodePage(status->iEncoding);
// === UTF-8 ? ===
bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
bool const bForcedUTF8 = Encoding_IsUTF8(iForcedEncoding);
bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
bool const bSoftHintUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && Encoding_IsUTF8(iPreferredEncoding); // non-reliable analysis = soft-hint
bool const bForcedUTF8 = Encoding_IsUTF8(encDetection.forcedEncoding);// ~ don't || encDetection.bIsUTF8Sig here !
bool const bAnalysisUTF8 = Encoding_IsUTF8(encDetection.analyzedEncoding) && encDetection.bIsAnalysisReliable;
bool const bSoftHintUTF8 = Encoding_IsUTF8(encDetection.analyzedEncoding) && Encoding_IsUTF8(encDetection.Encoding); // non-reliable analysis = soft-hint
bool const bRejectUTF8 = IS_ENC_ENFORCED() || !bValidUTF8 || (!bIsUTF8Sig && bSkipUTFDetection);
bool const bRejectUTF8 = (IS_ENC_ENFORCED() && !bForcedUTF8) || !bValidUTF8 || (!encDetection.bIsUTF8Sig && bSkipUTFDetection);
if (bForcedUTF8 || (!bRejectUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
if (bForcedUTF8 || (!bRejectUTF8 && (encDetection.bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8))) // soft-hint = prefer UTF-8
{
if (bIsUTF8Sig) {
if (encDetection.bIsUTF8Sig) {
EditSetNewText(hwnd, UTF8StringStart(lpData), cbData - 3, bClearUndoHistory);
status->iEncoding = CPI_UTF8SIGN;
EditDetectEOLMode(UTF8StringStart(lpData), cbData - 3, status);
@ -1283,20 +1168,20 @@ bool EditLoadFile(
status->iEncoding = CPI_UTF8;
EditDetectEOLMode(lpData, cbData, status);
}
FreeMem(lpData);
}
else { // === ALL OTHER ===
// ----------------------------------------------------------------------
status->iEncoding = Encoding_IsValid(iPreferredEncoding) ? iPreferredEncoding : CPI_ANSI_DEFAULT;
// ----------------------------------------------------------------------
if (((Encoding_GetCodePage(status->iEncoding) != CP_UTF7) && Encoding_IsEXTERNAL_8BIT(status->iEncoding)) ||
((Encoding_GetCodePage(status->iEncoding) == CP_UTF7) && IsValidUTF7(lpData, cbData))) {
UINT uCodePage = Encoding_GetCodePage(status->iEncoding);
else if ((uCodePage == CP_UTF7) && IsValidUTF7(lpData, cbData))
{
// load UTF-7/ASCII(7-bit) as ANSI/UTF-8
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory);
status->iEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
EditDetectEOLMode(lpData, cbData, status);
}
else { // === ALL OTHER NON UTF-8 ===
if (Encoding_IsEXTERNAL_8BIT(status->iEncoding))
{
LPWSTR lpDataWide = AllocMem(cbData * 2 + 16, HEAP_ZERO_MEMORY);
ptrdiff_t const cbDataWide = MultiByteToWideCharEx(uCodePage, 0, lpData, cbData, lpDataWide, (SizeOfMem(lpDataWide) / sizeof(WCHAR)));
if (cbDataWide != 0)
{
@ -1305,45 +1190,35 @@ bool EditLoadFile(
cbData = WideCharToMultiByteEx(Encoding_SciCP, 0, lpDataWide, cbDataWide, lpData, SizeOfMem(lpData), NULL, NULL);
if (cbData != 0) {
FreeMem(lpDataWide);
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory);
EditDetectEOLMode(lpData, cbData, status);
FreeMem(lpData);
FreeMem(lpDataWide);
}
else {
Encoding_Forced(CPI_NONE);
FreeMem(lpDataWide);
FreeMem(lpData);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
return false;
}
}
else {
Encoding_Forced(CPI_NONE);
FreeMem(lpDataWide);
FreeMem(lpData);
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
return false;
}
}
else {
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory);
EditDetectEOLMode(lpData, cbData, status);
FreeMem(lpData);
}
}
}
Encoding_SrcCmdLn(CPI_NONE);
Encoding_SrcWeak(CPI_NONE);
SciCall_SetCharacterCategoryOptimization(Encoding_IsCJK(encDetection.analyzedEncoding) ? 0x10000 : 0x1000);
SciCall_SetCharacterCategoryOptimization(Encoding_IsCJK(iAnalyzedEncoding) ? 0x10000 : 0x1000);
if (Flags.bDevDebugMode) {
WCHAR wcBuf[128] = { L'\0' };
StringCchPrintf(wcBuf, ARRAYSIZE(wcBuf), L" - OS-CP='%s'", g_Encodings[CPI_ANSI_DEFAULT].wchLabel);
AppendAdditionalTitleInfo(wcBuf);
}
Encoding_Forced(CPI_NONE);
FreeMem(lpData);
return true;
}
@ -1429,29 +1304,6 @@ bool EditSaveFile(
lpData = AllocMem(cbData + 4, HEAP_ZERO_MEMORY); //fix: +bom
cbData = SciCall_GetText((cbData+1), lpData);
// FIXME: move checks in front of disk file access
// Msg if file tag encoding does not correspond to BOM
/*if ((g_Encodings[iEncoding].uFlags & NCP_UNICODE) == 0 && (g_Encodings[iEncoding].uFlags & NCP_UTF8_SIGN) == 0) {
bool bEncodingMismatch = true;
FILEVARS fv;
FileVars_Init(lpData,cbData,&fv);
if (fv.mask & FV_ENCODING) {
int iAltEncoding;
if (FileVars_IsValidEncoding(&fv)) {
iAltEncoding = FileVars_GetEncoding(&fv);
if (iAltEncoding == iEncoding)
bEncodingMismatch = false;
else if ((g_Encodings[iAltEncoding].uFlags & NCP_UTF8) && (g_Encodings[iEncoding].uFlags & NCP_UTF8))
bEncodingMismatch = false;
}
if (bEncodingMismatch) {
InfoBoxLng(MB_OK,L"MsgEncodingMismatch",IDS_MUI_ENCODINGMISMATCH,
g_Encodings[iAltEncoding].wchLabel,
g_Encodings[iEncoding].wchLabel);
}
}
}*/
if (Encoding_IsUNICODE(status->iEncoding)) // UTF-16LE/BE_(BOM)
{
SetEndOfFile(hFile);
@ -8452,292 +8304,6 @@ void EditSetBookmarkList(HWND hwnd, LPCWSTR pszBookMarks)
}
//=============================================================================
//
// _SetFileVars()
//
// Scans the NUL-terminated text in 'buffer' for file-variable tags and stores
// every recognized one in *lpfv (value field plus the matching FV_* mask bit).
//   buffer : text to scan
//   cch    : length handed on to the UTF-16 BOM probes
//   lpfv   : record to update
static void _SetFileVars(char* buffer, size_t cch, LPFILEVARS lpfv)
{
  int value = 0;

  // a tag "enable-local-variables" with value 0 switches all tag handling off
  bool const bVarsEnabled = !Flags.NoFileVariables;
  bool const bOptedOut = bVarsEnabled &&
                         FileVars_ParseInt(buffer, "enable-local-variables", &value) && (value == 0);
  bool const bParseVars = bVarsEnabled && !bOptedOut;

  if (bParseVars)
  {
    if (FileVars_ParseInt(buffer, "tab-width", &value)) {
      lpfv->mask |= FV_TABWIDTH;
      lpfv->iTabWidth = clampi(value, 1, 256);
    }
    if (FileVars_ParseInt(buffer, "c-basic-indent", &value)) {
      lpfv->mask |= FV_INDENTWIDTH;
      lpfv->iIndentWidth = clampi(value, 0, 256);
    }
    if (FileVars_ParseInt(buffer, "indent-tabs-mode", &value)) {
      lpfv->mask |= FV_TABSASSPACES;
      lpfv->bTabsAsSpaces = (value == 0); // tag is inverted: non-zero means "use tabs"
    }
    if (FileVars_ParseInt(buffer, "c-tab-always-indent", &value)) {
      lpfv->mask |= FV_TABINDENTS;
      lpfv->bTabIndents = (value != 0);
    }
    if (FileVars_ParseInt(buffer, "truncate-lines", &value)) {
      lpfv->mask |= FV_WORDWRAP;
      lpfv->bWordWrap = (value == 0); // tag is inverted: non-zero means "no wrap"
    }
    if (FileVars_ParseInt(buffer, "fill-column", &value)) {
      lpfv->mask |= FV_LONGLINESLIMIT;
      lpfv->iLongLinesLimit = clampi(value, 0, LONG_LINES_MARKER_LIMIT);
    }
  }

  // Encoding tags are ignored when the scanned text carries a Unicode signature
  bool const bHasSignature = IsUTF8Signature(buffer) || Has_UTF16_LE_BOM(buffer, cch) || Has_UTF16_BE_BOM(buffer, cch);

  if (!(bHasSignature || Settings.NoEncodingTags || bOptedOut)) {
    // first matching tag name wins
    if (FileVars_ParseStr(buffer, "encoding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)) ||
        FileVars_ParseStr(buffer, "charset", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)) ||
        FileVars_ParseStr(buffer, "coding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding))) {
      lpfv->mask |= FV_ENCODING;
    }
  }

  if (lpfv->mask & FV_ENCODING) {
    lpfv->iEncoding = Encoding_MatchA(lpfv->tchEncoding);
  }

  if (bParseVars) {
    if (FileVars_ParseStr(buffer, "mode", lpfv->tchMode, COUNTOF(lpfv->tchMode))) {
      lpfv->mask |= FV_MODE;
    }
  }
}
//=============================================================================
//
// FileVars_Init()
//
// Resets *lpfv to the current Settings defaults and then scans the text for
// file variables: first the head of the data and, if nothing was found there
// and the data is longer than the scratch buffer, the tail of the data.
//   lpData : text to scan (may be NULL)
//   cbData : number of valid bytes in lpData
//   lpfv   : record to fill; it is zeroed and overwritten completely
// Returns true in all cases.
bool FileVars_Init(char* lpData, size_t cbData, LPFILEVARS lpfv)
{
  ZeroMemory(lpfv, sizeof(FILEVARS));

  lpfv->bTabIndents = Settings.TabIndents;
  lpfv->bTabsAsSpaces = Settings.TabsAsSpaces;
  lpfv->bWordWrap = Settings.WordWrap;
  lpfv->iTabWidth = Settings.TabWidth;
  lpfv->iIndentWidth = Settings.IndentWidth;
  lpfv->iLongLinesLimit = Settings.LongLinesLimit;
  lpfv->iEncoding = Settings.DefaultEncoding;

  // nothing to parse: all tag handling disabled, or no data
  if ((Flags.NoFileVariables && Settings.NoEncodingTags) || !lpData || !cbData) {
    return true;
  }

  char tmpbuf[LARGE_BUFFER];
  size_t const cch = min_s(cbData + 1, COUNTOF(tmpbuf));

  // Copy at most cbData source bytes (and at most what fits in front of the
  // terminator), so the copy routine is never asked to look at lpData[cbData].
  size_t const cchHead = min_s(cbData, COUNTOF(tmpbuf) - 1);
  StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData, cchHead);
  _SetFileVars(tmpbuf, cch, lpfv);

  // if no file vars found, look at EOF
  if ((lpfv->mask == 0) && (cbData > COUNTOF(tmpbuf))) {
    // last (COUNTOF(tmpbuf) - 1) bytes of the data; the explicit count keeps
    // the read inside [lpData, lpData + cbData)
    StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData + cbData - COUNTOF(tmpbuf) + 1, COUNTOF(tmpbuf) - 1);
    _SetFileVars(tmpbuf, cch, lpfv);
  }

  return true;
}
//=============================================================================
//
// FileVars_Apply()
//
// Pushes the effective tab/indent/wrap/long-line settings to the editor via
// the SciCall_* setters: a value comes from *lpfv when its FV_* mask bit is
// set, otherwise from Settings. Returns true in all cases.
bool FileVars_Apply(LPFILEVARS lpfv) {

  bool const bHasTabWidth = ((lpfv->mask & FV_TABWIDTH) != 0);

  // tab width
  int tabWidth = Settings.TabWidth;
  if (bHasTabWidth) {
    tabWidth = lpfv->iTabWidth;
  }
  SciCall_SetTabWidth(tabWidth);

  // indent width: explicit value, else 0 when only a tab width was given, else the setting
  int indentWidth = Settings.IndentWidth;
  if (lpfv->mask & FV_INDENTWIDTH) {
    indentWidth = lpfv->iIndentWidth;
  }
  else if (bHasTabWidth) {
    indentWidth = 0;
  }
  SciCall_SetIndent(indentWidth);

  // tabs vs. spaces
  bool tabsAsSpaces = Settings.TabsAsSpaces;
  if (lpfv->mask & FV_TABSASSPACES) {
    tabsAsSpaces = lpfv->bTabsAsSpaces;
  }
  SciCall_SetUseTabs(!tabsAsSpaces);

  // tab key indents
  bool tabIndents = Settings.TabIndents;
  if (lpfv->mask & FV_TABINDENTS) {
    tabIndents = lpfv->bTabIndents;
  }
  SciCall_SetTabIndents(tabIndents);

  SciCall_SetBackSpaceUnIndents(Settings.BackspaceUnindents);

  // word wrap
  bool wordWrap = Settings.WordWrap;
  if (lpfv->mask & FV_WORDWRAP) {
    wordWrap = lpfv->bWordWrap;
  }
  int wrapMode = SC_WRAP_NONE;
  if (wordWrap) {
    wrapMode = (Settings.WordWrapMode == 0) ? SC_WRAP_WHITESPACE : SC_WRAP_CHAR;
  }
  SciCall_SetWrapMode(wrapMode);

  // long line marker
  int longLinesLimit = Settings.LongLinesLimit;
  if (lpfv->mask & FV_LONGLINESLIMIT) {
    longLinesLimit = lpfv->iLongLinesLimit;
  }
  SciCall_SetEdgeColumn(longLinesLimit);
  Globals.iWrapCol = longLinesLimit;

  return true;
}
//=============================================================================
//
// FileVars_ParseInt()
//
// Looks (case-insensitively) for the tag 'pszName' in 'pszData'. The tag must
// not be preceded by an alphanumeric character, '-' or '_', and must be
// followed (after optional blanks) by ':' or '='.
// The value is read with "%i"; if that fails, a value starting with 't' yields
// 1 and one starting with 'n' or 'f' yields 0.
// Returns true and stores the number in *piValue on success, false otherwise.
bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue) {

  // --- find the tag ---
  char* pch = StrStrIA(pszData, pszName);
  bool bTagFound = false;

  while (pch && !bTagFound) {
    char const chBefore = (pch > pszData) ? pch[-1] : 0;
    bool const bInsideWord = IsCharAlphaNumericA(chBefore) || (chBefore == '-') || (chBefore == '_');

    pch += StringCchLenA(pszName, 0); // step over the candidate in any case

    if (!bInsideWord) {
      while (*pch == ' ') {
        ++pch;
      }
      bTagFound = ((*pch == ':') || (*pch == '='));
    }
    if (!bTagFound) {
      pch = StrStrIA(pch, pszName); // next candidate
    }
  }

  if (!pch) {
    return false;
  }

  // --- skip separators and quotes in front of the value ---
  while (*pch && StrChrIA(":=\"' \t", *pch)) {
    ++pch;
  }

  // --- isolate the alphanumeric value token ---
  char tch[32] = { '\0' };
  StringCchCopyNA(tch,COUNTOF(tch),pch,COUNTOF(tch));

  char* pTokenEnd = tch;
  while (*pTokenEnd && IsCharAlphaNumericA(*pTokenEnd)) {
    ++pTokenEnd;
  }
  *pTokenEnd = 0;
  StrTrimA(tch," \t:=\"'");

  // --- number first, then boolean-like words ---
  if (sscanf_s(tch,"%i",piValue) == 1) {
    return true;
  }

  switch (tch[0]) {
    case 't':
      *piValue = 1;
      return true;

    case 'n':
    case 'f':
      *piValue = 0;
      return true;

    default:
      break;
  }
  return false;
}
//=============================================================================
//
// FileVars_ParseStr()
//
// Looks (case-insensitively) for the tag 'pszName' in 'pszData'. The tag must
// not be preceded by an alphanumeric character, '-' or '_', and must be
// followed (after optional blanks) by ':' or '='.
// The value token consists of alphanumerics and "+-/_"; blanks are accepted
// too when a quote character precedes the value. At most 31 characters are kept.
// Returns true and copies the token to pszValue (capacity cchValue) when the
// tag was found, false otherwise.
bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue) {

  // --- find the tag ---
  char* pch = StrStrIA(pszData, pszName);
  bool bTagFound = false;

  while (pch && !bTagFound) {
    char const chBefore = (pch > pszData) ? pch[-1] : 0;
    bool const bInsideWord = IsCharAlphaNumericA(chBefore) || (chBefore == '-') || (chBefore == '_');

    pch += StringCchLenA(pszName, 0); // step over the candidate in any case

    if (!bInsideWord) {
      while (*pch == ' ') {
        ++pch;
      }
      bTagFound = ((*pch == ':') || (*pch == '='));
    }
    if (!bTagFound) {
      pch = StrStrIA(pch, pszName); // next candidate
    }
  }

  if (!pch) {
    return false;
  }

  // --- skip separators; remember whether a quote was among them ---
  bool bQuoted = false;
  for (; *pch && StrChrIA(":=\"' \t",*pch); ++pch) {
    if ((*pch == '\'') || (*pch == '"')) {
      bQuoted = true;
    }
  }

  // --- isolate the value token ---
  char tch[32] = { '\0' };
  StringCchCopyNA(tch,COUNTOF(tch),pch,COUNTOF(tch));

  char* pTokenEnd = tch;
  while (*pTokenEnd && (IsCharAlphaNumericA(*pTokenEnd) || StrChrIA("+-/_", *pTokenEnd) || (bQuoted && *pTokenEnd == ' '))) {
    ++pTokenEnd;
  }
  *pTokenEnd = 0;
  StrTrimA(tch," \t:=\"'");

  StringCchCopyNA(pszValue,cchValue,tch,COUNTOF(tch));
  return true;
}
//=============================================================================
//
// FileVars_IsUTF8()
//
// Returns true when an encoding tag was recorded in *lpfv (FV_ENCODING set)
// and its text compares equal, ignoring case, to "utf-8" or "utf8" over the
// length of that literal.
bool FileVars_IsUTF8(LPFILEVARS lpfv) {

  if (!(lpfv->mask & FV_ENCODING)) {
    return false;
  }
  if (StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf-8",CSTRLEN("utf-8")) == 0) {
    return true;
  }
  return (StringCchCompareNIA(lpfv->tchEncoding,COUNTOF(lpfv->tchEncoding),"utf8", CSTRLEN("utf8")) == 0);
}
//=============================================================================
//
// FileVars_IsValidEncoding()
//
// Returns true when *lpfv carries an encoding tag (FV_ENCODING set) whose
// encoding index is valid and which is either an internal encoding or maps to
// a code page accepted by both IsValidCodePage() and GetCPInfo().
bool FileVars_IsValidEncoding(LPFILEVARS lpfv) {

  if (!(lpfv->mask & FV_ENCODING)) {
    return false;
  }
  if (!Encoding_IsValidIdx(lpfv->iEncoding)) {
    return false;
  }
  if (Encoding_IsINTERNAL(lpfv->iEncoding)) {
    return true;
  }

  CPINFO cpi;
  if (!IsValidCodePage(Encoding_GetCodePage(lpfv->iEncoding))) {
    return false;
  }
  if (!GetCPInfo(Encoding_GetCodePage(lpfv->iEncoding),&cpi)) {
    return false;
  }
  return true;
}
//=============================================================================
//
// FileVars_GetEncoding()
//
// Returns the encoding recorded in *lpfv when an encoding tag was found
// (FV_ENCODING set), CPI_NONE otherwise.
cpi_enc_t FileVars_GetEncoding(LPFILEVARS lpfv)
{
  bool const bHasEncodingTag = ((lpfv->mask & FV_ENCODING) != 0);
  if (!bHasEncodingTag) {
    return CPI_NONE;
  }
  return lpfv->iEncoding;
}
//=============================================================================
//
// EditBookmarkClick()

View File

@ -121,25 +121,6 @@ void EditMarkAllOccurrences(HWND hwnd, bool bForceClear);
void EditHideNotMarkedLineRange(HWND hwnd, bool bHideLines);
void EditSelectionMultiSelectAll();
#define FV_TABWIDTH 1
#define FV_INDENTWIDTH 2
#define FV_TABSASSPACES 4
#define FV_TABINDENTS 8
#define FV_WORDWRAP 16
#define FV_LONGLINESLIMIT 32
#define FV_ENCODING 64
#define FV_MODE 128
bool FileVars_Init(char* lpData, size_t cbData,LPFILEVARS lpfv);
bool FileVars_Apply(LPFILEVARS lpfv);
bool FileVars_ParseInt(char* pszData,char* pszName,int* piValue);
bool FileVars_ParseStr(char* pszData,char* pszName,char* pszValue,int cchValue);
bool FileVars_IsUTF8(LPFILEVARS lpfv);
bool FileVars_IsValidEncoding(LPFILEVARS lpfv);
cpi_enc_t FileVars_GetEncoding(LPFILEVARS lpfv);
//
// Folding Functions
//

View File

@ -55,17 +55,17 @@ cpi_enc_t Encoding_Current(cpi_enc_t iEncoding)
// ============================================================================
cpi_enc_t Encoding_SrcCmdLn(cpi_enc_t iSrcEncoding)
cpi_enc_t Encoding_Forced(cpi_enc_t iEncoding)
{
static cpi_enc_t SourceEncoding = CPI_NONE;
if (iSrcEncoding >= 0) {
if (Encoding_IsValid(iSrcEncoding))
SourceEncoding = iSrcEncoding;
if (iEncoding >= 0) {
if (Encoding_IsValid(iEncoding))
SourceEncoding = iEncoding;
else
SourceEncoding = CPI_ANSI_DEFAULT;
}
else if (iSrcEncoding == CPI_NONE) {
else if (iEncoding == CPI_NONE) {
SourceEncoding = CPI_NONE;
}
return SourceEncoding;
@ -211,17 +211,17 @@ int Encoding_MapIniSetting(bool bLoad, int iSetting)
// ============================================================================
cpi_enc_t Encoding_MapUnicode(cpi_enc_t iUni)
cpi_enc_t Encoding_MapSignature(cpi_enc_t iUni)
{
if (iUni == CPI_UTF8SIGN) {
return CPI_UTF8;
}
if (iUni == CPI_UNICODEBOM) {
return CPI_UNICODE;
}
if (iUni == CPI_UNICODEBEBOM) {
return CPI_UNICODEBE;
}
if (iUni == CPI_UTF8SIGN) {
return CPI_UTF8;
}
return iUni;
}
// ============================================================================

View File

@ -68,19 +68,19 @@ typedef struct _np2encoding {
} NP2ENCODING;
cpi_enc_t Encoding_Current(cpi_enc_t iEncoding); // getter/setter
cpi_enc_t Encoding_SrcCmdLn(cpi_enc_t iSrcEncoding); // getter/setter
cpi_enc_t Encoding_Forced(cpi_enc_t iEncoding); // getter/setter
cpi_enc_t Encoding_SrcWeak(cpi_enc_t iSrcWeakEnc); // getter/setter
bool Encoding_HasChanged(cpi_enc_t iOriginalEncoding); // query/setter
void Encoding_InitDefaults();
int Encoding_MapIniSetting(bool, int iSetting);
cpi_enc_t Encoding_MapUnicode(cpi_enc_t iUni);
void Encoding_SetLabel(cpi_enc_t iEncoding);
cpi_enc_t Encoding_MatchW(LPCWSTR pwszTest);
cpi_enc_t Encoding_MatchA(const char* pchTest);
bool Encoding_IsValid(cpi_enc_t iTestEncoding);
cpi_enc_t Encoding_GetByCodePage(const UINT codepage);
cpi_enc_t Encoding_MapSignature(cpi_enc_t iUni);
void Encoding_AddToListView(HWND hwnd, cpi_enc_t idSel, bool);
bool Encoding_GetFromListView(HWND hwnd, cpi_enc_t* pidEncoding);
void Encoding_AddToComboboxEx(HWND hwnd, cpi_enc_t idSel, bool);
@ -145,8 +145,48 @@ inline bool IsDBCSCodePage(UINT cp) {
return ((cp == 932) || (cp == 936) || (cp == 949) || (cp == 950) || (cp == 951) || (cp == 1361));
}
cpi_enc_t Encoding_AnalyzeText(const char* const text, const size_t len, float* confidence_io, const cpi_enc_t encodingHint);
cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse);
// ----------------------------------------------------------------------------
#define FV_TABWIDTH 1
#define FV_INDENTWIDTH 2
#define FV_TABSASSPACES 4
#define FV_TABINDENTS 8
#define FV_WORDWRAP 16
#define FV_LONGLINESLIMIT 32
#define FV_ENCODING 64
#define FV_MODE 128
bool FileVars_Init(const char* lpData, size_t cbData, LPFILEVARS lpfv);
bool FileVars_Apply(LPFILEVARS lpfv);
bool FileVars_ParseInt(char* pszData, char* pszName, int* piValue);
bool FileVars_ParseStr(char* pszData, char* pszName, char* pszValue, int cchValue);
bool FileVars_IsUTF8(LPFILEVARS lpfv);
bool FileVars_IsValidEncoding(LPFILEVARS lpfv);
cpi_enc_t FileVars_GetEncoding(LPFILEVARS lpfv);
// ----------------------------------------------------------------------------
// Result record returned by Encoding_DetectEncoding().
// NOTE(review): the field notes below are derived from how EditLoadFile()
// consumes the record; confirm against the implementation of
// Encoding_DetectEncoding().
typedef struct _enc_det_t
{
cpi_enc_t Encoding; // final detection result
// statistic:
cpi_enc_t forcedEncoding;   // an encoding counts as enforced when this is not CPI_NONE
cpi_enc_t fileVarEncoding;  // presumably the encoding named by a file-variable tag, CPI_NONE if none
cpi_enc_t analyzedEncoding; // result of the text analysis
cpi_enc_t unicodeAnalysis;  // presumably the result of the UTF-16 check; tested with Encoding_IsUNICODE()
// flags:
bool bIsAnalysisReliable; // analyzedEncoding is treated as reliable when set
bool bHasBOM;             // when set, the loader skips the first WCHAR of UTF-16 data
bool bIsReverse;          // when set, the loader byte-swaps UTF-16 data before conversion
bool bIsUTF8Sig;          // when set, the loader skips the 3-byte UTF-8 signature
} ENC_DET_T;
ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, const size_t cbData,
bool bSkipUTFDetection, bool bSkipANSICPDetection, bool bForceEncDetection);
// ----------------------------------------------------------------------------
const char* Encoding_GetTitleInfoA();
const WCHAR* Encoding_GetTitleInfoW();

View File

@ -27,6 +27,7 @@
#define WIN32_LEAN_AND_MEAN 1
#define NOMINMAX 1
#include <windows.h>
#include <shlwapi.h>
#define STRSAFE_NO_CB_FUNCTIONS
#define STRSAFE_NO_DEPRECATE // don't allow deprecated functions
@ -39,7 +40,9 @@
extern "C" {
#include "TypeDefs.h"
#include "Helpers.h"
#include "Encoding.h"
#include "SciCall.h"
}
// CED - Compact Encoding Detection (by Google)
@ -504,6 +507,56 @@ extern "C" void ChangeEncodingCodePage(const cpi_enc_t cpi, UINT newCP)
//=============================================================================
// Checks (via IsTextUnicode) whether the buffer looks like UTF-16 text.
//   pBuffer    : data to inspect; at most the first 2048 bytes are examined
//   len        : number of bytes in pBuffer
//   lpbBOM     : optional, set to true/false on a positive result: a BOM was found
//   lpbReverse : optional, set to true/false on a positive result: data is big-endian
// Returns CPI_UNICODEBOM / CPI_UNICODEBEBOM when a BOM was found, CPI_UNICODE /
// CPI_UNICODEBE when only the heuristics matched, and CPI_NONE otherwise.
// When CPI_NONE is returned, *lpbBOM and *lpbReverse are left untouched.
cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse)
{
  cpi_enc_t iEncoding = CPI_NONE;

  size_t const enoughData = 2048LL;
  size_t const cb = (len < enoughData) ? len : enoughData;

  if (!pBuffer || cb < 2) { return iEncoding; }

  // IS_TEXT_UNICODE_UNICODE_MASK -> IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
  // IS_TEXT_UNICODE_REVERSE_MASK -> IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
  // IS_TEXT_UNICODE_NOT_UNICODE_MASK -> IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
  // IS_TEXT_UNICODE_NOT_ASCII_MASK -> IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
  //
  int const iAllTests = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;

  int iTest = iAllTests;
  /*bool const ok =*/ (void)IsTextUnicode(pBuffer, (int)cb, &iTest); // don't rely on result ok

  if (iTest == iAllTests) {
    iTest = 0; // iTest doesn't seem to have been modified ...
  }

  bool const bHasBOM = (iTest & IS_TEXT_UNICODE_SIGNATURE);
  bool const bHasRBOM = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE);

  bool const bIsUnicode = (iTest & IS_TEXT_UNICODE_UNICODE_MASK);
  bool const bIsReverse = (iTest & IS_TEXT_UNICODE_REVERSE_MASK);
  bool const bIsIllegal = (iTest & IS_TEXT_UNICODE_NOT_UNICODE_MASK);

  //bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);

  // A BOM decides on its own; without one the heuristics must be legal and
  // must point to exactly one byte order.
  if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse)))
  {
    if (lpbBOM) {
      *lpbBOM = (bHasBOM || bHasRBOM);
    }
    if (lpbReverse) {
      // The BOM is authoritative for the byte order: a little-endian BOM must
      // not be reported as reversed just because a reversed-order heuristic
      // flag is set as well - otherwise this flag would contradict the
      // returned encoding (CPI_UNICODEBOM).
      *lpbReverse = (bHasRBOM || (!bHasBOM && bIsReverse));
    }
    if (bHasBOM || bHasRBOM) {
      iEncoding = bHasBOM ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
    }
    else if (bIsUnicode || bIsReverse) {
      iEncoding = bIsUnicode ? CPI_UNICODE : CPI_UNICODEBE;
    }
  }
  return iEncoding;
}
// ============================================================================
constexpr Encoding _MapCPI2CEDEncoding(const cpi_enc_t cpiEncoding)
{
@ -682,7 +735,8 @@ inline float max_f(float x, float y) { return (x > y) ? x : y; }
// --------------------------------------------------------------------------
extern "C" cpi_enc_t Encoding_AnalyzeText
//extern "C" cpi_enc_t Encoding_AnalyzeText
cpi_enc_t Encoding_AnalyzeText
(
const char* const text, const size_t len,
float* confidence_io, const cpi_enc_t encodingHint)
@ -836,58 +890,6 @@ extern "C" cpi_enc_t Encoding_AnalyzeText
// ============================================================================
// Probes the beginning of a buffer for UTF-16 text using IsTextUnicode().
//
//   pBuffer    : raw data to inspect (may be NULL)
//   len        : number of valid bytes in pBuffer; at most the first 2048 bytes are probed
//   lpbBOM     : optional out, true if a (reverse) UTF-16 signature was reported
//   lpbReverse : optional out, true if the detected encoding is the byte-reversed (BE) one
//
// Returns CPI_UNICODEBOM / CPI_UNICODEBEBOM if a signature was reported,
// CPI_UNICODE / CPI_UNICODEBE if only the statistical tests matched unambiguously,
// otherwise CPI_NONE. The out parameters are written only when the result is not CPI_NONE.
cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM, bool* lpbReverse)
{
  size_t const enoughData = 2048LL;
  size_t const cb = (len < enoughData) ? len : enoughData;
  if (!pBuffer || cb < 2) { return CPI_NONE; }

  // IS_TEXT_UNICODE_UNICODE_MASK      ->  IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
  // IS_TEXT_UNICODE_REVERSE_MASK      ->  IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
  // IS_TEXT_UNICODE_NOT_UNICODE_MASK  ->  IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
  // IS_TEXT_UNICODE_NOT_ASCII_MASK    ->  IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
  int const iAllTests = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;

  int iTest = iAllTests;
  (void)IsTextUnicode(pBuffer, (int)cb, &iTest); // the BOOL result is deliberately ignored, only the flags are used
  if (iTest == iAllTests) {
    iTest = 0; // flags come back untouched -> treat as "no test passed"
  }

  bool const bHasBOM    = (iTest & IS_TEXT_UNICODE_SIGNATURE) != 0;
  bool const bHasRBOM   = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE) != 0;
  bool const bIsUnicode = (iTest & IS_TEXT_UNICODE_UNICODE_MASK) != 0;
  bool const bIsReverse = (iTest & IS_TEXT_UNICODE_REVERSE_MASK) != 0;
  bool const bIsIllegal = (iTest & IS_TEXT_UNICODE_NOT_UNICODE_MASK) != 0;

  bool const bSignature = bHasBOM || bHasRBOM;
  // without a signature, exactly one byte order must have matched and nothing illegal may be flagged
  bool const bStatistics = (bIsUnicode != bIsReverse) && !bIsIllegal;

  if (!bSignature && !bStatistics) {
    return CPI_NONE;
  }

  // A signature is authoritative for the byte order. Previously a little-endian BOM
  // combined with any reverse statistic flag reported "reverse" although
  // CPI_UNICODEBOM was returned; the flag now always agrees with the returned encoding.
  bool const bBigEndian = bSignature ? !bHasBOM : bIsReverse;

  if (lpbBOM) {
    *lpbBOM = bSignature;
  }
  if (lpbReverse) {
    *lpbReverse = bBigEndian;
  }

  if (bSignature) {
    return bBigEndian ? CPI_UNICODEBEBOM : CPI_UNICODEBOM;
  }
  return bBigEndian ? CPI_UNICODEBE : CPI_UNICODE;
}
// ============================================================================
//=============================================================================
//
// _SetEncodingTitleInfo()
@ -936,3 +938,431 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
::MultiByteToWideChar(CP_UTF7, 0, chEncodingInfo, -1, wchEncodingInfo, ARRAYSIZE(wchEncodingInfo));
}
//=============================================================================
//
// _SetFileVars()
//
// Scans the NUL-terminated text in `buffer` for file variables and records every
// one found in `lpfv` (value plus the matching FV_* bit in lpfv->mask).
//
//   buffer : text excerpt to scan
//   cch    : length passed on to the UTF-16 BOM checks
//   lpfv   : receives the parsed settings
//
// "enable-local-variables: 0" inside the text switches off all variables of that excerpt.
// Encoding tags (encoding / charset / coding) are honored only if the excerpt carries
// no Unicode signature and Settings.NoEncodingTags is not set.
static void _SetFileVars(char* buffer, size_t cch, LPFILEVARS lpfv)
{
  bool const bParseVars = !Flags.NoFileVariables;
  bool bDisableFileVar = false;

  if (bParseVars) {
    int enabled = 0;
    if (FileVars_ParseInt(buffer, "enable-local-variables", &enabled) && (enabled == 0)) {
      bDisableFileVar = true;
    }
  }

  if (bParseVars && !bDisableFileVar) {
    int value = 0;

    if (FileVars_ParseInt(buffer, "tab-width", &value)) {
      lpfv->iTabWidth = clampi(value, 1, 256);
      lpfv->mask |= FV_TABWIDTH;
    }
    if (FileVars_ParseInt(buffer, "c-basic-indent", &value)) {
      lpfv->iIndentWidth = clampi(value, 0, 256);
      lpfv->mask |= FV_INDENTWIDTH;
    }
    if (FileVars_ParseInt(buffer, "indent-tabs-mode", &value)) {
      lpfv->bTabsAsSpaces = (value == 0); // "indent with tabs" off -> use spaces
      lpfv->mask |= FV_TABSASSPACES;
    }
    if (FileVars_ParseInt(buffer, "c-tab-always-indent", &value)) {
      lpfv->bTabIndents = (value != 0);
      lpfv->mask |= FV_TABINDENTS;
    }
    if (FileVars_ParseInt(buffer, "truncate-lines", &value)) {
      lpfv->bWordWrap = (value == 0); // truncating lines is the opposite of wrapping
      lpfv->mask |= FV_WORDWRAP;
    }
    if (FileVars_ParseInt(buffer, "fill-column", &value)) {
      lpfv->iLongLinesLimit = clampi(value, 0, LONG_LINES_MARKER_LIMIT);
      lpfv->mask |= FV_LONGLINESLIMIT;
    }
  }

  // Unicode Sig
  bool const bHasSignature = IsUTF8Signature(buffer) || Has_UTF16_LE_BOM(buffer, cch) || Has_UTF16_BE_BOM(buffer, cch);

  if (!bHasSignature && !Settings.NoEncodingTags && !bDisableFileVar) {
    // first tag that parses wins
    bool const bTagFound =
      FileVars_ParseStr(buffer, "encoding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)) ||
      FileVars_ParseStr(buffer, "charset", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding)) ||
      FileVars_ParseStr(buffer, "coding", lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding));
    if (bTagFound) {
      lpfv->mask |= FV_ENCODING;
    }
  }

  if (lpfv->mask & FV_ENCODING) {
    lpfv->iEncoding = Encoding_MatchA(lpfv->tchEncoding);
  }

  if (bParseVars && !bDisableFileVar) {
    if (FileVars_ParseStr(buffer, "mode", lpfv->tchMode, COUNTOF(lpfv->tchMode))) {
      lpfv->mask |= FV_MODE;
    }
  }
}
//=============================================================================
//
// FileVars_Init()
//
// Resets `lpfv` to the current Settings defaults and then parses file variables
// from the data: first from the head of the buffer and, if nothing was found and
// the data is larger than the scan buffer, from its tail.
//
//   lpData : raw file data (may be NULL)
//   cbData : number of valid bytes in lpData
//   lpfv   : receives defaults and parsed values
//
// Returns false only if lpfv is NULL, otherwise true (also when nothing was parsed).
extern "C" bool FileVars_Init(const char* lpData, size_t cbData, LPFILEVARS lpfv)
{
  if (!lpfv) { return false; }

  ZeroMemory(lpfv, sizeof(FILEVARS));
  lpfv->bTabIndents = Settings.TabIndents;
  lpfv->bTabsAsSpaces = Settings.TabsAsSpaces;
  lpfv->bWordWrap = Settings.WordWrap;
  lpfv->iTabWidth = Settings.TabWidth;
  lpfv->iIndentWidth = Settings.IndentWidth;
  lpfv->iLongLinesLimit = Settings.LongLinesLimit;
  lpfv->iEncoding = Settings.DefaultEncoding;

  if ((Flags.NoFileVariables && Settings.NoEncodingTags) || !lpData || !cbData) { return true; }

  char tmpbuf[LARGE_BUFFER];
  size_t const cchMax = COUNTOF(tmpbuf) - 1; // payload capacity, one slot is kept for the terminator

  // Head of the data. Copy at most cbData bytes: asking for cbData + 1 made the
  // copy routine look at lpData[cbData], which is past the stated data length.
  size_t const cchHead = min_s(cbData, cchMax);
  StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData, cchHead);
  _SetFileVars(tmpbuf, cchHead, lpfv);

  // if no file vars found, look at EOF
  if ((lpfv->mask == 0) && (cbData > COUNTOF(tmpbuf))) {
    // last cchMax bytes of the data; same start offset as before, but the
    // source count no longer reaches one byte beyond the end of lpData
    StringCchCopyNA(tmpbuf, COUNTOF(tmpbuf), lpData + (cbData - cchMax), cchMax);
    _SetFileVars(tmpbuf, cchMax, lpfv);
  }
  return true;
}
//=============================================================================
//
// FileVars_Apply()
//
// Pushes tab, indent, wrap and long-line settings to the editor control.
// For each setting the value from `lpfv` is used if its FV_* bit is set in
// lpfv->mask, otherwise the global Settings value. Always returns true.
extern "C" bool FileVars_Apply(LPFILEVARS lpfv)
{
  bool const bHasTabWidth = (lpfv->mask & FV_TABWIDTH) != 0;

  // tab width
  int tabWidth = Settings.TabWidth;
  if (bHasTabWidth) {
    tabWidth = lpfv->iTabWidth;
  }
  SciCall_SetTabWidth(tabWidth);

  // indent width: an explicit tab width without explicit indent width yields 0
  int indentWidth;
  if (lpfv->mask & FV_INDENTWIDTH) {
    indentWidth = lpfv->iIndentWidth;
  }
  else if (bHasTabWidth) {
    indentWidth = 0;
  }
  else {
    indentWidth = Settings.IndentWidth;
  }
  SciCall_SetIndent(indentWidth);

  // tabs vs. spaces
  bool tabsAsSpaces = Settings.TabsAsSpaces;
  if (lpfv->mask & FV_TABSASSPACES) {
    tabsAsSpaces = lpfv->bTabsAsSpaces;
  }
  SciCall_SetUseTabs(!tabsAsSpaces);

  // tab key behavior
  bool tabIndents = Settings.TabIndents;
  if (lpfv->mask & FV_TABINDENTS) {
    tabIndents = lpfv->bTabIndents;
  }
  SciCall_SetTabIndents(tabIndents);

  SciCall_SetBackSpaceUnIndents(Settings.BackspaceUnindents);

  // word wrap
  bool wordWrap = Settings.WordWrap;
  if (lpfv->mask & FV_WORDWRAP) {
    wordWrap = lpfv->bWordWrap;
  }
  int wrapMode = SC_WRAP_NONE;
  if (wordWrap) {
    wrapMode = (Settings.WordWrapMode == 0) ? SC_WRAP_WHITESPACE : SC_WRAP_CHAR;
  }
  SciCall_SetWrapMode(wrapMode);

  // long line marker
  int longLinesLimit = Settings.LongLinesLimit;
  if (lpfv->mask & FV_LONGLINESLIMIT) {
    longLinesLimit = lpfv->iLongLinesLimit;
  }
  SciCall_SetEdgeColumn(longLinesLimit);
  Globals.iWrapCol = longLinesLimit;

  return true;
}
//=============================================================================
//
// FileVars_ParseInt()
//
// Shared search used by FileVars_ParseInt() and FileVars_ParseStr().
// Looks (case-insensitively) for an occurrence of `pszName` in `pszData` that
//   - is not preceded by an alphanumeric character, '-' or '_', and
//   - is followed, after optional blanks, by ':' or '='.
// Returns a pointer to that ':' / '=' or NULL if there is no such occurrence.
static char* _FindFileVarAssignment(char* pszData, char* pszName)
{
  size_t const cchName = StringCchLenA(pszName, 0); // loop invariant, measured once

  char* pvStart = StrStrIA(pszData, pszName);
  while (pvStart) {
    char const chPrev = (pvStart > pszData) ? *(pvStart - 1) : 0;
    bool const bAtWordStart = !IsCharAlphaNumericA(chPrev) && (chPrev != '-') && (chPrev != '_');

    pvStart += cchName;
    if (bAtWordStart) {
      while (*pvStart == ' ') {
        pvStart++;
      }
      if (*pvStart == ':' || *pvStart == '=') {
        break;
      }
    }
    pvStart = StrStrIA(pvStart, pszName); // next
  }
  return pvStart;
}

// Parses the integer value of file variable `pszName` from `pszData`.
//
//   pszData : NUL-terminated text to search
//   pszName : variable name (matched case-insensitively)
//   piValue : receives the value on success
//
// The value is read with "%i"; if that fails, a value starting with 't' yields 1
// and a value starting with 'n' or 'f' yields 0.
// Returns true if a value was stored in *piValue, false otherwise.
extern "C" bool FileVars_ParseInt(char* pszData, char* pszName, int* piValue)
{
  char* pvStart = _FindFileVarAssignment(pszData, pszName);
  if (!pvStart) {
    return false;
  }

  // skip separator, quotes and blanks in front of the value
  while (*pvStart && StrChrIA(":=\"' \t", *pvStart)) {
    pvStart++;
  }

  char tch[32] = { '\0' };
  StringCchCopyNA(tch, COUNTOF(tch), pvStart, COUNTOF(tch));

  // the value ends at the first non-alphanumeric character
  char* pvEnd = tch;
  while (*pvEnd && IsCharAlphaNumericA(*pvEnd)) {
    pvEnd++;
  }
  *pvEnd = 0;
  StrTrimA(tch, " \t:=\"'");

  if (sscanf_s(tch, "%i", piValue) == 1) {
    return true;
  }
  if (tch[0] == 't') {
    *piValue = 1;
    return true;
  }
  if (tch[0] == 'n' || tch[0] == 'f') {
    *piValue = 0;
    return true;
  }
  return false;
}


//=============================================================================
//
//  FileVars_ParseStr()
//
// Parses the string value of file variable `pszName` from `pszData`.
//
//   pszData  : NUL-terminated text to search
//   pszName  : variable name (matched case-insensitively)
//   pszValue : receives the value (at most 31 characters are considered)
//   cchValue : capacity of pszValue in characters
//
// The value consists of alphanumeric characters and "+-/_"; blanks are accepted
// too if a quote was seen in front of the value.
// Returns true if the variable was found, false otherwise.
extern "C" bool FileVars_ParseStr(char* pszData, char* pszName, char* pszValue, int cchValue)
{
  char* pvStart = _FindFileVarAssignment(pszData, pszName);
  if (!pvStart) {
    return false;
  }

  // skip separator, quotes and blanks in front of the value, remember quoting
  bool bQuoted = false;
  while (*pvStart && StrChrIA(":=\"' \t", *pvStart)) {
    if (*pvStart == '\'' || *pvStart == '"') {
      bQuoted = true;
    }
    pvStart++;
  }

  char tch[32] = { '\0' };
  StringCchCopyNA(tch, COUNTOF(tch), pvStart, COUNTOF(tch));

  char* pvEnd = tch;
  while (*pvEnd && (IsCharAlphaNumericA(*pvEnd) || StrChrIA("+-/_", *pvEnd) || (bQuoted && *pvEnd == ' '))) {
    pvEnd++;
  }
  *pvEnd = 0;
  StrTrimA(tch, " \t:=\"'");

  StringCchCopyNA(pszValue, cchValue, tch, COUNTOF(tch));
  return true;
}
//=============================================================================
//
// FileVars_IsUTF8()
//
// Returns true if an encoding tag was parsed (FV_ENCODING set) and its text
// starts with "utf-8" or "utf8" (compared case-insensitively).
extern "C" bool FileVars_IsUTF8(LPFILEVARS lpfv)
{
  if (!(lpfv->mask & FV_ENCODING)) {
    return false;
  }
  if (StringCchCompareNIA(lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding), "utf-8", CSTRLEN("utf-8")) == 0) {
    return true;
  }
  return (StringCchCompareNIA(lpfv->tchEncoding, COUNTOF(lpfv->tchEncoding), "utf8", CSTRLEN("utf8")) == 0);
}
//=============================================================================
//
// FileVars_IsValidEncoding()
//
// Returns true if an encoding tag was parsed (FV_ENCODING set), it maps to a
// valid encoding index, and that encoding is either an internal one or a code
// page for which both IsValidCodePage() and GetCPInfo() succeed.
extern "C" bool FileVars_IsValidEncoding(LPFILEVARS lpfv)
{
  if (!(lpfv->mask & FV_ENCODING)) {
    return false;
  }
  if (!Encoding_IsValidIdx(lpfv->iEncoding)) {
    return false;
  }
  if (Encoding_IsINTERNAL(lpfv->iEncoding)) {
    return true;
  }

  CPINFO cpInfo;
  if (!IsValidCodePage(Encoding_GetCodePage(lpfv->iEncoding))) {
    return false;
  }
  if (!GetCPInfo(Encoding_GetCodePage(lpfv->iEncoding), &cpInfo)) {
    return false;
  }
  return true;
}
//=============================================================================
//
// FileVars_GetEncoding()
//
// Returns the encoding stored in `lpfv` if the FV_ENCODING bit is set in its
// mask, otherwise CPI_NONE.
extern "C" cpi_enc_t FileVars_GetEncoding(LPFILEVARS lpfv)
{
  if (!(lpfv->mask & FV_ENCODING)) {
    return CPI_NONE;
  }
  return lpfv->iEncoding;
}
//=============================================================================
//=============================================================================
//=============================================================================
//
// Encoding_DetectEncoding()
//
// Determines which encoding should be used to interpret the given file data.
//
//   pszFile              : file path; only its extension is inspected here (.nfo / .diz)
//   lpData, cbData       : raw file bytes and their count
//   bSkipUTFDetection    : if true, GetUnicodeEncoding() is not called
//   bSkipANSICPDetection : if true, Encoding_AnalyzeText() is not called and the
//                          fallback encoding is used as analysis result
//   bForceEncDetection   : if true, the analysis runs even when an encoding is already
//                          forced, and a valid analysis result replaces the forced encoding
//
// Returns an ENC_DET_T holding the chosen encoding (.Encoding) together with the
// intermediate results (forced, file variable, analyzed and Unicode analysis encodings,
// BOM / signature / reliability flags).
// Side effect: Globals.fvCurFile is re-initialized via FileVars_Init().
extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData, const size_t cbData,
bool bSkipUTFDetection, bool bSkipANSICPDetection, bool bForceEncDetection)
{
ENC_DET_T encDetRes = { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, false, false, false, false };
FileVars_Init(lpData, cbData, &Globals.fvCurFile);
// signature checks done directly on the raw bytes
bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);
encDetRes.bHasBOM = (bBOM_LE || bBOM_BE);
encDetRes.bIsReverse = bBOM_BE;
encDetRes.bIsUTF8Sig = ((cbData >= 3) ? IsUTF8Signature(lpData) : false);
// --- 1st check for force encodings ---
LPCWSTR lpszExt = PathFindExtension(pszFile);
// true if the extension equals ".nfo" or ".diz" (a compare result of 0 means equal)
bool const bNfoDizDetected = (lpszExt && !(StringCchCompareXI(lpszExt, L".nfo") && StringCchCompareXI(lpszExt, L".diz")));
// NOTE(review): this macro is not #undef'd in this function and stays defined for the rest of the translation unit
#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding))
encDetRes.forcedEncoding = (Settings.LoadNFOasOEM && bNfoDizDetected) ? Globals.DOSEncoding : Encoding_Forced(CPI_GET);
if (!IS_ENC_ENFORCED())
{
encDetRes.fileVarEncoding = (FileVars_IsValidEncoding(&Globals.fvCurFile)) ? FileVars_GetEncoding(&Globals.fvCurFile) : CPI_NONE;
// force file vars ?
if (Encoding_IsValid(encDetRes.fileVarEncoding) && (Globals.fvCurFile.mask & FV_ENCODING)) {
encDetRes.forcedEncoding = encDetRes.fileVarEncoding;
}
}
// --- 2nd Use Encoding Analysis if applicable
cpi_enc_t const iAnalyzeFallback = Settings.UseDefaultForFileEncoding ? Settings.DefaultEncoding : CPI_ANSI_DEFAULT;
// at most the first 200000 bytes are handed to the analysis
size_t const cbNbytes4Analysis = (cbData < 200000L) ? cbData : 200000L;
float confidence = 0.0f;
if (!IS_ENC_ENFORCED() || bForceEncDetection)
{
if (!bSkipANSICPDetection)
{
encDetRes.analyzedEncoding = Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &confidence, iAnalyzeFallback);
}
// analysis skipped or without result: use the fallback and rate it as just reliable
if (encDetRes.analyzedEncoding == CPI_NONE)
{
encDetRes.analyzedEncoding = iAnalyzeFallback;
confidence = Settings2.AnalyzeReliableConfidenceLevel;
}
if (!bSkipUTFDetection)
{
// may overwrite bHasBOM / bIsReverse set from the raw BOM checks above
encDetRes.unicodeAnalysis = GetUnicodeEncoding(lpData, cbData, &(encDetRes.bHasBOM), &(encDetRes.bIsReverse));
if (Encoding_IsNONE(encDetRes.unicodeAnalysis) && Encoding_IsUNICODE(encDetRes.analyzedEncoding))
{
encDetRes.unicodeAnalysis = encDetRes.analyzedEncoding;
}
//// check for UTF-32, can't handle
//if (encDetRes.bHasBOM && !bBOM_LE && !bBOM_BE) {
// encDetRes.unicodeAnalysis = CPI_NONE;
//}
//else if (encDetRes.bHasBOM && encDetRes.bIsReverse && !bBOM_BE) {
// encDetRes.unicodeAnalysis = CPI_NONE;
//}
//else if (encDetRes.bHasBOM && !encDetRes.bIsReverse && !bBOM_LE) {
// // must be UTF-32, can't handle
// encDetRes.unicodeAnalysis = CPI_NONE;
//}
}
// forced detection: the analysis result replaces any previously forced encoding
if (bForceEncDetection) {
if (Encoding_IsValid(encDetRes.analyzedEncoding)) {
// no bIsReliable check (forced unreliable detection)
encDetRes.forcedEncoding = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) ? CPI_ANSI_DEFAULT : encDetRes.analyzedEncoding;
}
else if (Encoding_IsValid(encDetRes.unicodeAnalysis)) {
encDetRes.forcedEncoding = encDetRes.unicodeAnalysis;
}
}
}
//bool const bIsUTF8orUnicodeAnalysis = Encoding_IsUTF8(encDetRes.analyzedEncoding) || Encoding_IsUNICODE(encDetRes.analyzedEncoding);
// nothing forced: normalize the analysis result (no result -> fallback, 7-bit ASCII -> UTF-8 or ANSI default)
if (!IS_ENC_ENFORCED())
{
if (encDetRes.analyzedEncoding == CPI_NONE)
{
encDetRes.analyzedEncoding = iAnalyzeFallback;
confidence = Settings2.AnalyzeReliableConfidenceLevel;
}
else if (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) {
encDetRes.analyzedEncoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
confidence = 1.0;
}
}
encDetRes.bIsAnalysisReliable = (confidence >= Settings2.AnalyzeReliableConfidenceLevel);
// --------------------------------------------------------------------------
// --- choose best encoding guess ----
// --------------------------------------------------------------------------
// order of precedence: forced encoding, analysis result (reliable, or reliability
// not required), UTF-8 signature, UTF-16 BOM, weak source encoding, initial default
// init Preferred Encoding
encDetRes.Encoding = Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT;
if (IS_ENC_ENFORCED())
{
encDetRes.Encoding = encDetRes.forcedEncoding;
}
else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly))
{
encDetRes.Encoding = encDetRes.analyzedEncoding;
}
else if (encDetRes.bIsUTF8Sig)
{
encDetRes.Encoding = CPI_UTF8SIGN;
}
else if (bBOM_LE || bBOM_BE) {
encDetRes.Encoding = bBOM_LE ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
encDetRes.bIsReverse = bBOM_BE;
}
else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET))) {
encDetRes.Encoding = Encoding_SrcWeak(CPI_GET);
}
// last resort if the chosen encoding is not valid
if (!Encoding_IsValid(encDetRes.Encoding)) { encDetRes.Encoding = CPI_ANSI_DEFAULT; }
return encDetRes;
}

View File

@ -1671,14 +1671,14 @@ ptrdiff_t WideCharToMultiByteEx(
ptrdiff_t outBufSiz = cbMultiByte;
ptrdiff_t bytesConv = 0LL;
static ptrdiff_t const maxBufSize = (INT_MAX - 128);
static ptrdiff_t const maxBufSize = (INT_MAX - 1);
BOOL bIsDefCharUse = FALSE;
while ((inBufCnt > 0LL) || (inBufCnt == -1LL))
{
int const cnt = (inBufCnt > maxBufSize) ? (int)maxBufSize : ((inBufCnt > 0LL) ? (int)inBufCnt : -1);
int const siz = (outBufSiz > maxBufSize) ? (int)maxBufSize : (int)outBufSiz;
int const siz = (outBufSiz > (ptrdiff_t)INT_MAX) ? INT_MAX : (int)outBufSiz;
int const bytes = WideCharToMultiByte(CodePage, dwFlags, inPtr, cnt, outPtr, siz, lpDefaultChar, lpUsedDefaultChar);
if (bytes == 0) { break; }
@ -1716,12 +1716,12 @@ ptrdiff_t MultiByteToWideCharEx(
ptrdiff_t outBufCnt = cchWideChar;
ptrdiff_t wcharConv = 0LL;
static ptrdiff_t const maxBufSize = (INT_MAX - 128);
static ptrdiff_t const maxBufSize = (INT_MAX - 1);
while ((inBufSiz > 0LL) || (inBufSiz == -1LL))
{
int const siz = (inBufSiz > maxBufSize) ? (int)maxBufSize : ((inBufSiz > 0LL) ? (int)inBufSiz : -1);
int const cnt = (outBufCnt > maxBufSize) ? (int)maxBufSize : (int)outBufCnt;
int const cnt = (outBufCnt > (ptrdiff_t)INT_MAX) ? INT_MAX : (int)outBufCnt;
int const wchars = MultiByteToWideChar(CodePage, dwFlags, inPtr, siz, outPtr, cnt);
if (wchars == 0) { break; }

View File

@ -535,10 +535,9 @@ static void CALLBACK MQ_ExecuteNext(HWND hwnd, UINT uMsg, UINT_PTR idEvent, DWOR
//
// CommandLine Parsing Flags
//
static LPWSTR s_lpEncodingArg = NULL;
static LPWSTR s_lpMatchArg = NULL;
static LPWSTR s_lpSchemeArg = NULL;
static LPWSTR s_lpOrigFileArg = NULL;
static LPWSTR s_lpMatchArg = NULL;
static WCHAR s_lpFileArg[MAX_PATH+1];
static cpi_enc_t s_flagSetEncoding = CPI_NONE;
@ -639,7 +638,6 @@ static void _InitGlobals()
Globals.flagShellUseSystemMRU = 0;
Globals.flagPrintFileAndLeave = 0;
Globals.bForceReLoadAsUTF8 = false;
Globals.DOSEncoding = CPI_NONE;
Globals.bZeroBasedColumnIndex = false;
Globals.bZeroBasedCharacterCount = false;
@ -1217,9 +1215,7 @@ HWND InitInstance(HINSTANCE hInstance,LPCWSTR pszCmdLine,int nCmdShow)
}
// Source Encoding
if (s_lpEncodingArg) {
Encoding_SrcCmdLn(Encoding_MatchW(s_lpEncodingArg));
}
Encoding_Forced(s_flagSetEncoding);
// Pathname parameter
if (s_IsThisAnElevatedRelaunch || (StrIsNotEmpty(s_lpFileArg) /*&& !g_flagNewFromClipboard*/))
@ -1297,14 +1293,14 @@ HWND InitInstance(HINSTANCE hInstance,LPCWSTR pszCmdLine,int nCmdShow)
}
}
else {
if (Encoding_SrcCmdLn(CPI_GET) != CPI_NONE) {
Encoding_Current(Encoding_SrcCmdLn(CPI_GET));
Encoding_HasChanged(Encoding_SrcCmdLn(CPI_GET));
if (Encoding_IsValid(Encoding_Forced(CPI_GET))) {
Encoding_Current(Encoding_Forced(CPI_GET));
Encoding_HasChanged(Encoding_Forced(CPI_GET));
}
}
// reset
Encoding_SrcCmdLn(CPI_NONE);
Encoding_Forced(CPI_NONE);
s_flagQuietCreate = false;
s_flagKeepTitleExcerpt = false;
@ -2821,7 +2817,7 @@ LRESULT MsgCopyData(HWND hwnd, WPARAM wParam, LPARAM lParam)
if (params->flagFileSpecified) {
bool bOpened = false;
Encoding_SrcCmdLn(params->iSrcEncoding);
Encoding_Forced(params->flagSetEncoding);
if (PathIsDirectory(&params->wchData)) {
WCHAR tchFile[MAX_PATH] = { L'\0' };
@ -2876,7 +2872,7 @@ LRESULT MsgCopyData(HWND hwnd, WPARAM wParam, LPARAM lParam)
}
}
// reset
Encoding_SrcCmdLn(CPI_NONE);
Encoding_Forced(CPI_NONE);
}
if (params->flagJumpTo) {
@ -3930,7 +3926,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
{
if (StrIsNotEmpty(Globals.CurrentFile))
{
cpi_enc_t iNewEncoding = Encoding_MapUnicode(Encoding_Current(CPI_GET));
cpi_enc_t iNewEncoding = Encoding_MapSignature(Encoding_Current(CPI_GET));
if (IsSaveNeeded(ISN_GET)) {
INT_PTR const answer = InfoBoxLng(MB_YESNO | MB_ICONQUESTION, NULL, IDS_MUI_ASK_RECODE);
@ -3942,7 +3938,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
if (RecodeDlg(hwnd,&iNewEncoding))
{
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),Globals.CurrentFile);
Encoding_SrcCmdLn(iNewEncoding);
Encoding_Forced(iNewEncoding);
FileLoad(true,false,true,false,true, false, tchMaxPathBuffer);
}
}
@ -5863,7 +5859,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
case CMD_RECODEDEFAULT:
{
if (StrIsNotEmpty(Globals.CurrentFile)) {
Encoding_SrcCmdLn(Encoding_MapUnicode(Settings.DefaultEncoding));
Encoding_Forced(Settings.DefaultEncoding);
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),Globals.CurrentFile);
FileLoad(false,false,true,true,true,false,tchMaxPathBuffer);
}
@ -5874,7 +5870,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
case CMD_RECODEANSI:
{
if (StrIsNotEmpty(Globals.CurrentFile)) {
Encoding_SrcCmdLn(CPI_ANSI_DEFAULT);
Encoding_Forced(CPI_ANSI_DEFAULT);
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),Globals.CurrentFile);
FileLoad(false,false,true,true,Settings.SkipANSICodePageDetection,false,tchMaxPathBuffer);
}
@ -5885,7 +5881,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
case CMD_RECODEOEM:
{
if (StrIsNotEmpty(Globals.CurrentFile)) {
Encoding_SrcCmdLn(CPI_OEM);
Encoding_Forced(CPI_OEM);
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),Globals.CurrentFile);
FileLoad(false,false,true,true,true,false,tchMaxPathBuffer);
}
@ -5896,7 +5892,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
case CMD_RECODEGB18030:
{
if (StrIsNotEmpty(Globals.CurrentFile)) {
Encoding_SrcCmdLn(Encoding_GetByCodePage(54936)); // GB18030
Encoding_Forced(Encoding_GetByCodePage(54936)); // GB18030
StringCchCopy(tchMaxPathBuffer, COUNTOF(tchMaxPathBuffer), Globals.CurrentFile);
FileLoad(false, false, true, true, true, false, tchMaxPathBuffer);
}
@ -5908,10 +5904,9 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
{
if (StrIsNotEmpty(Globals.CurrentFile))
{
Globals.bForceReLoadAsUTF8 = true;
Encoding_Forced(CPI_UTF8);
StringCchCopy(tchMaxPathBuffer,COUNTOF(tchMaxPathBuffer),Globals.CurrentFile);
FileLoad(false, false, true, true, true, false, tchMaxPathBuffer);
Globals.bForceReLoadAsUTF8 = false;
}
}
break;
@ -5921,7 +5916,7 @@ LRESULT MsgCommand(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam)
{
if (StrIsNotEmpty(Globals.CurrentFile))
{
Globals.bForceReLoadAsUTF8 = false;
Encoding_Forced(CPI_NONE);
StringCchCopy(tchMaxPathBuffer, COUNTOF(tchMaxPathBuffer), Globals.CurrentFile);
FileLoad(false, false, true, false, false, true, tchMaxPathBuffer);
}
@ -7659,6 +7654,8 @@ void ParseCommandLine()
bool bContinue = true;
bool bIsFileArg = false;
s_flagSetEncoding = CPI_NONE;
while (bContinue && ExtractFirstArgument(lp3, lp1, lp2, (int)len)) {
// options
if (lp1[1] == L'\0') {
@ -7697,13 +7694,9 @@ void ParseCommandLine()
s_flagSetEncoding = CPI_UTF8SIGN;
}
// maybe parsed encoding
else if (encoding != CPI_NONE) {
if (s_lpEncodingArg) { LocalFree(s_lpEncodingArg); }
s_lpEncodingArg = StrDup(lp1);
else if (Encoding_IsValid(encoding)) {
s_flagSetEncoding = encoding;
}
// EOL Mode
else if (StringCchCompareXI(lp1, L"CRLF") == 0 || StringCchCompareXI(lp1, L"CR+LF") == 0) {
s_flagSetEOLMode = IDM_LINEENDINGS_CRLF - IDM_LINEENDINGS_CRLF + 1;
@ -7872,8 +7865,7 @@ void ParseCommandLine()
case L'E':
if (ExtractFirstArgument(lp2, lp1, lp2, (int)len)) {
if (s_lpEncodingArg) { LocalFree(s_lpEncodingArg); }
s_lpEncodingArg = StrDup(lp1);
s_flagSetEncoding = Encoding_MatchW(lp1);
}
break;
@ -9571,12 +9563,13 @@ bool FileLoad(bool bDontSave, bool bNew, bool bReload,
EditSetNewText(Globals.hwndEdit,"",0, true);
Style_SetDefaultLexer(Globals.hwndEdit);
SciCall_SetEOLMode(Settings.DefaultEOLMode);
if (Encoding_SrcCmdLn(CPI_GET) != CPI_NONE) {
fioStatus.iEncoding = Encoding_SrcCmdLn(CPI_GET);
if (Encoding_IsValid(Encoding_Forced(CPI_GET))) {
fioStatus.iEncoding = Encoding_Forced(CPI_GET);
Encoding_Current(fioStatus.iEncoding);
Encoding_HasChanged(fioStatus.iEncoding);
}
else {
fioStatus.iEncoding = Settings.DefaultEncoding;
Encoding_Current(Settings.DefaultEncoding);
Encoding_HasChanged(Settings.DefaultEncoding);
}
@ -9593,8 +9586,9 @@ bool FileLoad(bool bDontSave, bool bNew, bool bReload,
int idx;
if (!bReload && MRU_FindFile(Globals.pFileMRU,szFileName,&idx)) {
fioStatus.iEncoding = Globals.pFileMRU->iEncoding[idx];
if (fioStatus.iEncoding > 0)
Encoding_SrcCmdLn(Encoding_MapUnicode(fioStatus.iEncoding));
if (Encoding_IsValid(fioStatus.iEncoding)) {
Encoding_SrcWeak(fioStatus.iEncoding);
}
}
else {
fioStatus.iEncoding = Encoding_Current(CPI_GET);
@ -9767,7 +9761,7 @@ bool FileRevert(LPCWSTR szFileName, bool bIgnoreCmdLnEnc)
DOCVIEWPOS_T const docView = EditGetCurrentDocView(Globals.hwndEdit);
if (bIgnoreCmdLnEnc) {
Encoding_SrcCmdLn(CPI_NONE); // ignore history too
Encoding_Forced(CPI_NONE); // ignore history too
}
Encoding_SrcWeak(Encoding_Current(CPI_GET));
@ -9832,7 +9826,7 @@ bool DoElevatedRelaunch(EditFileIOStatus* pFioStatus, bool bAutoSaveOnRelaunch)
// remove forced command line encoding from argument list
WCHAR wchEncoding[80] = { L'\0' };
wchEncoding[0] = L'/';
Encoding_GetNameW(Encoding_SrcCmdLn(CPI_GET), &wchEncoding[1], COUNTOF(wchEncoding)-1);
Encoding_GetNameW(Encoding_Forced(CPI_GET), &wchEncoding[1], COUNTOF(wchEncoding)-1);
if (StrIsNotEmpty(&wchEncoding[1])) {
lpArgs = StrCutI(lpArgs, wchEncoding);
}
@ -10312,7 +10306,6 @@ bool ActivatePrevInst()
params->iInitialLine = s_iInitialLine;
params->iInitialColumn = s_iInitialColumn;
params->iSrcEncoding = (s_lpEncodingArg) ? Encoding_MatchW(s_lpEncodingArg) : CPI_NONE;
params->flagSetEncoding = s_flagSetEncoding;
params->flagSetEOLMode = s_flagSetEOLMode;
params->flagTitleExcerpt = 0;
@ -10390,7 +10383,6 @@ bool ActivatePrevInst()
params->iInitialLine = s_iInitialLine;
params->iInitialColumn = s_iInitialColumn;
params->iSrcEncoding = (s_lpEncodingArg) ? Encoding_MatchW(s_lpEncodingArg) : CPI_NONE;
params->flagSetEncoding = s_flagSetEncoding;
params->flagSetEOLMode = s_flagSetEOLMode;

View File

@ -37,7 +37,6 @@ typedef struct np3params {
int flagJumpTo;
int iInitialLine;
int iInitialColumn;
cpi_enc_t iSrcEncoding;
cpi_enc_t flagSetEncoding;
int flagSetEOLMode;
int flagTitleExcerpt;

View File

@ -324,7 +324,6 @@ typedef struct _globals_t
int flagShellUseSystemMRU;
int flagPrintFileAndLeave;
bool bForceReLoadAsUTF8;
bool bZeroBasedColumnIndex;
bool bZeroBasedCharacterCount;
int iReplacedOccurrences;