Merge pull request #621 from RaiKoHoff/Dev_0820

UTF-8 and Unicode detection
This commit is contained in:
Rainer Kottenhoff 2018-08-22 15:01:32 +02:00 committed by GitHub
commit bd197525f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 378 additions and 395 deletions

View File

@ -1 +1 @@
1056
1067

View File

@ -118,9 +118,9 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
SetDlgItemText(hDlg, IDC_PWD_EDIT1, unicodeFileKey);
SetDlgItemText(hDlg, IDC_PWD_EDIT2, unicodeMasterKey);
ShowWindow(GetDlgItem(hDlg, IDC_PWD_CHECK3), hasMasterFileKey);
CheckDlgButton(hDlg, IDC_PWD_CHECK3, hasMasterFileKey ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hDlg, IDC_PWD_CHECK2, (hasBinFileKey | useFileKey) ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hDlg, IDC_PWD_CHECK1, useMasterKey ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hDlg, IDC_PWD_CHECK3, DlgBtnChk(hasMasterFileKey));
CheckDlgButton(hDlg, IDC_PWD_CHECK2, DlgBtnChk(hasBinFileKey | useFileKey));
CheckDlgButton(hDlg, IDC_PWD_CHECK1, DlgBtnChk(useMasterKey));
CenterDlgInParent(hDlg);
// Don't use: SetFocus( GetDlgItem( hDlg, IDC_PWD_EDIT1 ) );
SetDialogFocus(hDlg, GetDlgItem(hDlg, IDC_PWD_EDIT1));
@ -175,7 +175,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
{
WCHAR newFileKey[WKEY_LEN] = { 0 };
GetDlgItemText(hDlg, IDC_PWD_EDIT1, newFileKey, COUNTOF(newFileKey));
CheckDlgButton(hDlg, IDC_PWD_CHECK2, (newFileKey[0] <= ' ') ? BST_UNCHECKED : BST_CHECKED);
CheckDlgButton(hDlg, IDC_PWD_CHECK2, DlgBtnChk(newFileKey[0] > ' '));
}
break;
@ -186,7 +186,7 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
GetDlgItemText(hDlg, IDC_PWD_EDIT2, newMasKey, COUNTOF(newMasKey));
{
bool newuse = (newMasKey[0] > ' '); // no leading whitespace or empty passwords
CheckDlgButton(hDlg, IDC_PWD_CHECK1, newuse ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hDlg, IDC_PWD_CHECK1, DlgBtnChk(newuse));
if (newuse) { CheckDlgButton(hDlg, IDC_PWD_CHECK3, BST_UNCHECKED); }
}
@ -196,18 +196,16 @@ INT_PTR CALLBACK SetKeysDlgProc(HWND hDlg, UINT umsg, WPARAM wParam, LPARAM lPar
case IDC_PWD_CHECK3: // check reuse, uncheck set new and inverse
{
bool reuseMas = IsDlgButtonChecked(hDlg, IDC_PWD_CHECK3) == BST_CHECKED;
if (reuseMas) { CheckDlgButton(hDlg, IDC_PWD_CHECK1, reuseMas ? BST_UNCHECKED : BST_CHECKED); }
bool const reuseMas = IsDlgButtonChecked(hDlg, IDC_PWD_CHECK3) == BST_CHECKED;
if (reuseMas) { CheckDlgButton(hDlg, IDC_PWD_CHECK1, DlgBtnChk(!reuseMas)); }
}
break;
case IDC_PWD_CHECK1:
{
bool useMas = IsDlgButtonChecked(hDlg, IDC_PWD_CHECK1) == BST_CHECKED;
if (useMas) { CheckDlgButton(hDlg, IDC_PWD_CHECK3, useMas ? BST_UNCHECKED : BST_CHECKED); }
bool const useMas = IsDlgButtonChecked(hDlg, IDC_PWD_CHECK1) == BST_CHECKED;
if (useMas) { CheckDlgButton(hDlg, IDC_PWD_CHECK3, DlgBtnChk(!useMas)); }
}
break;

View File

@ -1380,7 +1380,7 @@ INT_PTR CALLBACK GetFilterDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPARAM lPara
SendDlgItemMessage(hwnd,IDC_FILTER,EM_LIMITTEXT,COUNTOF(tchFilter)-1,0);
SetDlgItemText(hwnd,IDC_FILTER,tchFilter);
CheckDlgButton(hwnd,IDC_NEGFILTER,bNegFilter);
CheckDlgButton(hwnd,IDC_NEGFILTER, DlgBtnChk(bNegFilter));
CenterDlgInParent(hwnd);
}
@ -1466,14 +1466,14 @@ INT_PTR CALLBACK GetFilterDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPARAM lPara
{
if (tchValue[1]) {
SetDlgItemText(hwnd,IDC_FILTER,&tchValue[1]);
CheckDlgButton(hwnd,IDC_NEGFILTER,TRUE);
CheckDlgButton(hwnd,IDC_NEGFILTER,BST_CHECKED);
}
else
MessageBeep(0);
}
else {
SetDlgItemText(hwnd,IDC_FILTER,tchValue);
CheckDlgButton(hwnd,IDC_NEGFILTER,FALSE);
CheckDlgButton(hwnd,IDC_NEGFILTER,BST_UNCHECKED);
}
}

View File

@ -79,6 +79,8 @@ void Toolbar_SetButtonImage(HWND,int,int);
LRESULT SendWMSize(HWND);
#define DlgBtnChk(b) ((b) ? BST_CHECKED : BST_UNCHECKED)
#define EnableCmd(hmenu,id,b) EnableMenuItem(hmenu,id,(b)\
?MF_BYCOMMAND|MF_ENABLED:MF_BYCOMMAND|MF_GRAYED)
@ -86,6 +88,9 @@ LRESULT SendWMSize(HWND);
?MF_BYCOMMAND|MF_CHECKED:MF_BYCOMMAND|MF_UNCHECKED)
#define Has_UTF16_LE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFEFF)
#define Has_UTF16_BE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFFFE) /* reverse */
#define GetLngString(id,pb,cb) LoadLngStringW((id),(pb),(cb))
#define GetLngStringA(id,pb,cb) LoadLngStringA((id),(pb),(cb))
int LoadLngStringW(UINT uID, LPWSTR lpBuffer, int nBufferMax);

View File

@ -191,7 +191,7 @@ static BOOL __fastcall _LngStrToMultiLngStr(WCHAR* pLngStr, WCHAR* pLngMultiStr,
if ((strLen > 0) && pLngMultiStr && (lngMultiStrSize > 0)) {
WCHAR* lngMultiStrPtr = pLngMultiStr;
WCHAR* last = pLngStr + (pLngStr[0] == 0xFEFF ? 1 : 0); // if read from unicode (UTF-16 LE) file
WCHAR* last = pLngStr + (Has_UTF16_LE_BOM(pLngStr) ? 1 : 0);
while (last && rtnVal) {
// make sure you validate the user input
WCHAR* next = StrNextTok(last, L",; :");

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="4.18.814.1056"
version="4.18.822.1067"
type="win32"
/>
<description>Notepad3 TinyExpr</description>

View File

@ -127,7 +127,7 @@ int MsgBoxLng(int iType, UINT uIdMsg, ...)
if (uIdMsg == IDS_MUI_ERR_LOADFILE || uIdMsg == IDS_MUI_ERR_SAVEFILE ||
uIdMsg == IDS_MUI_CREATEINI_FAIL || uIdMsg == IDS_MUI_WRITEINI_FAIL ||
uIdMsg == IDS_MUI_EXPORT_FAIL) {
LPVOID lpMsgBuf;
LPVOID lpMsgBuf = NULL;
WCHAR wcht;
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
@ -137,10 +137,12 @@ int MsgBoxLng(int iType, UINT uIdMsg, ...)
(LPTSTR)&lpMsgBuf,
0,
NULL);
StrTrim(lpMsgBuf, L" \a\b\f\n\r\t\v");
StringCchCat(szText, COUNTOF(szText), L"\n");
StringCchCat(szText, COUNTOF(szText), lpMsgBuf);
LocalFree(lpMsgBuf);
if (lpMsgBuf) {
StrTrim(lpMsgBuf, L" \a\b\f\n\r\t\v");
StringCchCat(szText, COUNTOF(szText), L"\n");
StringCchCat(szText, COUNTOF(szText), lpMsgBuf);
LocalFree(lpMsgBuf);
}
wcht = *CharPrev(szText, StrEnd(szText));
if (IsCharAlphaNumeric(wcht) || wcht == '"' || wcht == '\'')
StringCchCat(szText, COUNTOF(szText), L".");
@ -1426,9 +1428,9 @@ INT_PTR CALLBACK FileMRUDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPARAM lParam)
// Update view
SendMessage(hwnd,WM_COMMAND,MAKELONG(0x00A0,1),0);
CheckDlgButton(hwnd, IDC_SAVEMRU, g_bSaveRecentFiles ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_PRESERVECARET, g_bPreserveCaretPos ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_REMEMBERSEARCHPATTERN, g_bSaveFindReplace ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_SAVEMRU, DlgBtnChk(g_bSaveRecentFiles));
CheckDlgButton(hwnd, IDC_PRESERVECARET, DlgBtnChk(g_bPreserveCaretPos));
CheckDlgButton(hwnd, IDC_REMEMBERSEARCHPATTERN, DlgBtnChk(g_bSaveFindReplace));
//if (!g_bSaveRecentFiles) {
// DialogEnableWindow(hwnd,IDC_PRESERVECARET, false);
@ -2248,23 +2250,12 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
Encoding_AddToComboboxEx(GetDlgItem(hwnd,IDC_ENCODINGLIST),pdd->idEncoding,0);
if (bUseDefaultForFileEncoding)
CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, BST_CHECKED);
if (bSkipUnicodeDetection)
CheckDlgButton(hwnd,IDC_NOUNICODEDETECTION,BST_CHECKED);
if (bSkipANSICodePageDetection)
CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, BST_CHECKED);
if (bLoadASCIIasUTF8)
CheckDlgButton(hwnd,IDC_ASCIIASUTF8,BST_CHECKED);
if (bLoadNFOasOEM)
CheckDlgButton(hwnd,IDC_NFOASOEM,BST_CHECKED);
if (bNoEncodingTags)
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS,BST_CHECKED);
CheckDlgButton(hwnd, IDC_USEASREADINGFALLBACK, DlgBtnChk(bUseDefaultForFileEncoding));
CheckDlgButton(hwnd,IDC_NOUNICODEDETECTION, DlgBtnChk(bSkipUnicodeDetection));
CheckDlgButton(hwnd, IDC_NOANSICPDETECTION, DlgBtnChk(bSkipANSICodePageDetection));
CheckDlgButton(hwnd,IDC_ASCIIASUTF8, DlgBtnChk(bLoadASCIIasUTF8));
CheckDlgButton(hwnd,IDC_NFOASOEM, DlgBtnChk(bLoadNFOasOEM));
CheckDlgButton(hwnd,IDC_ENCODINGFROMFILEVARS, DlgBtnChk(bNoEncodingTags));
CenterDlgInParent(hwnd);
}
@ -2281,12 +2272,12 @@ INT_PTR CALLBACK SelectDefEncodingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
EndDialog(hwnd,IDCANCEL);
}
else {
bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED) ? 1 : 0;
bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED) ? 1 : 0;
bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED) ? 1 : 0;
bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED) ? 1 : 0;
bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED) ? 1 : 0;
bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED) ? 1 : 0;
bUseDefaultForFileEncoding = (IsDlgButtonChecked(hwnd, IDC_USEASREADINGFALLBACK) == BST_CHECKED);
bSkipUnicodeDetection = (IsDlgButtonChecked(hwnd,IDC_NOUNICODEDETECTION) == BST_CHECKED);
bSkipANSICodePageDetection = (IsDlgButtonChecked(hwnd, IDC_NOANSICPDETECTION) == BST_CHECKED);
bLoadASCIIasUTF8 = (IsDlgButtonChecked(hwnd,IDC_ASCIIASUTF8) == BST_CHECKED);
bLoadNFOasOEM = (IsDlgButtonChecked(hwnd,IDC_NFOASOEM) == BST_CHECKED);
bNoEncodingTags = (IsDlgButtonChecked(hwnd,IDC_ENCODINGFROMFILEVARS) == BST_CHECKED);
EndDialog(hwnd,IDOK);
}
}
@ -2577,11 +2568,8 @@ INT_PTR CALLBACK SelectDefLineEndingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LP
SendDlgItemMessage(hwnd,100,CB_SETCURSEL,(WPARAM)*piOption,0);
SendDlgItemMessage(hwnd,100,CB_SETEXTENDEDUI,true,0);
if (bFixLineEndings)
CheckDlgButton(hwnd,IDC_CONSISTENTEOLS,BST_CHECKED);
if (bAutoStripBlanks)
CheckDlgButton(hwnd,IDC_AUTOSTRIPBLANKS, BST_CHECKED);
CheckDlgButton(hwnd,IDC_CONSISTENTEOLS, DlgBtnChk(bFixLineEndings));
CheckDlgButton(hwnd,IDC_AUTOSTRIPBLANKS, DlgBtnChk(bAutoStripBlanks));
CenterDlgInParent(hwnd);
}
@ -2593,8 +2581,8 @@ INT_PTR CALLBACK SelectDefLineEndingDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LP
{
case IDOK: {
*piOption = (int)SendDlgItemMessage(hwnd,100,CB_GETCURSEL,0,0);
bFixLineEndings = (IsDlgButtonChecked(hwnd,IDC_CONSISTENTEOLS) == BST_CHECKED) ? 1 : 0;
bAutoStripBlanks = (IsDlgButtonChecked(hwnd,IDC_AUTOSTRIPBLANKS) == BST_CHECKED) ? 1 : 0;
bFixLineEndings = (IsDlgButtonChecked(hwnd,IDC_CONSISTENTEOLS) == BST_CHECKED);
bAutoStripBlanks = (IsDlgButtonChecked(hwnd,IDC_AUTOSTRIPBLANKS) == BST_CHECKED);
EndDialog(hwnd,IDOK);
}
break;

View File

@ -96,6 +96,7 @@ extern int g_iDefaultCharSet;
extern bool bLoadASCIIasUTF8;
extern bool bForceLoadASCIIasUTF8;
extern bool bLoadNFOasOEM;
extern bool bNoEncodingTags;
extern bool g_bAccelWordNavigation;
@ -1020,31 +1021,43 @@ bool EditLoadFile(
return false;
}
bool bPreferOEM = false;
bool bNfoDizDetected = false;
if (bLoadNFOasOEM)
{
if (lpszExt && !(StringCchCompareIX(lpszExt,L".nfo") && StringCchCompareIX(lpszExt,L".diz")))
bPreferOEM = true;
bNfoDizDetected = true;
}
size_t const cbNbytes4Analysis = (cbData < 200000L) ? cbData : 200000L;
int iPreferedEncoding = (bPreferOEM) ? g_DOSEncoding :
int iPreferedEncoding = (bNfoDizDetected) ? g_DOSEncoding :
((bUseDefaultForFileEncoding || (cbNbytes4Analysis == 0)) ? g_iDefaultNewFileEncoding : CPI_ANSI_DEFAULT);
// --------------------------------------------------------------------------
bool bIsReliable = false;
int iAnalyzedEncoding = (bSkipANSICPDetection && !g_bForceCompEncDetection) ? CPI_NONE :
Encoding_Analyze(lpData, cbNbytes4Analysis, iPreferedEncoding, &bIsReliable);
if (iAnalyzedEncoding == CPI_ASCII_7BIT) {
iAnalyzedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
int iAnalyzedEncoding = Encoding_Analyze(lpData, cbNbytes4Analysis, iPreferedEncoding, &bIsReliable);
if (!g_bForceCompEncDetection)
{
bool const bIsUnicode = Encoding_IsUTF8(iAnalyzedEncoding) || Encoding_IsUNICODE(iAnalyzedEncoding);
if (iAnalyzedEncoding == CPI_ASCII_7BIT) {
iAnalyzedEncoding = bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding; // stay on prefered
}
else {
if ((bSkipUTFDetection && bIsUnicode) || (bSkipANSICPDetection && !bIsUnicode)) {
iAnalyzedEncoding = CPI_NONE;
}
}
}
// --------------------------------------------------------------------------
int iForcedEncoding = bForceLoadASCIIasUTF8 ? CPI_UTF8 : Encoding_SrcCmdLn(CPI_GET);
if (Encoding_IsNONE(iForcedEncoding) && bNfoDizDetected) {
iForcedEncoding = g_DOSEncoding;
}
if (g_bForceCompEncDetection && !Encoding_IsNONE(iAnalyzedEncoding)) {
iForcedEncoding = iAnalyzedEncoding; // no bIsReliable check (forced)
iForcedEncoding = iAnalyzedEncoding; // no bIsReliable check (forced unreliable detection)
}
// --------------------------------------------------------------------------
@ -1054,13 +1067,10 @@ bool EditLoadFile(
if (!Encoding_IsNONE(iForcedEncoding)) {
iPreferedEncoding = iForcedEncoding;
}
else if (Encoding_IsUNICODE(iAnalyzedEncoding) && !bSkipUTFDetection) {
iPreferedEncoding = iAnalyzedEncoding;
}
else if (iFileEncWeak != CPI_NONE) {
iPreferedEncoding = iFileEncWeak;
}
else if (!Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable ) {
else if (!Encoding_IsNONE(iAnalyzedEncoding) && bIsReliable) {
iPreferedEncoding = iAnalyzedEncoding;
}
else if (Encoding_IsNONE(iPreferedEncoding)) {
@ -1077,48 +1087,35 @@ bool EditLoadFile(
if (cbData == 0) {
FileVars_Init(NULL,0,&fvCurFile);
*iEOLMode = iLineEndings[g_iDefaultEOLMode];
if (iForcedEncoding == CPI_NONE) {
if (bLoadASCIIasUTF8 && !bPreferOEM)
*iEncoding = CPI_UTF8;
else
*iEncoding = iPreferedEncoding;
}
else
*iEncoding = iForcedEncoding;
*iEncoding = !Encoding_IsNONE(iForcedEncoding) ? iForcedEncoding : (bLoadASCIIasUTF8 ? CPI_UTF8 : iPreferedEncoding);
EditSetNewText(hwnd,"",0);
SendMessage(hwnd,SCI_SETEOLMODE,iLineEndings[g_iDefaultEOLMode],0);
FreeMem(lpData);
}
// === UNICODE ===
else if (Encoding_IsUNICODE(iForcedEncoding) ||
(!bSkipUTFDetection && !bIsUTF8Sig
&& Encoding_IsNONE(iForcedEncoding)
&& (IsUnicode(lpData, cbData, &bBOM, &bReverse)
|| (Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable)
)
(Encoding_IsNONE(iForcedEncoding) && !bSkipUTFDetection && !bIsUTF8Sig
&& (IsValidUnicode(lpData, cbData, &bBOM, &bReverse)
|| (Encoding_IsUNICODE(iAnalyzedEncoding) && bIsReliable))
)
)
{
if (iForcedEncoding == CPI_UNICODE) {
bBOM = (*((UNALIGNED PWCHAR)lpData) == 0xFEFF);
bBOM = Has_UTF16_LE_BOM(lpData);
bReverse = false;
}
else if (iForcedEncoding == CPI_UNICODEBE)
bBOM = (*((UNALIGNED PWCHAR)lpData) == 0xFFFE);
else if (iForcedEncoding == CPI_UNICODEBE) {
bBOM = Has_UTF16_BE_BOM(lpData);
bReverse = true;
}
if (iForcedEncoding == CPI_UNICODEBE || bReverse) {
if (bReverse)
{
_swab(lpData,lpData,cbData);
if (bBOM)
*iEncoding = CPI_UNICODEBEBOM;
else
*iEncoding = CPI_UNICODEBE;
*iEncoding = (bBOM ? CPI_UNICODEBEBOM : CPI_UNICODEBE);
}
else {
if (bBOM)
*iEncoding = CPI_UNICODEBOM;
else
*iEncoding = CPI_UNICODE;
*iEncoding = (bBOM ? CPI_UNICODEBOM : CPI_UNICODE);
}
char* lpDataUTF8 = AllocMem((cbData * 3) + 2, HEAP_ZERO_MEMORY);
@ -1155,17 +1152,16 @@ bool EditLoadFile(
FileVars_Init(lpData,cbData,&fvCurFile);
// === UTF-8 ===
if (Encoding_IsUTF8(iForcedEncoding) ||
(!bSkipUTFDetection && !FileVars_IsNonUTF8(&fvCurFile)
&& Encoding_IsNONE(iForcedEncoding)
&& (bIsUTF8Sig
|| FileVars_IsUTF8(&fvCurFile)
|| (Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable)
|| (!bPreferOEM && (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8))
)
&& (IsUTF8(lpData, cbData) && !UTF8_ContainsInvalidChars(lpData, cbData))
)
)
bool const bHardRulesUTF8 = Encoding_IsUTF8(iForcedEncoding) || (FileVars_IsUTF8(&fvCurFile) && !bNoEncodingTags);
bool const bForcedNonUTF8 = !Encoding_IsNONE(iForcedEncoding) && !Encoding_IsUTF8(iForcedEncoding);
bool const bValidUTF8 = IsValidUTF8(lpData, cbData);
bool const bAnalysisUTF8 = Encoding_IsUTF8(iAnalyzedEncoding) && bIsReliable;
bool const bSoftHintUTF8 = (Encoding_IsUTF8(iPreferedEncoding) || bLoadASCIIasUTF8);
bool const bRejectUTF8 = bSkipUTFDetection || bForcedNonUTF8 || (FileVars_IsNonUTF8(&fvCurFile) && !bNoEncodingTags);
if (bHardRulesUTF8 || (!bRejectUTF8 && bValidUTF8 && (bIsUTF8Sig || bAnalysisUTF8 || bSoftHintUTF8)))
{
EditSetNewText(hwnd,"",0);
if (bIsUTF8Sig) {
@ -1187,17 +1183,14 @@ bool EditLoadFile(
*iEncoding = iForcedEncoding;
else {
*iEncoding = FileVars_GetEncoding(&fvCurFile);
if (Encoding_IsNONE(*iEncoding)) {
if (fvCurFile.mask & FV_ENCODING)
*iEncoding = CPI_ANSI_DEFAULT;
else {
*iEncoding = iPreferedEncoding;
}
if (Encoding_IsNONE(*iEncoding))
{
*iEncoding = ((fvCurFile.mask & FV_ENCODING) ? CPI_ANSI_DEFAULT : iPreferedEncoding);
}
}
if (((Encoding_GetCodePage(*iEncoding) != CP_UTF7) && Encoding_IsEXTERNAL_8BIT(*iEncoding)) ||
((Encoding_GetCodePage(*iEncoding) == CP_UTF7) && IsUTF7(lpData,cbData))) {
((Encoding_GetCodePage(*iEncoding) == CP_UTF7) && IsValidUTF7(lpData,cbData))) {
UINT uCodePage = Encoding_GetCodePage(*iEncoding);
@ -5519,14 +5512,14 @@ INT_PTR CALLBACK EditFindReplaceDlgProcW(HWND hwnd,UINT umsg,WPARAM wParam,LPARA
{
DialogEnableWindow(hwnd, IDC_DOT_MATCH_ALL, true);
CheckDlgButton(hwnd, IDC_WILDCARDSEARCH, BST_UNCHECKED); // Can not use wildcard search together with regexp
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, (bSaveTFBackSlashes) ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, DlgBtnChk(bSaveTFBackSlashes));
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, BST_CHECKED); // transform BS handled by regex
DialogEnableWindow(hwnd, IDC_FINDTRANSFORMBS, false);
}
else { // unchecked
DialogEnableWindow(hwnd, IDC_DOT_MATCH_ALL, false);
DialogEnableWindow(hwnd, IDC_FINDTRANSFORMBS, true);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, (bSaveTFBackSlashes) ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, DlgBtnChk(bSaveTFBackSlashes));
}
_SetSearchFlags(hwnd, sg_pefrData);
_DelayMarkAll(hwnd, 0, s_InitialSearchStart);
@ -5542,13 +5535,13 @@ INT_PTR CALLBACK EditFindReplaceDlgProcW(HWND hwnd,UINT umsg,WPARAM wParam,LPARA
{
CheckDlgButton(hwnd, IDC_FINDREGEXP, BST_UNCHECKED);
DialogEnableWindow(hwnd, IDC_DOT_MATCH_ALL, false);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, (bSaveTFBackSlashes) ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, DlgBtnChk(bSaveTFBackSlashes));
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, BST_CHECKED); // transform BS handled by regex
DialogEnableWindow(hwnd, IDC_FINDTRANSFORMBS, false);
}
else { // unchecked
DialogEnableWindow(hwnd, IDC_FINDTRANSFORMBS, true);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, (bSaveTFBackSlashes) ? BST_CHECKED : BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_FINDTRANSFORMBS, DlgBtnChk(bSaveTFBackSlashes));
}
_SetSearchFlags(hwnd, sg_pefrData);
_DelayMarkAll(hwnd, 0, s_InitialSearchStart);

View File

@ -587,51 +587,50 @@ const char* Encoding_GetParseNames(int iEncoding) {
// ============================================================================
bool IsUnicode(const char* pBuffer, size_t cb, bool* lpbBOM, bool* lpbReverse) {
int i = 0xFFFF;
bool IsValidUnicode(const char* pBuffer, size_t cb, bool* lpbBOM, bool* lpbReverse)
{
if (!pBuffer || cb < 2) { return false; }
bool bIsTextUnicode;
// IS_TEXT_UNICODE_UNICODE_MASK -> IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
// IS_TEXT_UNICODE_REVERSE_MASK -> IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
// IS_TEXT_UNICODE_NOT_UNICODE_MASK -> IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
// IS_TEXT_UNICODE_NOT_ASCII_MASK -> IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
//
int const iAllTests = IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK | IS_TEXT_UNICODE_NOT_ASCII_MASK;
bool bHasBOM;
bool bHasRBOM;
int iTest = iAllTests;
(void) IsTextUnicode(pBuffer, (int)cb, &iTest);
if (iTest == iAllTests) {
iTest = 0; // iTest doesn't seem to have been modified ...
}
if (!pBuffer || cb < 2)
return false;
bool const bHasBOM = Has_UTF16_LE_BOM(pBuffer) && (iTest & IS_TEXT_UNICODE_SIGNATURE);
bool const bHasRBOM = Has_UTF16_BE_BOM(pBuffer) && (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE);
bIsTextUnicode = IsTextUnicode(pBuffer, (int)cb, &i);
bool const bIsUnicode = (iTest & IS_TEXT_UNICODE_UNICODE_MASK);
bool const bIsReverse = (iTest & IS_TEXT_UNICODE_REVERSE_MASK);
bool const bIsIllegal = (iTest & IS_TEXT_UNICODE_NOT_UNICODE_MASK);
bHasBOM = (*((UNALIGNED PWCHAR)pBuffer) == 0xFEFF);
bHasRBOM = (*((UNALIGNED PWCHAR)pBuffer) == 0xFFFE);
if (i == 0xFFFF) // i doesn't seem to have been modified ...
i = 0;
if (bIsTextUnicode || bHasBOM || bHasRBOM ||
((i & (IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK)) &&
!((i & IS_TEXT_UNICODE_UNICODE_MASK) && (i & IS_TEXT_UNICODE_REVERSE_MASK)) &&
!(i & IS_TEXT_UNICODE_ODD_LENGTH) &&
!(i & IS_TEXT_UNICODE_ILLEGAL_CHARS && !(i & IS_TEXT_UNICODE_REVERSE_SIGNATURE)) &&
!((i & IS_TEXT_UNICODE_REVERSE_MASK) == IS_TEXT_UNICODE_REVERSE_STATISTICS))) {
if (lpbBOM)
*lpbBOM = (bHasBOM || bHasRBOM ||
(i & (IS_TEXT_UNICODE_SIGNATURE | IS_TEXT_UNICODE_REVERSE_SIGNATURE)))
? true : false;
if (lpbReverse)
*lpbReverse = (bHasRBOM || (i & IS_TEXT_UNICODE_REVERSE_MASK)) ? true : false;
//bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);
if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse)))
{
if (lpbBOM) {
*lpbBOM = (bHasBOM || bHasRBOM);
}
if (lpbReverse) {
*lpbReverse = (bHasRBOM || bIsReverse);
}
return true;
}
else
return false;
return false;
}
// ============================================================================
bool IsUTF7(const char* pTest, size_t nLength) {
bool IsValidUTF7(const char* pTest, size_t nLength) {
const char *pt = pTest;
for (size_t i = 0; i < nLength; i++) {
@ -639,7 +638,6 @@ bool IsUTF7(const char* pTest, size_t nLength) {
return false;
pt++;
}
return true;
}
// ============================================================================
@ -649,7 +647,124 @@ bool IsUTF7(const char* pTest, size_t nLength) {
//#define _OLD_UTF8_VALIDATOR_ 1
#ifdef _OLD_UTF8_VALIDATOR_
bool IsUTF8(const char* pTest, size_t nLength)
// ============================================================================
/* Byte length of a UTF-8 sequence, indexed by the upper 4 bits of its
   first (lead) byte. A value of 0 marks a byte that cannot start a
   sequence (bit pattern 10xxxxxx).
   Sequences that map into the UTF-16 (21-bit) code space are at most
   4 bytes long, so looking at the upper nibble is sufficient.
   Note: index 15 (1111) also matches lead bytes 0xF8..0xFF; the users of
   this table reject those with a separate (byte <= 0xF7) check before
   doing the lookup.
*/
static const size_t utf8_lengths[16] =
{
1,1,1,1,1,1,1,1, /* 0000 to 0111 : 1 byte (plain ASCII) */
0,0,0,0, /* 1000 to 1011 : not valid */
2,2, /* 1100, 1101 : 2 bytes */
3, /* 1110 : 3 bytes */
4 /* 1111 : 4 bytes */
};
// ----------------------------------------------------------------------------
/*++
Function :
  UTF8_mbslen_bytes [INTERNAL]

  Calculates the byte size of a NUL-terminated UTF-8 string.

Parameters :
  char *utf8_string : string to examine (must be NUL-terminated)

Return value :
  Size (in bytes) of the string, including the NUL terminator.
  A byte that cannot start a UTF-8 sequence is counted as one byte.
  A multi-byte sequence that is cut short by the terminator contributes
  only the bytes that are actually present, so the scan never moves
  past the terminator.
--*/
size_t __fastcall UTF8_mbslen_bytes(LPCSTR utf8_string)
{
  size_t length = 0;

  while (*utf8_string)
  {
    BYTE const byte = (BYTE)*utf8_string;
    /* lead bytes above 0xF7 are rejected, everything else is looked up by its upper nibble */
    size_t code_size = (byte <= 0xF7) ? utf8_lengths[byte >> 4] : 0;

    if (code_size == 0) {
      /* we got an invalid byte value but need to count it,
         it will be later ignored during the string conversion */
      code_size = 1;
    }
    else {
      /* The lead byte only announces the sequence length. Clamp it to the
         bytes present before the terminator, otherwise the pointer would
         jump over the NUL and the loop would read beyond the string. */
      size_t present = 1;
      while ((present < code_size) && utf8_string[present]) {
        ++present;
      }
      code_size = present;
    }

    length += code_size;
    utf8_string += code_size;
  }

  return length + 1; /* include NUL terminator */
}
// ----------------------------------------------------------------------------
/*++
Function :
  UTF8_mbslen [INTERNAL]

  Calculates the number of wide characters (UTF-16 code units) that a
  UTF-8 buffer of the given byte length corresponds to.

Parameters :
  char *utf8_string : buffer to examine (scanned by length only, a NUL
                      byte does not end the scan)
  size_t byte_length : number of bytes to examine

Return value :
  Number of wide characters: 1 for every 1- to 3-byte sequence and 2 for
  every 4-byte sequence (presumably a surrogate pair). Bytes that cannot
  start a sequence are skipped without being counted. The same applies to
  the lead byte of a sequence that would extend beyond byte_length.
--*/
size_t __fastcall UTF8_mbslen(LPCSTR utf8_string, size_t byte_length)
{
  size_t wchar_length = 0;

  while (byte_length > 0) {
    BYTE const byte = (BYTE)*utf8_string;
    /* UTF-16 can't encode 5-byte and 6-byte sequences, so maximum value
       for first byte is 11110111. Use lookup table to determine sequence
       length based on upper 4 bits of first byte */
    size_t const code_size = (byte <= 0xF7) ? utf8_lengths[byte >> 4] : 0;

    /* (code_size <= byte_length) guards the unsigned subtraction below:
       a truncated trailing sequence would otherwise wrap byte_length
       around and keep the loop running far beyond the buffer. */
    if ((code_size != 0) && (code_size <= byte_length)) {
      wchar_length += (code_size == 4) ? 2 : 1;
      utf8_string += code_size;
      byte_length -= code_size;
    }
    else {
      /* unlike UTF8_mbslen_bytes, invalid bytes are ignored here:
         only the valid characters encountered are reported */
      utf8_string++;
      byte_length--;
    }
  }

  return wchar_length;
}
// ----------------------------------------------------------------------------
bool __fastcall UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
{
  // A leading UTF-8 signature is excluded from both measurements.
  bool const has_signature = IsUTF8Signature(utf8_string);
  LPCSTR const text = has_signature ? (utf8_string + 3) : utf8_string;
  size_t const text_bytes = has_signature ? (byte_length - 3) : byte_length;

  // Byte count up to the NUL terminator (terminator itself not included) ...
  size_t const byte_count = UTF8_mbslen_bytes(text) - 1;
  // ... versus the wide character count of the first text_bytes bytes.
  size_t const wchar_count = UTF8_mbslen(text, text_bytes);

  // NOTE(review): the two counts also differ for every well-formed
  // multi-byte sequence, not only for invalid bytes - confirm that the
  // function name matches what callers expect from the result.
  return (byte_count != wchar_count);
}
// ----------------------------------------------------------------------------
bool IsValidUTF8(const char* pTest, size_t nLength)
{
static int byte_class_table[256] = {
/* 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F */
@ -706,121 +821,7 @@ bool IsUTF8(const char* pTest, size_t nLength)
break;
}
return (current == kSTART) ? true : false;
}
// ============================================================================
/* Byte length of a UTF-8 sequence, indexed by the upper 4 bits of its
   first (lead) byte. A value of 0 marks a byte that cannot start a
   sequence (bit pattern 10xxxxxx).
   Sequences that map into the UTF-16 (21-bit) code space are at most
   4 bytes long, so looking at the upper nibble is sufficient.
   Note: index 15 (1111) also matches lead bytes 0xF8..0xFF; the users of
   this table reject those with a separate (byte <= 0xF7) check before
   doing the lookup.
*/
static const size_t utf8_lengths[16] =
{
1,1,1,1,1,1,1,1, /* 0000 to 0111 : 1 byte (plain ASCII) */
0,0,0,0, /* 1000 to 1011 : not valid */
2,2, /* 1100, 1101 : 2 bytes */
3, /* 1110 : 3 bytes */
4 /* 1111 : 4 bytes */
};
// ----------------------------------------------------------------------------
/*++
Function :
  UTF8_mbslen_bytes [INTERNAL]

  Calculates the byte size of a NUL-terminated UTF-8 string.

Parameters :
  char *utf8_string : string to examine (must be NUL-terminated)

Return value :
  Size (in bytes) of the string, including the NUL terminator.
  A byte that cannot start a UTF-8 sequence is counted as one byte.
  A multi-byte sequence that is cut short by the terminator contributes
  only the bytes that are actually present, so the scan never moves
  past the terminator.
--*/
size_t UTF8_mbslen_bytes(LPCSTR utf8_string)
{
  size_t length = 0;

  while (*utf8_string)
  {
    BYTE const byte = (BYTE)*utf8_string;
    /* lead bytes above 0xF7 are rejected, everything else is looked up by its upper nibble */
    size_t code_size = (byte <= 0xF7) ? utf8_lengths[byte >> 4] : 0;

    if (code_size == 0) {
      /* we got an invalid byte value but need to count it,
         it will be later ignored during the string conversion */
      code_size = 1;
    }
    else {
      /* The lead byte only announces the sequence length. Clamp it to the
         bytes present before the terminator, otherwise the pointer would
         jump over the NUL and the loop would read beyond the string. */
      size_t present = 1;
      while ((present < code_size) && utf8_string[present]) {
        ++present;
      }
      code_size = present;
    }

    length += code_size;
    utf8_string += code_size;
  }

  return length + 1; /* include NUL terminator */
}
// ----------------------------------------------------------------------------
/*++
Function :
  UTF8_mbslen [INTERNAL]

  Calculates the number of wide characters (UTF-16 code units) that a
  UTF-8 buffer of the given byte length corresponds to.

Parameters :
  char *utf8_string : buffer to examine (scanned by length only, a NUL
                      byte does not end the scan)
  size_t byte_length : number of bytes to examine

Return value :
  Number of wide characters: 1 for every 1- to 3-byte sequence and 2 for
  every 4-byte sequence (presumably a surrogate pair). Bytes that cannot
  start a sequence are skipped without being counted. The same applies to
  the lead byte of a sequence that would extend beyond byte_length.
--*/
size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length)
{
  size_t wchar_length = 0;

  while (byte_length > 0) {
    BYTE const byte = (BYTE)*utf8_string;
    /* UTF-16 can't encode 5-byte and 6-byte sequences, so maximum value
       for first byte is 11110111. Use lookup table to determine sequence
       length based on upper 4 bits of first byte */
    size_t const code_size = (byte <= 0xF7) ? utf8_lengths[byte >> 4] : 0;

    /* (code_size <= byte_length) guards the unsigned subtraction below:
       a truncated trailing sequence would otherwise wrap byte_length
       around and keep the loop running far beyond the buffer. */
    if ((code_size != 0) && (code_size <= byte_length)) {
      wchar_length += (code_size == 4) ? 2 : 1;
      utf8_string += code_size;
      byte_length -= code_size;
    }
    else {
      /* unlike UTF8_mbslen_bytes, invalid bytes are ignored here:
         only the valid characters encountered are reported */
      utf8_string++;
      byte_length--;
    }
  }

  return wchar_length;
}
// ----------------------------------------------------------------------------
bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
{
return ((UTF8_mbslen_bytes(UTF8StringStart(utf8_string)) - 1) !=
UTF8_mbslen(UTF8StringStart(utf8_string), IsUTF8Signature(utf8_string) ? (byte_length - 3) : byte_length));
return (current == kSTART) && !UTF8_ContainsInvalidChars(pTest, nLength);
}
@ -832,17 +833,13 @@ bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
enum {
UTF8_ACCEPT = 0,
UTF8_REJECT = 12,
UTF8_NOTEST = 113
};
static UINT s_State = UTF8_NOTEST;
bool IsUTF8(const char* pTest, size_t nLength)
bool IsValidUTF8(const char* pTest, size_t nLength)
{
enum {
UTF8_ACCEPT = 0,
UTF8_REJECT = 12
};
static const unsigned char utf8_dfa[] = {
// The first part of the table maps bytes to character classes that
// to reduce the size of the transition table and create bitmasks.
@ -867,32 +864,16 @@ bool IsUTF8(const char* pTest, size_t nLength)
const unsigned char *pt = (const unsigned char *)pTest;
const unsigned char *end = pt + nLength;
s_State = UTF8_ACCEPT;
UINT state = UTF8_ACCEPT;
while (pt < end && *pt) {
s_State = utf8_dfa[256 + s_State + utf8_dfa[*pt++]];
if (s_State == UTF8_REJECT) {
state = utf8_dfa[256 + state + utf8_dfa[*pt++]];
if (state == UTF8_REJECT) {
return false;
}
}
return (s_State == UTF8_ACCEPT);
return (state == UTF8_ACCEPT);
}
// ----------------------------------------------------------------------------
bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length)
{
bool result = true;
if (s_State != UTF8_NOTEST) {
result = (s_State == UTF8_REJECT);
}
else {
result = IsUTF8(utf8_string, byte_length);
}
s_State = UTF8_NOTEST; // reset: old way, call IsUTF8() before
return result;
}
// ----------------------------------------------------------------------------
#endif

View File

@ -122,16 +122,15 @@ void Encoding_SetDefaultFlag(int);
const WCHAR* Encoding_GetLabel(int);
const char* Encoding_GetParseNames(int);
bool IsUnicode(const char*, size_t, bool*, bool*);
bool IsUTF8(const char*, size_t);
bool IsUTF7(const char*, size_t);
#define IsUTF8Signature(p) ((*((p)+0) == '\xEF' && *((p)+1) == '\xBB' && *((p)+2) == '\xBF'))
#define UTF8StringStart(p) (IsUTF8Signature(p)) ? ((p)+3) : (p)
size_t UTF8_mbslen_bytes(LPCSTR utf8_string);
size_t UTF8_mbslen(LPCSTR utf8_string, size_t byte_length);
bool UTF8_ContainsInvalidChars(LPCSTR utf8_string, size_t byte_length);
#define Has_UTF16_LE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFEFF)
#define Has_UTF16_BE_BOM(p) (*((UNALIGNED wchar_t*)(p)) == 0xFFFE) /* reverse */
bool IsValidUnicode(const char*, size_t, bool*, bool*);
bool IsValidUTF7(const char*, size_t);
bool IsValidUTF8(const char*, size_t);
// Google's "Compact Encoding Detection"
extern NP2ENCODING g_Encodings[];

View File

@ -117,9 +117,9 @@ extern "C" {
#define ENC_PARSE_NAM_ISO_2022_JP "ISO-2022-jp,iso2022jp,"
#define ENC_PARSE_NAM_ISO_2022_KR "ISO-2022-kr,iso2022kr,csiso2022kr,"
#define ENC_PARSE_NAM_X_CHINESE_CNS "x-Chinese-CNS,xchinesecns,"
#define ENC_PARSE_NAM_JOHAB "johab,johab,"
#define ENC_PARSE_NAM_ISO_8859_10 "ISO-8859-10,iso885910,Windows-28600,Windows28600,"
#define ENC_PARSE_NAM_BIG5_HKSCS "big5hkscs,cnbig5hkscs,xxbig5hkscs,"
//#define ENC_PARSE_NAM_JOHAB "johab,johab,"
//#define ENC_PARSE_NAM_ISO_8859_10 "ISO-8859-10,iso885910,Windows-28600,Windows28600,"
//#define ENC_PARSE_NAM_BIG5_HKSCS "big5hkscs,cnbig5hkscs,xxbig5hkscs,"
//=============================================================================
@ -204,10 +204,10 @@ extern "C" NP2ENCODING g_Encodings[] = {
/* 076 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 52936, ENC_PARSE_NAM_HZ_GB2312, IDS_ENC_HZ_GB2312, HZ_GB_2312, L"" }, // Chinese Simplified (HZ-GB2312)
/* 077 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50220, ENC_PARSE_NAM_ISO_2022_JP, IDS_ENC_ISO_2022_JP, KDDI_ISO_2022_JP, L"" }, // Japanese (JIS)
/* 078 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 50225, ENC_PARSE_NAM_ISO_2022_KR, IDS_ENC_ISO_2022_KR, ISO_2022_KR, L"" }, // Korean (ISO-2022-KR)
/* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, CHINESE_CNS, L"" }, // Chinese Traditional (CNS)
/* 080 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1361, ENC_PARSE_NAM_JOHAB, IDS_ENC_JOHAB, CED_NO_MAPPING, L"" }, // Korean (Johab)
/* 081 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28600, ENC_PARSE_NAM_ISO_8859_10, IDS_ENC_ISO_8859_10, ISO_8859_10, L"" }, // Nordic (ISO 8859-10)
/* 082 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 951, ENC_PARSE_NAM_BIG5_HKSCS, IDS_ENC_BIG5_HKSCS, BIG5_HKSCS, L"" } // Chinese (Hong Kong Supplementary Character Set)
/* 079 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 20000, ENC_PARSE_NAM_X_CHINESE_CNS, IDS_ENC_X_CHINESE_CNS, CHINESE_CNS, L"" } // Chinese Traditional (CNS)
///* 080 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 1361, ENC_PARSE_NAM_JOHAB, IDS_ENC_JOHAB, CED_NO_MAPPING, L"" }, // Korean (Johab)
///* 081 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 28600, ENC_PARSE_NAM_ISO_8859_10, IDS_ENC_ISO_8859_10, ISO_8859_10, L"" }, // Nordic (ISO 8859-10)
///* 082 */{ NCP_EXTERNAL_8BIT | NCP_RECODE, 951, ENC_PARSE_NAM_BIG5_HKSCS, IDS_ENC_BIG5_HKSCS, BIG5_HKSCS, L"" } // Chinese (Hong Kong Supplementary Character Set)
#if 0
@ -322,6 +322,7 @@ static int __fastcall FindCodePage(const Encoding& encoding)
iCodePage = 1250;
break;
case ISO_8859_4:
case ISO_8859_10:
iCodePage = 1257;
break;
case ISO_8859_5:
@ -375,7 +376,7 @@ static int __fastcall FindCodePage(const Encoding& encoding)
static int __fastcall MapEncoding2CPI(const Encoding& encoding, bool* pIsReliable)
static int __fastcall MapEncoding2CPI(const char* const text, const size_t len, const Encoding& encoding, bool* pIsReliable)
{
int iNP3Encoding = CPI_NONE;
@ -399,24 +400,43 @@ static int __fastcall MapEncoding2CPI(const Encoding& encoding, bool* pIsReliabl
}
}
// postprocessing: not found, guess a mapping:
// === special Unicode analysis ===
switch (encoding)
{
case UNICODE:
iNP3Encoding = CPI_UNICODE;
case UTF16LE:
case UTF16BE:
{
bool bBOM;
bool bReverse;
if (IsValidUnicode(text, len, &bBOM, &bReverse)) {
iNP3Encoding = bBOM ? (bReverse ? CPI_UNICODEBEBOM : CPI_UNICODEBOM) : (bReverse ? CPI_UNICODEBE : CPI_UNICODE);
}
}
break;
case UTF8UTF8:
iNP3Encoding = CPI_UTF8;
break;
case UTF32BE:
iNP3Encoding = CPI_UTF32BE;
break;
case UTF32LE:
iNP3Encoding = CPI_UTF32;
break;
default:
break;
}
// === postprocessing: not found, guess a mapping: ===
if (iNP3Encoding == CPI_NONE)
{
switch (encoding) {
case UNICODE:
iNP3Encoding = CPI_UNICODE;
break;
case UTF8UTF8:
iNP3Encoding = CPI_UTF8;
break;
case UTF32BE:
iNP3Encoding = CPI_UTF32BE;
break;
case UTF32LE:
iNP3Encoding = CPI_UTF32;
break;
switch (encoding)
{
case ISO_8859_10:
iNP3Encoding = CPI_NONE;
break;
@ -498,13 +518,11 @@ extern "C" int Encoding_Analyze(const char* const text, const size_t len, const
text, static_cast<int>(len),
nullptr, nullptr, nullptr,
MapCPI2Encoding(encodingHint),
UNKNOWN_LANGUAGE,
CompactEncDet::WEB_CORPUS,
false,
UNKNOWN_LANGUAGE, CompactEncDet::QUERY_CORPUS, true,
&bytes_consumed,
pIsReliable);
return MapEncoding2CPI(encoding, pIsReliable);
return MapEncoding2CPI(text, len, encoding, pIsReliable);
}
// ============================================================================

View File

@ -388,12 +388,17 @@ HBITMAP ResizeImageForCurrentDPI(HBITMAP hbmp)
{
if (hbmp) {
BITMAP bmp;
if (g_uCurrentDPI > USER_DEFAULT_SCREEN_DPI && GetObject(hbmp, sizeof(BITMAP), &bmp)) {
int width = MulDiv(bmp.bmWidth, g_uCurrentDPI, USER_DEFAULT_SCREEN_DPI);
int height = MulDiv(bmp.bmHeight, g_uCurrentDPI, USER_DEFAULT_SCREEN_DPI);
HBITMAP hCopy = CopyImage(hbmp, IMAGE_BITMAP, width, height, LR_COPYRETURNORG | LR_COPYDELETEORG);
if (hCopy) {
hbmp = hCopy;
if (GetObject(hbmp, sizeof(BITMAP), &bmp)) {
UINT const uDPIUnit = (UINT)(USER_DEFAULT_SCREEN_DPI / 2U);
UINT uDPIScaleFactor = max(1U, (UINT)MulDiv(bmp.bmHeight, 8, 64));
UINT const uDPIBase = (uDPIScaleFactor - 1U) * uDPIUnit;
if (g_uCurrentDPI > (uDPIBase + uDPIUnit)) {
int width = MulDiv(bmp.bmWidth, (g_uCurrentDPI - uDPIBase), uDPIUnit);
int height = MulDiv(bmp.bmHeight, (g_uCurrentDPI - uDPIBase), uDPIUnit);
HBITMAP hCopy = CopyImage(hbmp, IMAGE_BITMAP, width, height, LR_CREATEDIBSECTION | LR_COPYRETURNORG | LR_COPYDELETEORG);
if (hCopy) {
hbmp = hCopy;
}
}
}
}
@ -536,23 +541,22 @@ bool BitmapMergeAlpha(HBITMAP hbmp,COLORREF crDest)
{
BITMAP bmp;
if (GetObject(hbmp,sizeof(BITMAP),&bmp)) {
if (bmp.bmBitsPixel == 32) {
int x,y;
RGBQUAD *prgba = bmp.bmBits;
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
BYTE alpha = prgba[x].rgbReserved;
prgba[x].rgbRed = ((prgba[x].rgbRed * alpha) + (GetRValue(crDest) * (255-alpha))) >> 8;
prgba[x].rgbGreen = ((prgba[x].rgbGreen * alpha) + (GetGValue(crDest) * (255-alpha))) >> 8;
prgba[x].rgbBlue = ((prgba[x].rgbBlue * alpha) + (GetBValue(crDest) * (255-alpha))) >> 8;
prgba[x].rgbReserved = 0xFF;
if (prgba) {
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
BYTE alpha = prgba[x].rgbReserved;
prgba[x].rgbRed = ((prgba[x].rgbRed * alpha) + (GetRValue(crDest) * (255 - alpha))) >> 8;
prgba[x].rgbGreen = ((prgba[x].rgbGreen * alpha) + (GetGValue(crDest) * (255 - alpha))) >> 8;
prgba[x].rgbBlue = ((prgba[x].rgbBlue * alpha) + (GetBValue(crDest) * (255 - alpha))) >> 8;
prgba[x].rgbReserved = 0xFF;
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
return true;
}
return true;
}
}
return false;
@ -568,21 +572,20 @@ bool BitmapAlphaBlend(HBITMAP hbmp,COLORREF crDest,BYTE alpha)
{
BITMAP bmp;
if (GetObject(hbmp,sizeof(BITMAP),&bmp)) {
if (bmp.bmBitsPixel == 32) {
int x,y;
RGBQUAD *prgba = bmp.bmBits;
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
prgba[x].rgbRed = ((prgba[x].rgbRed * alpha) + (GetRValue(crDest) * (255-alpha))) >> 8;
prgba[x].rgbGreen = ((prgba[x].rgbGreen * alpha) + (GetGValue(crDest) * (255-alpha))) >> 8;
prgba[x].rgbBlue = ((prgba[x].rgbBlue * alpha) + (GetBValue(crDest) * (255-alpha))) >> 8;
if (prgba) {
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
prgba[x].rgbRed = ((prgba[x].rgbRed * alpha) + (GetRValue(crDest) * (255 - alpha))) >> 8;
prgba[x].rgbGreen = ((prgba[x].rgbGreen * alpha) + (GetGValue(crDest) * (255 - alpha))) >> 8;
prgba[x].rgbBlue = ((prgba[x].rgbBlue * alpha) + (GetBValue(crDest) * (255 - alpha))) >> 8;
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
return true;
}
return true;
}
}
return false;
@ -598,20 +601,19 @@ bool BitmapGrayScale(HBITMAP hbmp)
{
BITMAP bmp;
if (GetObject(hbmp,sizeof(BITMAP),&bmp)) {
if (bmp.bmBitsPixel == 32) {
int x,y;
RGBQUAD *prgba = bmp.bmBits;
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
prgba[x].rgbRed = prgba[x].rgbGreen = prgba[x].rgbBlue =
(((BYTE)((prgba[x].rgbRed * 38 + prgba[x].rgbGreen * 75 + prgba[x].rgbBlue * 15) >> 7) * 0x80) + (0xD0 * (255-0x80))) >> 8;
if (prgba) {
for (y = 0; y < bmp.bmHeight; y++) {
for (x = 0; x < bmp.bmWidth; x++) {
prgba[x].rgbRed = prgba[x].rgbGreen = prgba[x].rgbBlue =
(((BYTE)((prgba[x].rgbRed * 38 + prgba[x].rgbGreen * 75 + prgba[x].rgbBlue * 15) >> 7) * 0x80) + (0xD0 * (255 - 0x80))) >> 8;
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
}
prgba = (RGBQUAD*)((LPBYTE)prgba + bmp.bmWidthBytes);
return true;
}
return true;
}
}
return false;

View File

@ -50,6 +50,9 @@ __forceinline void swapos(DocPos* a, DocPos* b) { DocPos t = *a; *a = *b; *b =
// Limits a signed value to the range [lower, upper].
// The lower bound is tested first, so it wins when the bounds are inverted
// and x lies below lower.
__forceinline int clampi(int x, int lower, int upper) {
  if (x < lower) {
    return lower;
  }
  if (x > upper) {
    return upper;
  }
  return x;
}
// Limits an unsigned value to the range [lower, upper].
// The lower bound is tested first, so it wins when the bounds are inverted
// and x lies below lower.
__forceinline unsigned clampu(unsigned x, unsigned lower, unsigned upper) {
  if (x < lower) {
    return lower;
  }
  if (x > upper) {
    return upper;
  }
  return x;
}
// Is the character an octal digit?
@ -176,6 +179,9 @@ bool IsFontAvailable(LPCWSTR);
bool IsCmdEnabled(HWND, UINT);
// Maps a truth value to a button check state: BST_CHECKED when b is non-zero,
// BST_UNCHECKED otherwise.
#define DlgBtnChk(b) (!(b) ? BST_UNCHECKED : BST_CHECKED)
// Calls EnableMenuItem() for command id `id`, passing MF_BYCOMMAND combined with
// MF_ENABLED (b non-zero) or MF_GRAYED (b zero).
#define EnableCmd(hmenu,id,b) EnableMenuItem((hmenu),(id),MF_BYCOMMAND|((b)?MF_ENABLED:MF_GRAYED))
// Calls CheckMenuItem() for command id `id`, passing MF_BYCOMMAND combined with
// MF_CHECKED (b non-zero) or MF_UNCHECKED (b zero).
#define CheckCmd(hmenu,id,b) CheckMenuItem((hmenu),(id),MF_BYCOMMAND|((b)?MF_CHECKED:MF_UNCHECKED))

View File

@ -841,7 +841,7 @@ static bool __fastcall _LngStrToMultiLngStr(WCHAR* pLngStr, WCHAR* pLngMultiStr,
if ((strLen > 0) && pLngMultiStr && (lngMultiStrSize > 0)) {
WCHAR* lngMultiStrPtr = pLngMultiStr;
WCHAR* last = pLngStr + (pLngStr[0] == 0xFEFF ? 1 : 0); // if read from unicode (UTF-16 LE) file
WCHAR* last = pLngStr + (Has_UTF16_LE_BOM(pLngStr) ? 1 : 0);
while (last && rtnVal) {
// make sure you validate the user input
WCHAR* next = StrNextTok(last, L",; :");
@ -1867,8 +1867,6 @@ void CreateBars(HWND hwnd, HINSTANCE hInstance)
REBARINFO rbi;
REBARBANDINFO rbBand;
BITMAP bmp;
HBITMAP hbmp, hbmpCopy = NULL;
HIMAGELIST himl;
WCHAR szTmp[MAX_PATH] = { L'\0' };
@ -1891,14 +1889,15 @@ void CreateBars(HWND hwnd, HINSTANCE hInstance)
SendMessage(g_hwndToolbar,TB_BUTTONSTRUCTSIZE,(WPARAM)sizeof(TBBUTTON),0);
// Add normal Toolbar Bitmap
hbmp = NULL;
// Add Toolbar Bitmap
BITMAP bmp;
HBITMAP hbmp = NULL;
HBITMAP hbmpCopy = NULL;
if (StringCchLenW(g_tchToolbarBitmap,COUNTOF(g_tchToolbarBitmap)))
{
if (!SearchPath(NULL,g_tchToolbarBitmap,L".bmp",COUNTOF(szTmp),szTmp,NULL))
StringCchCopy(szTmp,COUNTOF(szTmp),g_tchToolbarBitmap);
hbmp = LoadImage(NULL,szTmp,IMAGE_BITMAP,0,0,LR_CREATEDIBSECTION|LR_LOADFROMFILE);
hbmp = ResizeImageForCurrentDPI(hbmp);
}
if (hbmp) {
@ -1907,13 +1906,16 @@ void CreateBars(HWND hwnd, HINSTANCE hInstance)
else {
LPWSTR toolBarIntRes = (iHighDpiToolBar > 0) ? MAKEINTRESOURCE(IDR_MAINWNDTB2) : MAKEINTRESOURCE(IDR_MAINWNDTB);
hbmp = LoadImage(hInstance, toolBarIntRes, IMAGE_BITMAP, 0, 0, LR_CREATEDIBSECTION);
hbmp = ResizeImageForCurrentDPI(hbmp);
hbmpCopy = CopyImage(hbmp, IMAGE_BITMAP, 0, 0, LR_CREATEDIBSECTION);
}
hbmpCopy = CopyImage(hbmp, IMAGE_BITMAP, 0, 0, LR_CREATEDIBSECTION);
// adjust to current DPI
hbmp = ResizeImageForCurrentDPI(hbmp);
hbmpCopy = ResizeImageForCurrentDPI(hbmpCopy);
GetObject(hbmp,sizeof(BITMAP),&bmp);
if (!IsXP())
BitmapMergeAlpha(hbmp,GetSysColor(COLOR_3DFACE));
himl = ImageList_Create(bmp.bmWidth/NUMTOOLBITMAPS,bmp.bmHeight,ILC_COLOR32|ILC_MASK,0,0);
ImageList_AddMasked(himl,hbmp,CLR_DEFAULT);
DeleteObject(hbmp);
@ -1972,8 +1974,9 @@ void CreateBars(HWND hwnd, HINSTANCE hInstance)
SendMessage(g_hwndToolbar,TB_SETDISABLEDIMAGELIST,0,(LPARAM)himl);
}
}
if (hbmpCopy)
if (hbmpCopy) {
DeleteObject(hbmpCopy);
}
// Load toolbar labels
pIniSection = LocalAlloc(LPTR,sizeof(WCHAR) * 32 * 1024);
@ -6906,10 +6909,7 @@ void LoadSettings()
iHighDpiToolBar = IniSectionGetInt(pIniSection, tchHighDpiToolBar, -1);
iHighDpiToolBar = clampi(iHighDpiToolBar, -1, 1);
if (iHighDpiToolBar < 0) { // undefined: determine high DPI (higher than Full-HD)
if ((ResX > 1920) && (ResY > 1080))
iHighDpiToolBar = 1;
else
iHighDpiToolBar = 0;
iHighDpiToolBar = ((ResX > 1920) && (ResY > 1080)) ? 1 : 0;
}
if (!g_flagPosParam /*|| g_bStickyWinPos*/) { // ignore window position if /p was specified

View File

@ -4477,7 +4477,7 @@ void Style_SetLexerFromFile(HWND hwnd,LPCWSTR lpszFile)
}
}
if (!bFound && Encoding_Current(CPI_GET) == g_DOSEncoding) {
if (!bFound && (Encoding_Current(CPI_GET) == g_DOSEncoding)) {
pLexNew = &lexANSI;
}
// Apply the new lexer
@ -6715,17 +6715,13 @@ INT_PTR CALLBACK Style_SelectLexerDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
{
ListView_SetItemState(hwndLV,i,LVIS_FOCUSED|LVIS_SELECTED,LVIS_FOCUSED|LVIS_SELECTED);
ListView_EnsureVisible(hwndLV,i,false);
if (g_iDefaultLexer == i) {
CheckDlgButton(hwnd,IDC_DEFAULTSCHEME,BST_CHECKED);
}
CheckDlgButton(hwnd, IDC_DEFAULTSCHEME, DlgBtnChk(g_iDefaultLexer == i));
break;
}
}
iInternalDefault = g_iDefaultLexer;
if (g_bAutoSelect)
CheckDlgButton(hwnd,IDC_AUTOSELECT,BST_CHECKED);
CheckDlgButton(hwnd,IDC_AUTOSELECT, DlgBtnChk(g_bAutoSelect));
CenterDlgInParent(hwnd);
}
@ -6809,10 +6805,7 @@ INT_PTR CALLBACK Style_SelectLexerDlgProc(HWND hwnd,UINT umsg,WPARAM wParam,LPAR
case LVN_DELETEITEM:
{
int i = ListView_GetNextItem(hwndLV, -1, LVNI_ALL | LVNI_SELECTED);
if (iInternalDefault == i)
CheckDlgButton(hwnd, IDC_DEFAULTSCHEME, BST_CHECKED);
else
CheckDlgButton(hwnd, IDC_DEFAULTSCHEME, BST_UNCHECKED);
CheckDlgButton(hwnd, IDC_DEFAULTSCHEME, DlgBtnChk(iInternalDefault == i));
DialogEnableWindow(hwnd, IDC_DEFAULTSCHEME, i != -1);
DialogEnableWindow(hwnd, IDOK, i != -1);
}

View File

@ -6,8 +6,8 @@
#define APPNAME "Notepad3"
#define VERSION_MAJOR 4
#define VERSION_MINOR 18
#define VERSION_REV 814
#define VERSION_BUILD 1056
#define VERSION_REV 822
#define VERSION_BUILD 1067
#define SCINTILLA_VER 410
#define ONIGMO_REGEX_VER 6.1.3
#define VERSION_PATCH " TinyExpr"