From cc7b5cb7bc80e63277e0b34eb3f6ad4ea5150e1c Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 11:40:54 +0100 Subject: [PATCH 1/5] + fix: Edit/Special : URL encoding/decoding for full UTF8 (non ASCII) characters --- src/Edit.c | 12 +++++++++-- src/Helpers.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/Helpers.h | 4 ++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/Edit.c b/src/Edit.c index 23b3e91c8..6dcf85446 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -1467,7 +1467,10 @@ void EditURLEncode(HWND hwnd) } cchEscapedW = (int)LocalSize(pszEscapedW) / sizeof(WCHAR); - UrlEscape(pszTextW,pszEscapedW,&cchEscapedW,URL_ESCAPE_SEGMENT_ONLY); + if (IsWin7()) + UrlEscape(pszTextW, pszEscapedW, &cchEscapedW, URL_ESCAPE_SEGMENT_ONLY | URL_ESCAPE_AS_UTF8); + else + UrlEscape(pszTextW, pszEscapedW, &cchEscapedW, URL_ESCAPE_SEGMENT_ONLY); cchEscaped = WideCharToMultiByte(cpEdit,0,pszEscapedW,cchEscapedW,pszEscaped,(int)LocalSize(pszEscaped),NULL,NULL); @@ -1551,7 +1554,12 @@ void EditURLDecode(HWND hwnd) } cchUnescapedW = (int)LocalSize(pszUnescapedW) / sizeof(WCHAR); - UrlUnescape(pszTextW,pszUnescapedW,&cchUnescapedW,0); + if (IsWin7()) + UrlUnescapeEx(pszTextW, pszUnescapedW, &cchUnescapedW); + //else if (IsWin8()) + // UrlUnescape(pszTextW, pszUnescapedW, &cchUnescapedW, URL_UNESCAPE_AS_UTF8); + else + UrlUnescape(pszTextW, pszUnescapedW, &cchUnescapedW, 0); cchUnescaped = WideCharToMultiByte(cpEdit,0,pszUnescapedW,cchUnescapedW,pszUnescaped,(int)LocalSize(pszUnescaped),NULL,NULL); diff --git a/src/Helpers.c b/src/Helpers.c index 7034ccc44..7ad12fd7f 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3580,6 +3580,61 @@ INT UTF8_mbslen(LPCSTR source,INT byte_length) return wchar_length; } + +//============================================================================= +// +// UrlUnescapeEx() +// +void UrlUnescapeEx(LPCWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) +{ + CHAR* outBuffer; + int posOut = 0; + + 
outBuffer = LocalAlloc(LPTR, *pcchUnescaped + 1); + if (outBuffer == NULL) { + return; + } + int outLen = (int)LocalSize(outBuffer) - 1; + + int lastEsc = lstrlen(lpURL) - 2; + + int posIn = 0; + WCHAR buf[3] = { L'\0', L'\0', L'\0' }; + + while ((posIn < lastEsc) && (posOut < outLen)) + { + if (lpURL[posIn] == L'%') { + buf[0] = lpURL[posIn + 1]; + buf[1] = lpURL[posIn + 2]; + int octalCode; + if (swscanf_s(buf, L"%x", &octalCode) == 1) { + outBuffer[posOut++] = (CHAR)octalCode; + posIn += 3; + } + else { + outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + } + } + else { + outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + } + } + + // copy rest + while ((lpURL[posIn] != L'\0') && (posOut < outLen)) + { + outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + } + outBuffer[posOut] = '\0'; + + int iOut = MultiByteToWideChar(CP_UTF8, 0, outBuffer, -1, lpUnescaped, (int)*pcchUnescaped); + LocalFree(outBuffer); + + *pcchUnescaped = ((iOut > 0) ? (iOut - 1) : 0); +} + + + /////////////////////////////////////////////////////////////////////////////// // // Drag N Drop helpers diff --git a/src/Helpers.h b/src/Helpers.h index 15ae6c687..565cea5d9 100644 --- a/src/Helpers.h +++ b/src/Helpers.h @@ -106,6 +106,7 @@ enum BufferSizes { MIDSZ_BUFFER = 256, LARGE_BUFFER = 512, HUGE_BUFFER = 1024, + XHUGE_BUFFER = 2048, FILE_ARG_BUF = MAX_PATH+4 }; @@ -425,6 +426,9 @@ BOOL IsUTF7(const char*,int); INT UTF8_mbslen_bytes(LPCSTR utf8_string); INT UTF8_mbslen(LPCSTR source,INT byte_length); + +void UrlUnescapeEx(LPCWSTR, LPWSTR, DWORD*); + // -------------------------------------------------------------------------------------------------------------------------------- // including and linking against pathcch.lib From 04110c1e6f01237a654ff678d0c033a1d932a365 Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 11:52:11 +0100 Subject: [PATCH 2/5] + refactoring: Platform independent URL decoding --- src/Edit.c | 13 +++---------- src/Helpers.c | 5 +++++ 2 files changed, 8 
insertions(+), 10 deletions(-) diff --git a/src/Edit.c b/src/Edit.c index 6dcf85446..626986220 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -1467,10 +1467,7 @@ void EditURLEncode(HWND hwnd) } cchEscapedW = (int)LocalSize(pszEscapedW) / sizeof(WCHAR); - if (IsWin7()) - UrlEscape(pszTextW, pszEscapedW, &cchEscapedW, URL_ESCAPE_SEGMENT_ONLY | URL_ESCAPE_AS_UTF8); - else - UrlEscape(pszTextW, pszEscapedW, &cchEscapedW, URL_ESCAPE_SEGMENT_ONLY); + UrlEscape(pszTextW, pszEscapedW, &cchEscapedW, URL_ESCAPE_SEGMENT_ONLY | URL_ESCAPE_AS_UTF8); cchEscaped = WideCharToMultiByte(cpEdit,0,pszEscapedW,cchEscapedW,pszEscaped,(int)LocalSize(pszEscaped),NULL,NULL); @@ -1554,12 +1551,8 @@ void EditURLDecode(HWND hwnd) } cchUnescapedW = (int)LocalSize(pszUnescapedW) / sizeof(WCHAR); - if (IsWin7()) - UrlUnescapeEx(pszTextW, pszUnescapedW, &cchUnescapedW); - //else if (IsWin8()) - // UrlUnescape(pszTextW, pszUnescapedW, &cchUnescapedW, URL_UNESCAPE_AS_UTF8); - else - UrlUnescape(pszTextW, pszUnescapedW, &cchUnescapedW, 0); + + UrlUnescapeEx(pszTextW, pszUnescapedW, &cchUnescapedW); cchUnescaped = WideCharToMultiByte(cpEdit,0,pszUnescapedW,cchUnescapedW,pszUnescaped,(int)LocalSize(pszUnescaped),NULL,NULL); diff --git a/src/Helpers.c b/src/Helpers.c index 7ad12fd7f..6100010ff 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3587,6 +3587,10 @@ INT UTF8_mbslen(LPCSTR source,INT byte_length) // void UrlUnescapeEx(LPCWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) { +#if defined(URL_UNESCAPE_AS_UTF8) + UrlUnescape(lpURL, lpUnescaped, pcchUnescaped, URL_UNESCAPE_AS_UTF8); + return; +#else CHAR* outBuffer; int posOut = 0; @@ -3631,6 +3635,7 @@ void UrlUnescapeEx(LPCWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) LocalFree(outBuffer); *pcchUnescaped = ((iOut > 0) ? 
(iOut - 1) : 0); +#endif } From a60b2b92019091f0a9fb3da3f73c5563187a0f3d Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 12:19:09 +0100 Subject: [PATCH 3/5] + code cleanup --- src/Helpers.c | 17 +++++++---------- src/Helpers.h | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/Helpers.c b/src/Helpers.c index 6100010ff..1f61c0be9 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3585,25 +3585,22 @@ INT UTF8_mbslen(LPCSTR source,INT byte_length) // // UrlUnescapeEx() // -void UrlUnescapeEx(LPCWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) +void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) { #if defined(URL_UNESCAPE_AS_UTF8) UrlUnescape(lpURL, lpUnescaped, pcchUnescaped, URL_UNESCAPE_AS_UTF8); return; #else - CHAR* outBuffer; int posOut = 0; - - outBuffer = LocalAlloc(LPTR, *pcchUnescaped + 1); + char* outBuffer = LocalAlloc(LPTR, *pcchUnescaped + 1); if (outBuffer == NULL) { return; } int outLen = (int)LocalSize(outBuffer) - 1; - int lastEsc = lstrlen(lpURL) - 2; - int posIn = 0; WCHAR buf[3] = { L'\0', L'\0', L'\0' }; + int lastEsc = lstrlen(lpURL) - 2; while ((posIn < lastEsc) && (posOut < outLen)) { @@ -3612,22 +3609,22 @@ void UrlUnescapeEx(LPCWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) buf[1] = lpURL[posIn + 2]; int octalCode; if (swscanf_s(buf, L"%x", &octalCode) == 1) { - outBuffer[posOut++] = (CHAR)octalCode; + outBuffer[posOut++] = (char)octalCode; posIn += 3; } else { - outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + outBuffer[posOut++] = (char)lpURL[posIn++]; } } else { - outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + outBuffer[posOut++] = (char)lpURL[posIn++]; } } // copy rest while ((lpURL[posIn] != L'\0') && (posOut < outLen)) { - outBuffer[posOut++] = (CHAR)lpURL[posIn++]; + outBuffer[posOut++] = (char)lpURL[posIn++]; } outBuffer[posOut] = '\0'; diff --git a/src/Helpers.h b/src/Helpers.h index 565cea5d9..651a5ad8c 100644 --- a/src/Helpers.h +++ b/src/Helpers.h @@ 
-427,7 +427,7 @@ INT UTF8_mbslen_bytes(LPCSTR utf8_string); INT UTF8_mbslen(LPCSTR source,INT byte_length); -void UrlUnescapeEx(LPCWSTR, LPWSTR, DWORD*); +void UrlUnescapeEx(LPWSTR, LPWSTR, DWORD*); // -------------------------------------------------------------------------------------------------------------------------------- From 1ed236b63a664842c299633e42e80a562ef37ce9 Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 12:47:38 +0100 Subject: [PATCH 4/5] +fix: gibberish output if URL decoded again (2nd time) --- src/Helpers.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Helpers.c b/src/Helpers.c index 1f61c0be9..a9b225c37 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3589,7 +3589,6 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) { #if defined(URL_UNESCAPE_AS_UTF8) UrlUnescape(lpURL, lpUnescaped, pcchUnescaped, URL_UNESCAPE_AS_UTF8); - return; #else int posOut = 0; char* outBuffer = LocalAlloc(LPTR, *pcchUnescaped + 1); @@ -3613,18 +3612,18 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) posIn += 3; } else { - outBuffer[posOut++] = (char)lpURL[posIn++]; + posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); } } else { - outBuffer[posOut++] = (char)lpURL[posIn++]; + posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); } } // copy rest while ((lpURL[posIn] != L'\0') && (posOut < outLen)) { - outBuffer[posOut++] = (char)lpURL[posIn++]; + posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); } outBuffer[posOut] = '\0'; From 3007a32132fd3a208f75ab27668b054c7c0659fd Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 14:07:45 +0100 Subject: [PATCH 5/5] +fix: URL decoding, if encoding has "%#nnn" formatted ASCII chars --- 
src/Helpers.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/Helpers.c b/src/Helpers.c index a9b225c37..39b710474 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3581,6 +3581,17 @@ INT UTF8_mbslen(LPCSTR source,INT byte_length) } + +/** +* Is the character a decimal digit? +*/ +static BOOL IsDigit(WCHAR wch) +{ + return ((wch >= L'0') && (wch <= L'9')); +} + + + //============================================================================= // // UrlUnescapeEx() @@ -3598,24 +3609,38 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) int outLen = (int)LocalSize(outBuffer) - 1; int posIn = 0; - WCHAR buf[3] = { L'\0', L'\0', L'\0' }; + WCHAR buf[5] = { L'\0' }; int lastEsc = lstrlen(lpURL) - 2; + int code; while ((posIn < lastEsc) && (posOut < outLen)) { + BOOL bOk = FALSE; if (lpURL[posIn] == L'%') { buf[0] = lpURL[posIn + 1]; buf[1] = lpURL[posIn + 2]; - int octalCode; - if (swscanf_s(buf, L"%x", &octalCode) == 1) { - outBuffer[posOut++] = (char)octalCode; + buf[2] = L'\0'; + if (swscanf_s(buf, L"%x", &code) == 1) { + outBuffer[posOut++] = (char)code; posIn += 3; + bOk = TRUE; } - else { - posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); + else if (lpURL[posIn + 1] == L'#') { + int n = 0; + while (IsDigit(lpURL[posIn + 2 + n]) && (n < 4)) { + buf[n] = lpURL[posIn + 2 + n]; + ++n; + } + buf[n] = L'\0'; + if (swscanf_s(buf, L"%i", &code) == 1) { + outBuffer[posOut++] = (char)code; + posIn += (2 + n); + if (lpURL[posIn] == L';') ++posIn; + bOk = TRUE; + } } } - else { + if (!bOk) { posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); } }