From 5a81cb4c9eb64dc09360daee14d2e5d8686bb293 Mon Sep 17 00:00:00 2001 From: Rainer Kottenhoff Date: Wed, 6 Dec 2017 15:23:44 +0100 Subject: [PATCH] +fix: HTML character encoding starts with ampersand, not percent char --- src/Helpers.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Helpers.c b/src/Helpers.c index 39b710474..6fe5995a3 100644 --- a/src/Helpers.c +++ b/src/Helpers.c @@ -3616,6 +3616,7 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) while ((posIn < lastEsc) && (posOut < outLen)) { BOOL bOk = FALSE; + // URL encoded if (lpURL[posIn] == L'%') { buf[0] = lpURL[posIn + 1]; buf[1] = lpURL[posIn + 2]; @@ -3625,14 +3626,17 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) posIn += 3; bOk = TRUE; } - else if (lpURL[posIn + 1] == L'#') { - int n = 0; - while (IsDigit(lpURL[posIn + 2 + n]) && (n < 4)) { - buf[n] = lpURL[posIn + 2 + n]; - ++n; - } - buf[n] = L'\0'; - if (swscanf_s(buf, L"%i", &code) == 1) { + } + // HTML encoded + else if ((lpURL[posIn] == L'&') && (lpURL[posIn + 1] == L'#')) { + int n = 0; + while (IsDigit(lpURL[posIn + 2 + n]) && (n < 4)) { + buf[n] = lpURL[posIn + 2 + n]; + ++n; + } + buf[n] = L'\0'; + if (swscanf_s(buf, L"%i", &code) == 1) { + if (code <= 0xFF) { outBuffer[posOut++] = (char)code; posIn += (2 + n); if (lpURL[posIn] == L';') ++posIn; @@ -3640,6 +3644,7 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped) } } } + //TODO: HTML Hex encoded (&#x...) if (!bOk) { posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL); }