+fix: HTML numeric character references start with an ampersand (&#), not a percent character

This commit is contained in:
Rainer Kottenhoff 2017-12-06 15:23:44 +01:00
parent 3007a32132
commit 5a81cb4c9e

View File

@ -3616,6 +3616,7 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped)
while ((posIn < lastEsc) && (posOut < outLen))
{
BOOL bOk = FALSE;
// URL encoded
if (lpURL[posIn] == L'%') {
buf[0] = lpURL[posIn + 1];
buf[1] = lpURL[posIn + 2];
@ -3625,14 +3626,17 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped)
posIn += 3;
bOk = TRUE;
}
else if (lpURL[posIn + 1] == L'#') {
int n = 0;
while (IsDigit(lpURL[posIn + 2 + n]) && (n < 4)) {
buf[n] = lpURL[posIn + 2 + n];
++n;
}
buf[n] = L'\0';
if (swscanf_s(buf, L"%i", &code) == 1) {
}
// HTML encoded
else if ((lpURL[posIn] == L'&') && (lpURL[posIn + 1] == L'#')) {
int n = 0;
while (IsDigit(lpURL[posIn + 2 + n]) && (n < 4)) {
buf[n] = lpURL[posIn + 2 + n];
++n;
}
buf[n] = L'\0';
if (swscanf_s(buf, L"%i", &code) == 1) {
if (code <= 0xFF) {
outBuffer[posOut++] = (char)code;
posIn += (2 + n);
if (lpURL[posIn] == L';') ++posIn;
@ -3640,6 +3644,7 @@ void UrlUnescapeEx(LPWSTR lpURL, LPWSTR lpUnescaped, DWORD* pcchUnescaped)
}
}
}
//TODO: HTML Hex encoded (&#x...)
if (!bOk) {
posOut += WideCharToMultiByte(CP_UTF8, 0, &(lpURL[posIn++]), 1, &(outBuffer[posOut]), (int)(outLen - posOut), NULL, NULL);
}