mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
281 lines
8.5 KiB
C++
281 lines
8.5 KiB
C++
// sktoolslib - common files for SK tools
|
|
|
|
// Copyright (C) 2013, 2017, 2020 - Stefan Kueng
|
|
|
|
// This program is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU General Public License
|
|
// as published by the Free Software Foundation; either version 2
|
|
// of the License, or (at your option) any later version.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software Foundation,
|
|
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
//
|
|
|
|
#include "stdafx.h"
|
|
#include "EscapeUtils.h"
|
|
#include "StringUtils.h"
|
|
#include "UnicodeUtils.h"
|
|
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
static BOOL sse2supported = ::IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
|
|
|
|
static constexpr char iri_escape_chars[256] = {
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
/* 128 */
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
};
|
|
|
|
const char uri_autoescape_chars[256] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
|
|
|
|
/* 64 */
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
|
|
|
|
/* 128 */
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
/* 192 */
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
|
|
static constexpr char uri_char_validity[256] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
|
|
|
|
/* 64 */
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
|
|
|
|
/* 128 */
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
/* 192 */
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
|
|
bool CEscapeUtils::ContainsEscapedChars( const char * psz, size_t length )
|
|
{
|
|
// most of our strings will be tens of bytes long
|
|
// -> afford some minor overhead to handle the main part very fast
|
|
|
|
const char* end = psz + length;
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
if (sse2supported)
|
|
{
|
|
__m128i mask = _mm_set_epi8 ( '%', '%', '%', '%', '%', '%', '%', '%'
|
|
, '%', '%', '%', '%', '%', '%', '%', '%');
|
|
|
|
for (; psz + sizeof (mask) <= end; psz += sizeof (mask))
|
|
{
|
|
// fetch the next 16 bytes from the source
|
|
|
|
__m128i chunk = _mm_loadu_si128 ((const __m128i*)psz);
|
|
|
|
// check for non-ASCII
|
|
|
|
int flags = _mm_movemask_epi8 (_mm_cmpeq_epi8 (chunk, mask));
|
|
if (flags != 0)
|
|
return true;
|
|
};
|
|
}
|
|
#endif
|
|
|
|
// return odd bytes at the end of the string
|
|
|
|
for (; psz < end; ++psz)
|
|
if (*psz == '%')
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
char* CEscapeUtils::Unescape( char * psz )
|
|
{
|
|
char * pszSource = psz;
|
|
char * pszDest = psz;
|
|
|
|
static constexpr char szHex[] = "0123456789ABCDEF";
|
|
|
|
// Unescape special characters. The number of characters
|
|
// in the *pszDest is assumed to be <= the number of characters
|
|
// in pszSource (they are both the same string anyway)
|
|
|
|
while (*pszSource != '\0' && *pszDest != '\0')
|
|
{
|
|
if (*pszSource == '%')
|
|
{
|
|
// The next two chars following '%' should be digits
|
|
if ( *(pszSource + 1) == '\0' ||
|
|
*(pszSource + 2) == '\0' )
|
|
{
|
|
// nothing left to do
|
|
break;
|
|
}
|
|
|
|
char nValue = '?';
|
|
const char * pszHigh = nullptr;
|
|
pszSource++;
|
|
|
|
*pszSource = (char) toupper(*pszSource);
|
|
pszHigh = strchr(szHex, *pszSource);
|
|
|
|
if (pszHigh != nullptr)
|
|
{
|
|
pszSource++;
|
|
*pszSource = (char) toupper(*pszSource);
|
|
const char * pszLow = strchr(szHex, *pszSource);
|
|
|
|
if (pszLow != nullptr)
|
|
{
|
|
nValue = (char) (((pszHigh - szHex) << 4) +
|
|
(pszLow - szHex));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pszSource--;
|
|
nValue = *pszSource;
|
|
}
|
|
*pszDest++ = nValue;
|
|
}
|
|
else
|
|
*pszDest++ = *pszSource;
|
|
|
|
pszSource++;
|
|
}
|
|
|
|
*pszDest = '\0';
|
|
return pszDest;
|
|
}
|
|
|
|
std::string CEscapeUtils::EscapeString( const std::string& str )
|
|
{
|
|
std::string ret2;
|
|
int c;
|
|
int i;
|
|
for (i=0; str[i]; ++i)
|
|
{
|
|
c = (unsigned char)str[i];
|
|
if (iri_escape_chars[c])
|
|
{
|
|
// no escaping needed for that char
|
|
ret2 += (unsigned char)str[i];
|
|
}
|
|
else
|
|
{
|
|
// char needs escaping
|
|
ret2 += CStringUtils::Format("%%%02X", (unsigned char)c);
|
|
}
|
|
}
|
|
std::string ret;
|
|
for (i=0; ret2[i]; ++i)
|
|
{
|
|
c = (unsigned char)ret2[i];
|
|
if (uri_autoescape_chars[c])
|
|
{
|
|
if ((c == '%')&&(DoesPercentNeedEscaping(ret2.substr(i).c_str())))
|
|
{
|
|
// this percent sign needs escaping!
|
|
ret += CStringUtils::Format("%%%02X", (unsigned char)c);
|
|
}
|
|
else
|
|
{
|
|
// no escaping needed for that char
|
|
ret += (unsigned char)ret2[i];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// char needs escaping
|
|
ret += CStringUtils::Format("%%%02X", (unsigned char)c);
|
|
}
|
|
}
|
|
return ret;
|
|
|
|
}
|
|
|
|
std::wstring CEscapeUtils::EscapeString( const std::wstring& str )
|
|
{
|
|
return CUnicodeUtils::StdGetUnicode(EscapeString(CUnicodeUtils::StdGetUTF8(str)));
|
|
}
|
|
|
|
std::string CEscapeUtils::StringUnescape( const std::string& str )
|
|
{
|
|
auto urlabuf = std::make_unique<char[]>(str.size()+1);
|
|
|
|
strcpy_s(urlabuf.get(), str.size()+1, str.c_str());
|
|
Unescape(urlabuf.get());
|
|
|
|
return urlabuf.get();
|
|
}
|
|
|
|
std::wstring CEscapeUtils::StringUnescape( const std::wstring& str )
|
|
{
|
|
int len = (int)str.size();
|
|
if (len==0)
|
|
return std::wstring();
|
|
std::string stra = CUnicodeUtils::StdGetUTF8(str);
|
|
|
|
stra = StringUnescape(stra);
|
|
|
|
return CUnicodeUtils::StdGetUnicode(stra);
|
|
}
|
|
|
|
|
|
bool CEscapeUtils::DoesPercentNeedEscaping( LPCSTR str )
|
|
{
|
|
if (str[1] == 0)
|
|
return true;
|
|
if (!(((str[1] >= '0')&&(str[1] <= '9'))||((str[1] >= 'A')&&(str[1] <= 'F'))||((str[1] >= 'a')&&(str[1] <= 'f'))))
|
|
return true;
|
|
if (!(((str[2] >= '0')&&(str[2] <= '9'))||((str[2] >= 'A')&&(str[2] <= 'F'))||((str[2] >= 'a')&&(str[2] <= 'f'))))
|
|
return true;
|
|
return false;
|
|
}
|