mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
142 lines
3.9 KiB
C++
142 lines
3.9 KiB
C++
#include "stdafx.h"
|
|
#include "codecvt.h"
|
|
|
|
using namespace std;
|
|
|
|
ucs2_conversion::result
|
|
ucs2_conversion::do_in(mbstate_t&,
|
|
const char* from, const char* from_end, const char*& from_next,
|
|
wchar_t* to, wchar_t* to_limit, wchar_t*& to_next) const
|
|
{
|
|
size_t max_input = (from_end - from) & ~1;
|
|
size_t max_output = (to_limit - to);
|
|
size_t count = min(max_input / 2, max_output);
|
|
|
|
result res = ok;
|
|
|
|
from_next = from;
|
|
to_next = to;
|
|
for (; count--; from_next += 2, ++to_next)
|
|
{
|
|
unsigned char c1 = *from_next, c2 = *(from_next + 1);
|
|
*to_next = c1 | c2 << 8;
|
|
}
|
|
if (to_next == to && from_next == from_end - 1)
|
|
res = partial;
|
|
return res;
|
|
}
|
|
|
|
ucs2_conversion::result
|
|
ucs2_conversion::do_out(mbstate_t&,
|
|
const wchar_t* from, const wchar_t* from_end, const wchar_t*& from_next,
|
|
char* to, char* to_limit, char*& to_next) const
|
|
{
|
|
size_t max_input = (from_end - from);
|
|
size_t max_output = (to_limit - to) & ~1;
|
|
size_t count = min(max_input, max_output / 2);
|
|
|
|
from_next = from;
|
|
to_next = to;
|
|
for (; count--; ++from_next, to_next += 2)
|
|
{
|
|
*(to_next + 0) = (char)(*from_next & 0xFF);
|
|
*(to_next + 1) = (char)(*from_next >> 8 & 0xFF);
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
typedef unsigned char uchar;
|
|
|
|
inline unsigned char
|
|
take_6_bits(unsigned value, size_t right_position)
|
|
{
|
|
return uchar((value >> right_position) & 63);
|
|
}
|
|
|
|
inline size_t
|
|
most_signifant_bit_position(unsigned value)
|
|
{
|
|
size_t result(0);
|
|
|
|
size_t half = 16;
|
|
for (; half > 0; half >>= 1)
|
|
{
|
|
if (1u << (result + half) <= value)
|
|
result += half;
|
|
}
|
|
return result + 1;
|
|
//return *lower_bound(range(0u, 31u), \x -> (1 << x <= value));
|
|
}
|
|
|
|
utf8_conversion::result
|
|
utf8_conversion::do_in(mbstate_t&,
|
|
const char* from, const char* from_end, const char*& from_next,
|
|
wchar_t* to, wchar_t* to_limit, wchar_t*& to_next) const
|
|
{
|
|
from_next = from;
|
|
to_next = to;
|
|
|
|
for (; to_next < to_limit && from_next < from_end; ++to_next)
|
|
{
|
|
if (uchar(*from_next) < 0x80)
|
|
*to_next = uchar(*from_next++);
|
|
else
|
|
{
|
|
// 111zxxxx : z = 0 xxxx are data bits
|
|
size_t zero_bit_pos = most_signifant_bit_position(~*from_next);
|
|
size_t extra_bytes = 7 - zero_bit_pos;
|
|
|
|
if (size_t(from_end - from_next) < extra_bytes + 1)
|
|
return partial;
|
|
|
|
*to_next = uchar(*from_next++) & (wchar_t)((1 << (zero_bit_pos - 1)) - 1);
|
|
|
|
for (; extra_bytes--; ++from_next)
|
|
{
|
|
*to_next = *to_next << 6 | uchar(*from_next) & 63;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ok;
|
|
}
|
|
|
|
utf8_conversion::result
|
|
utf8_conversion::do_out(mbstate_t&,
|
|
const wchar_t* from, const wchar_t* from_end, const wchar_t*& from_next,
|
|
char* to, char* to_limit, char*& to_next) const
|
|
{
|
|
from_next = from;
|
|
to_next = to;
|
|
|
|
for (; from_next < from_end; ++from_next)
|
|
{
|
|
unsigned symbol = *from_next;
|
|
|
|
if (symbol < 0x7F)
|
|
{
|
|
if (to_next < to_limit)
|
|
*to_next++ = (unsigned char)symbol;
|
|
else
|
|
return ok;
|
|
}
|
|
else
|
|
{
|
|
size_t msb_pos = most_signifant_bit_position(symbol);
|
|
size_t extra_bytes = msb_pos / 6;
|
|
|
|
if (size_t(to_limit - to_next) >= extra_bytes + 1)
|
|
{
|
|
*to_next = uchar(0xFF80 >> extra_bytes);
|
|
*to_next++ |= take_6_bits(symbol, extra_bytes * 6);
|
|
|
|
for (; extra_bytes--;)
|
|
*to_next++ = 0x80 | take_6_bits(symbol, extra_bytes * 6);
|
|
}
|
|
else
|
|
return ok;
|
|
}
|
|
}
|
|
return ok;
|
|
}
|