mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-16 06:05:58 -06:00
This might bring some incompatibilities in code that expects wchar to be 16-bit, which especially involves dealing with Win32 (and to a lesser extent MacOS) APIs, so if your application is doing that, please check all instances of WCHAR (UniChar on MacOS) or even wchar, especially type casts. To support host APIs, char16 is introduced (but there is no 16-bit String variant). Use ToSystemCharsetW, FromSystemCharsetW to convert texts to Win32 API. - Support of drawing non-BMP characters in GUI - Vastly improved character font replacement code (when drawing characters missing from the requested font, a replacement font is used) - Last instances of Win32 ANSI calls (those ending with A) are removed - UTF handling routines are refactored and their naming is unified - RTF is now able to handle non-BMP characters (RTF is used as clipboard format for RichText) Other minor changes: - fixed TryRealloc issue - improved MemoryCheck - Removed MemoryAlloc48/MemoryFree48 - In theide, background parsing should cause delays in the main thread less often
129 lines
3 KiB
C++
129 lines
3 KiB
C++
template <class Target>
|
|
force_inline bool ToUtf8_(Target t, wchar codepoint)
|
|
{
|
|
if(codepoint < 0x80)
|
|
t((char)codepoint);
|
|
else
|
|
if(codepoint < 0x800) {
|
|
t(0xc0 | byte(codepoint >> 6));
|
|
t(0x80 | byte(codepoint & 0x3f));
|
|
}
|
|
else
|
|
if((codepoint & 0xFFFFFF00) == 0xEE00) // ERROR ESCAPE
|
|
t((char) codepoint);
|
|
else
|
|
if(codepoint < 0x10000) {
|
|
t(0xe0 | byte(codepoint >> 12));
|
|
t(0x80 | byte((codepoint >> 6) & 0x3f));
|
|
t(0x80 | byte(codepoint & 0x3f));
|
|
}
|
|
else
|
|
if(codepoint < 0x110000) {
|
|
t(0xf0 | byte(codepoint >> 18));
|
|
t(0x80 | byte((codepoint >> 12) & 0x3f));
|
|
t(0x80 | byte((codepoint >> 6) & 0x3f));
|
|
t(0x80 | byte(codepoint & 0x3f));
|
|
}
|
|
else
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
// Decodes one UTF-8 sequence starting at _s and moves _s past what was consumed.
//   _s    - in/out read cursor; advanced by 1 to 4 bytes
//   nolim - when true, no bounds test is made before trailing bytes are examined
//           and _lim is not used
//   _lim  - end of the readable input; consulted only when nolim is false
//   ok    - set to false when the input could not be decoded; this function
//           never sets it to true
// Returns the decoded code point. When decoding fails, exactly one byte is
// consumed and 0xEE00 + that byte is returned (the "error escape" range).
// The first byte at _s is read unconditionally, so the caller has to make sure
// it is readable.
force_inline dword FetchUtf8(const char *&_s, bool nolim, const char *_lim, bool& ok)
{
	const byte *p = (const byte *)_s;
	const byte *end = (const byte *)_lim;
	const dword lead = *p;

	// single byte, 7-bit value
	if(lead < 0x80) {
		_s++;
		return *p;
	}

	// Lead bytes below 0xC2 are never accepted as the start of a sequence.
	// Each attempt below returns on success; on failure the next (longer)
	// form is tried, in the same order: 2, 3, then 4 bytes.
	if(lead >= 0xC2) {
		// 2 bytes: one trailing byte in 0x80..0xBF, result must lie in 0x80..0x7FF
		if(lead < 0xE0 && (nolim || p + 1 < end) && (p[1] & 0xC0) == 0x80) {
			const dword cp = ((lead - 0xC0) << 6) + p[1] - 0x80;
			if(cp >= 0x80 && cp < 0x800) {
				_s += 2;
				return cp;
			}
		}

		// 3 bytes: result must be at least 0x800 and must not fall into the
		// error escape range 0xEE00..0xEEFF
		if(lead < 0xF0 && (nolim || p + 2 < end) &&
		   (p[1] & 0xC0) == 0x80 && (p[2] & 0xC0) == 0x80) {
			const dword cp = ((lead - 0xE0) << 12) + ((p[1] - 0x80) << 6) + p[2] - 0x80;
			if(cp >= 0x800 && (cp < 0xEE00 || cp > 0xEEFF)) {
				_s += 3;
				return cp;
			}
		}

		// 4 bytes: result must lie in 0x10000..0x10FFFF
		if(lead < 0xF8 && (nolim || p + 3 < end) &&
		   (p[1] & 0xC0) == 0x80 && (p[2] & 0xC0) == 0x80 && (p[3] & 0xC0) == 0x80) {
			const dword cp = ((lead - 0xF0) << 18) + ((p[1] - 0x80) << 12) + ((p[2] - 0x80) << 6) + p[3] - 0x80;
			if(cp >= 0x10000 && cp < 0x110000) {
				_s += 4;
				return cp;
			}
		}
	}

	// Undecodable input: skip one byte and report it through the escape range
	_s++;
	ok = false;
	return 0xEE00 + lead; // ERROR ESCAPE
}
|
|
|
|
// Bounded variant: decodes one UTF-8 sequence at s, using lim as the end of
// the input. Forwards to the four-argument FetchUtf8 with nolim == false.
force_inline dword FetchUtf8(const char *&s, const char *lim, bool& ok)
{
	const bool unbounded = false; // lim is honoured
	return FetchUtf8(s, unbounded, lim, ok);
}
|
|
|
|
// Unbounded variant: decodes one UTF-8 sequence at s without an end pointer.
// Forwards to the four-argument FetchUtf8 with nolim == true and a NULL limit.
force_inline dword FetchUtf8(const char *&s, bool& ok)
{
	const bool unbounded = true; // no end pointer is available
	return FetchUtf8(s, unbounded, NULL, ok);
}
|
|
|
|
template <class Target>
|
|
force_inline bool FromUtf8_(Target t, const char *s, size_t len)
|
|
{
|
|
bool ok = true;
|
|
const char *lim = s + len;
|
|
while(s < lim)
|
|
t(FetchUtf8(s, lim, ok));
|
|
return ok;
|
|
}
|
|
|
|
template <class Target>
|
|
force_inline bool ToUtf16_(Target t, size_t codepoint)
|
|
{
|
|
if(codepoint < 0x10000)
|
|
t((char16)codepoint);
|
|
else
|
|
if(codepoint < 0x110000) {
|
|
codepoint -= 0x10000;
|
|
t(char16(0xD800 + (0x3ff & (codepoint >> 10))));
|
|
t(char16(0xDC00 + (0x3ff & codepoint)));
|
|
}
|
|
else
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
// Combines the UTF-16 surrogate pair at s[0], s[1] into one code point.
// Returns 0 when s[0] is not a high surrogate (0xD800..0xDBFF), when s + 1 is
// not below lim, or when s[1] is not a low surrogate (0xDC00..0xDFFF).
// s[0] is read before any bounds test, so the caller must ensure s < lim.
force_inline wchar ReadSurrogatePair(const char16 *s, const char16 *lim)
{
	if((*s & 0XFC00) != 0xD800)
		return 0;
	if(!(s + 1 < lim))
		return 0;
	if((s[1] & 0xFC00) != 0xDC00)
		return 0;
	// ten payload bits from each unit, then the 0x10000 offset is added back
	return ((wchar(s[0] & 0x3ff) << 10) | (s[1] & 0x3ff)) + 0x10000;
}
|
|
|
|
template <class Target>
|
|
force_inline void FromUtf16_(Target t, const char16 *s, size_t len)
|
|
{
|
|
const char16 *lim = s + len;
|
|
while(s < lim) {
|
|
wchar c = ReadSurrogatePair(s, lim);
|
|
if(c) {
|
|
t(c);
|
|
s += 2;
|
|
}
|
|
else {
|
|
t(*s);
|
|
s++;
|
|
}
|
|
}
|
|
}
|