ultimatepp/uppsrc/Core/Utf.cpp
Mirek Fidler 34ff691308 sizeof(wchar) is changed to 4 (32 bits) to support non BMP unicode characters
This might bring some incompatibilities in the code that expects wchar to be 16 bit, which
  especially involves dealing with Win32 (and to a lesser extent MacOS) APIs, so if your application
  is doing that, please check all instances of WCHAR (UniChar on MacOS) or even wchar
  especially type casts.

  To support host APIs, char16 is introduced (but there is no 16-bit String variant).

  Use ToSystemCharsetW, FromSystemCharsetW to convert texts to Win32 API.

- Support of drawing non-BMP characters in GUI
- Vastly improved character font replacement code (when drawing characters missing from the requested font, a replacement font is used)
- Last instances of Win32 ANSI calls (those ending with A) are removed
- UTF handling routines are refactored and their naming is unified
- RTF is now able to handle non-BMP characters (RTF is used as clipboard format for RichText)

Other minor changes:

- fixed TryRealloc issue
- improved MemoryCheck
- Removed MemoryAlloc48/MemoryFree48
- In TheIDE, background parsing should now cause delays in the main thread less often
2021-12-02 12:03:19 +01:00

183 lines
No EOL
3.7 KiB
C++

#include "Core.h"
namespace Upp {
// Runs FromUtf8_ over the `len` chars at `s` with a sink that throws every decoded
// code point away, and returns whatever FromUtf8_ reports.
// NOTE(review): presumably `true` means the input was well-formed UTF-8 - confirm
// against FromUtf8_.
bool CheckUtf8(const char *s, int len)
{
	auto discard = [](wchar) {};
	const bool ok = FromUtf8_(discard, s, len);
	return ok;
}
// Returns how many chars ToUtf8_ emits for the `len` code points starting at `s`,
// i.e. the size of their UTF-8 encoding. Nothing is stored; the chars are only counted.
int Utf8Len(const wchar *s, int len)
{
	int count = 0;
	auto tally = [&](char) { ++count; };
	for(int i = 0; i < len; i++)
		ToUtf8_(tally, s[i]);
	return count;
}
void ToUtf8(char *t, const wchar *s, int len)
{
for(const wchar *lim = s + len; s < lim; s++)
ToUtf8_([&](char c) { *t++ = c; }, *s);
}
// Builds and returns a String holding the chars ToUtf8_ emits for the `len`
// code points starting at `s`, appended one char at a time.
String ToUtf8(const wchar *s, int len)
{
	String utf8;
	auto append = [&](char c) { utf8.Cat(c); };
	for(int i = 0; i < len; i++)
		ToUtf8_(append, s[i]);
	return utf8;
}
// Returns the number of chars needed to re-encode `len` char16 units at `s` as UTF-8:
// FromUtf16_ decodes the input into code points and each one is passed to ToUtf8_,
// whose output chars are merely counted.
int Utf8Len(const char16 *s, int len)
{
	int count = 0;
	auto tally = [&](char) { count++; };
	auto recode = [&](wchar code) { ToUtf8_(tally, code); };
	FromUtf16_(recode, s, len);
	return count;
}
// Re-encodes `len` char16 units at `s` as UTF-8 into the buffer at `t`:
// FromUtf16_ yields code points, ToUtf8_ turns each into chars that are stored
// consecutively. No bounds are checked here, so `t` must be big enough.
void ToUtf8(char *t, const char16 *s, int len)
{
	char *out = t;
	auto store = [&](char c) { *out++ = c; };
	auto recode = [&](wchar code) { ToUtf8_(store, code); };
	FromUtf16_(recode, s, len);
}
// Re-encodes `len` char16 units at `s` as UTF-8 and returns the result as a String.
// The StringBuffer is asked to reserve `len` chars up front; chars are then appended
// one by one as ToUtf8_ emits them for each code point decoded by FromUtf16_.
String ToUtf8(const char16 *s, int len)
{
	StringBuffer utf8;
	utf8.Reserve(len);
	auto append = [&](char c) { utf8.Cat(c); };
	auto recode = [&](wchar code) { ToUtf8_(append, code); };
	FromUtf16_(recode, s, len);
	return String(utf8);
}
// Returns how many char16 units ToUtf16_ emits for the `len` code points starting
// at `s`. The units are only counted, never stored.
int Utf16Len(const wchar *s, int len)
{
	int count = 0;
	auto tally = [&](char16) { ++count; };
	for(int i = 0; i < len; i++)
		ToUtf16_(tally, s[i]);
	return count;
}
// Encodes the `len` code points at `s` through ToUtf16_, storing the emitted char16
// units consecutively starting at `t`, and returns the number of units written.
// No bounds are checked here, so `t` must be large enough for the whole output.
int ToUtf16(char16 *t, const wchar *s, int len)
{
	char16 *out = t;
	auto store = [&](char16 c) { *out++ = c; };
	const wchar *end = s + len;
	while(s < end) {
		ToUtf16_(store, *s);
		++s;
	}
	return static_cast<int>(out - t);
}
// Returns a Vector with the char16 units ToUtf16_ emits for the `len` code points
// starting at `s`. Room for `len` units is reserved before encoding begins; units
// are then added one at a time.
Vector<char16> ToUtf16(const wchar *s, int len)
{
	Vector<char16> units;
	units.Reserve(len);
	auto append = [&](char16 c) { units.Add(c); };
	for(int i = 0; i < len; i++)
		ToUtf16_(append, s[i]);
	return units;
}
// Returns the number of char16 units needed to re-encode the `len` chars at `s`:
// FromUtf8_ decodes the input into code points and each is fed to ToUtf16_,
// whose output units are merely counted.
int Utf16Len(const char *s, int len)
{
	int count = 0;
	auto tally = [&](char16) { count++; };
	auto recode = [&](wchar code) { ToUtf16_(tally, code); };
	FromUtf8_(recode, s, len);
	return count;
}
// Re-encodes the `len` chars at `s` into char16 units stored consecutively at `t`
// (FromUtf8_ decodes, ToUtf16_ encodes) and returns the number of units written.
// No bounds are checked here, so `t` must be large enough for the whole output.
int ToUtf16(char16 *t, const char *s, int len)
{
	char16 *out = t;
	auto store = [&](char16 c) { *out++ = c; };
	auto recode = [&](wchar code) { ToUtf16_(store, code); };
	FromUtf8_(recode, s, len);
	return static_cast<int>(out - t);
}
// Re-encodes the `len` chars at `s` into char16 units and returns them in a Vector:
// every code point decoded by FromUtf8_ is passed to ToUtf16_, and each emitted unit
// is added to the result. No capacity is reserved in advance.
Vector<char16> ToUtf16(const char *s, int len)
{
	Vector<char16> units;
	auto append = [&](char16 c) { units.Add(c); };
	auto recode = [&](wchar code) { ToUtf16_(append, code); };
	FromUtf8_(recode, s, len);
	return units;
}
// Returns how many code points FromUtf8_ delivers when decoding the `len` chars
// at `s`; the decoded values themselves are ignored.
int Utf32Len(const char *s, int len)
{
	int count = 0;
	auto tally = [&](wchar) { ++count; };
	FromUtf8_(tally, s, len);
	return count;
}
// Decodes the `len` chars at `s` with FromUtf8_ and stores each resulting code point
// consecutively starting at `t`. No bounds are checked here, so `t` must be large
// enough for every decoded code point.
void ToUtf32(wchar *t, const char *s, int len)
{
	wchar *out = t;
	auto store = [&](wchar c) { *out++ = c; };
	FromUtf8_(store, s, len);
}
// Decodes the `len` chars at `s` with FromUtf8_ and returns the code points as a
// WString, appending them to a WStringBuffer one at a time.
WString ToUtf32(const char *s, int len)
{
	WStringBuffer wide;
	auto append = [&](wchar c) { wide.Cat(c); };
	FromUtf8_(append, s, len);
	return WString(wide);
}
// Returns how many code points FromUtf16_ delivers when decoding the `len` char16
// units at `s`; the decoded values themselves are ignored.
int Utf32Len(const char16 *s, int len)
{
	int count = 0;
	auto tally = [&](wchar) { ++count; };
	FromUtf16_(tally, s, len);
	return count;
}
// Decodes the `len` char16 units at `s` with FromUtf16_ and stores each resulting
// code point consecutively starting at `t`. No bounds are checked here, so `t` must
// be large enough for every decoded code point.
void ToUtf32(wchar *t, const char16 *s, int len)
{
	wchar *out = t;
	auto store = [&](wchar c) { *out++ = c; };
	FromUtf16_(store, s, len);
}
// Decodes the `len` char16 units at `s` with FromUtf16_ and returns the code points
// as a WString. The WStringBuffer is asked to reserve `len` elements before decoding.
WString ToUtf32(const char16 *s, int len)
{
	WStringBuffer wide;
	wide.Reserve(len);
	auto append = [&](wchar c) { wide.Cat(c); };
	FromUtf16_(append, s, len);
	return WString(wide);
}
// Returns a String with one char per decoded element of `src`: chars whose byte value
// is below 128 are copied unchanged, anything else is fetched with FetchUtf8 and
// mapped through ToAscii.
String Utf8ToAscii(const String& src)
{
	// Output buffer is sized to the input length.
	// NOTE(review): assumes FetchUtf8 always consumes at least one char per call,
	// so the output can never outgrow the input - confirm.
	StringBuffer buf(src.GetLength());
	const char *in = src.begin();
	const char *end = src.end();
	char *out = buf;
	while(in < end) {
		if((byte)*in < 128)
			*out++ = *in++; // plain 7-bit char, no decoding needed
		else
			*out++ = ToAscii(FetchUtf8(in, end)); // FetchUtf8 advances `in`
	}
	buf.SetLength(int(out - ~buf)); // trim to what was actually written
	return String(buf);
}
// Returns a String with one char per decoded element of `src`: chars whose byte value
// is at most 'Z' are copied unchanged, everything else is fetched with FetchUtf8 and
// mapped through ToUpperAscii.
String Utf8ToUpperAscii(const String& src)
{
	// Output buffer is sized to the input length.
	// NOTE(review): assumes FetchUtf8 always consumes at least one char per call,
	// so the output can never outgrow the input - confirm.
	StringBuffer buf(src.GetLength());
	const char *in = src.begin();
	const char *end = src.end();
	char *out = buf;
	while(in < end) {
		if((byte)*in <= 'Z')
			*out++ = *in++; // shortcut: copied verbatim, bypassing ToUpperAscii
		else
			*out++ = ToUpperAscii(FetchUtf8(in, end)); // FetchUtf8 advances `in`
	}
	buf.SetLength(int(out - ~buf)); // trim to what was actually written
	return String(buf);
}
// Returns a String with one char per decoded element of `src`: every element is
// fetched with FetchUtf8 and mapped through ToLowerAscii (no shortcut for plain
// 7-bit chars here).
String Utf8ToLowerAscii(const String& src)
{
	// Output buffer is sized to the input length.
	// NOTE(review): assumes FetchUtf8 always consumes at least one char per call,
	// so the output can never outgrow the input - confirm.
	StringBuffer buf(src.GetLength());
	const char *in = src.begin();
	const char *end = src.end();
	char *out = buf;
	while(in < end) {
		*out = ToLowerAscii(FetchUtf8(in, end)); // FetchUtf8 advances `in`
		++out;
	}
	buf.SetLength(int(out - ~buf)); // trim to what was actually written
	return String(buf);
}
};