mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-06-17 06:05:28 -06:00
220 lines
No EOL
13 KiB
C++
220 lines
No EOL
13 KiB
C++
topic "Unicode UTF[8,16,32] support";
|
|
[2 $$0,0#00000000000000000000000000000000:Default]
|
|
[i448;a25;kKO9;2 $$1,0#37138531426314131252341829483380:class]
|
|
[l288;2 $$2,2#27521748481378242620020725143825:desc]
|
|
[0 $$3,0#96390100711032703541132217272105:end]
|
|
[H6;0 $$4,0#05600065144404261032431302351956:begin]
|
|
[i448;a25;kKO9;2 $$5,0#37138531426314131252341829483370:item]
|
|
[l288;a4;*@5;1 $$6,6#70004532496200323422659154056402:requirement]
|
|
[l288;i1121;b17;O9;~~~.1408;2 $$7,0#10431211400427159095818037425705:param]
|
|
[i448;b42;O9;2 $$8,8#61672508125594000341940100500538:tparam]
|
|
[b42;2 $$9,9#13035079074754324216151401829390:normal]
|
|
[{_}
|
|
[ {{10000@(113.42.0) [s0;%% [*@7;4 Unicode UTF`[8,16,32`] support]]}}&]
|
|
[s0;%% &]
|
|
[s5;:IsUtf8Lead`(int`): [@(0.0.255) bool]_[* IsUtf8Lead]([@(0.0.255) int]_[*@3 c])&]
|
|
[s2;%% Tests whether [%-*@3 c ]is a lead UTF`-8 byte.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:FetchUtf8`(const char`*`&`,const char`*`,bool`&`): [_^Upp`:`:dword^ dword]_
|
|
[* FetchUtf8]([@(0.0.255) const]_[@(0.0.255) char]_`*`&[*@3 s], [@(0.0.255) const]_[@(0.0.255) c
|
|
har]_`*[*@3 lim], [@(0.0.255) bool`&]_[*@3 ok])&]
|
|
[s5;:Upp`:`:FetchUtf8`(const char`*`&`,const char`*`): [_^Upp`:`:dword^ dword]_[* FetchUt
|
|
f8]([@(0.0.255) const]_[@(0.0.255) char]_`*`&[*@3 s], [@(0.0.255) const]_[@(0.0.255) char]_
|
|
`*[*@3 lim])&]
|
|
[s2;%% Reads a single UTF`-32 codepoint from UTF`-8 string [%-*@3 s]
|
|
with end at [%-*@3 lim]. [%-*@3 s] must be less than [%-*@3 lim]. [%-*@3 s]
|
|
is advanced accordingly. [%-*@3 ok] is set to false if UTF`-8 is
|
|
invalid `- in that case, error`-escape of single byte is returned
|
|
(but [%-*@3 ok] is NOT set to true if a valid UTF`-8 character is read).&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:CheckUtf8`(const char`*`,int`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) con
|
|
st]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:CheckUtf8`(const char`*`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) const]_[@(0.0.255) c
|
|
har]_`*[*@3 s])&]
|
|
[s5;:CheckUtf8`(const String`&`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) const]_[_^String^ S
|
|
tring][@(0.0.255) `&]_[*@3 src])&]
|
|
[s2;%% Checks whether string contains a valid UTF`-8 sequence. If
|
|
source is specified as pointer [%-*@3 s] without [%-*@3 len], it 
|
|
must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:dword`*`,int`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:dword`*`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) const
|
|
]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [@(0.0.255) int]_[* Utf8Len
|
|
]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])&]
|
|
[s2;%% Returns the size in bytes of UTF`-32 Unicode text in UTF`-8.
|
|
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3; &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:wchar`*`,int`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:wchar`*`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) const
|
|
]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf8Len`(const Upp`:`:WString`&`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) con
|
|
st]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% Returns the size in bytes of UTF`-16 Unicode text in UTF`-8.
|
|
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf8Len`(Upp`:`:dword`): [@(0.0.255) int]_[* Utf8Len]([_^Upp`:`:dword^ dword]_[*@3 c
|
|
ode])&]
|
|
[s2;%% Returns the size in bytes of single codepoint in UTF`-8.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf8`(char`*`,const Upp`:`:wchar`*`,int`): [@(0.0.255) void]_[* ToUtf8]([@(0.0.255) c
|
|
har]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s],
|
|
[@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:wchar`*`,int`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:wchar`*`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:WString`&`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% UTF`-16 to UTF`-8 conversion. If target is specified as pointer
|
|
to buffer [%-*@3 t], the buffer must contain enough space for the
|
|
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf8`(char`*`,const Upp`:`:dword`*`,int`): [@(0.0.255) void]_[* ToUtf8]([@(0.0.255) c
|
|
har]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s],
|
|
[@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:dword`*`,int`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:dword`*`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf8`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [_^Upp`:`:String^ String]_
|
|
[* ToUtf8]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])
|
|
&]
|
|
[s2;%% UTF`-32 to UTF`-8 conversion. If target is specified as pointer
|
|
to buffer [%-*@3 t], the buffer must contain enough space for the
|
|
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf8`(Upp`:`:dword`): [_^Upp`:`:String^ String]_[* ToUtf8]([_^Upp`:`:dword^ d
|
|
word]_[*@3 code])&]
|
|
[s2;%% Converts single codepoint to UTF`-8.&]
|
|
[s3; &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf16Len`(const Upp`:`:dword`*`,int`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf16Len`(const Upp`:`:dword`*`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) con
|
|
st]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf16Len`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [@(0.0.255) int]_[* Utf16L
|
|
en]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])&]
|
|
[s2;%% Returns the size in wchars of UTF`-32 Unicode text in UTF`-16.
|
|
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3; &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf16Len`(Upp`:`:dword`): [@(0.0.255) int]_[* Utf16Len]([_^Upp`:`:dword^ dword]_
|
|
[*@3 code])&]
|
|
[s2;%% Returns the size in wchars of single codepoint in UTF`-16.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf16Len`(const char`*`,int`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) const]_
|
|
[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf16Len`(const char`*`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) const]_[@(0.0.255) c
|
|
har]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf16Len`(const Upp`:`:String`&`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) co
|
|
nst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% Returns the size in wchars of UTF`-8 Unicode text in UTF`-16.
|
|
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf16`(Upp`:`:wchar`*`,const Upp`:`:dword`*`,int`): [@(0.0.255) void]_[* To
|
|
Utf16]([_^Upp`:`:wchar^ wchar]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s
|
|
], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf16`(const Upp`:`:dword`*`,int`): [_^Upp`:`:WString^ WString]_[* ToUtf16](
|
|
[@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf16`(const Upp`:`:dword`*`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf16`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [_^Upp`:`:WString^ WStrin
|
|
g]_[* ToUtf16]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s
|
|
])&]
|
|
[s2;%% [%- UTF`-32 to UTF`-16 conversion.] If target is specified as
|
|
pointer to buffer [%-*@3 t], the buffer must contain enough space
|
|
for the output. If source is specified as pointer [%-*@3 s] without
|
|
[%-*@3 len], it must be zero`-terminated.&]
|
|
[s3; &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf16`(Upp`:`:dword`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([_^Upp`:`:dword^ d
|
|
word]_[*@3 code])&]
|
|
[s2;%% Converts single codepoint to UTF`-16.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf16`(Upp`:`:wchar`*`,const char`*`,int`): [@(0.0.255) void]_[* ToUtf16]([_^Upp`:`:wchar^ w
|
|
char]_`*[*@3 t], [@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf16`(const char`*`,int`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
|
|
onst]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf16`(const char`*`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) co
|
|
nst]_[@(0.0.255) char]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf16`(const Upp`:`:String`&`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% UTF`-8 to UTF`-16 conversion. If target is specified as pointer
|
|
to buffer [%-*@3 t], the buffer must contain enough space for the
|
|
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf32Len`(const Upp`:`:wchar`*`,int`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf32Len`(const Upp`:`:wchar`*`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) con
|
|
st]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf32Len`(const Upp`:`:WString`&`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) c
|
|
onst]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% Returns the size in dwords of UTF`-16 Unicode text in UTF`-32.
|
|
Note that this is the same as the number of Unicode codepoints
|
|
in the text. If source is specified as pointer [%-*@3 s] without
|
|
[%-*@3 len], it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:Utf32Len`(const char`*`,int`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) const]_
|
|
[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:Utf32Len`(const char`*`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) const]_[@(0.0.255) c
|
|
har]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:Utf32Len`(const Upp`:`:String`&`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) co
|
|
nst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% Returns the size in dwords of UTF`-8 Unicode text in UTF`-32.
|
|
Note that this is the same as the number of Unicode codepoints
|
|
in the text. If source is specified as pointer [%-*@3 s] without
|
|
[%-*@3 len], it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf32`(Upp`:`:dword`*`,const Upp`:`:wchar`*`,int`): [@(0.0.255) void]_[* To
|
|
Utf32]([_^Upp`:`:dword^ dword]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s
|
|
], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf32`(const Upp`:`:wchar`*`,int`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
|
|
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s],
|
|
[@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf32`(const Upp`:`:wchar`*`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
|
|
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf32`(const Upp`:`:WString`&`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
|
|
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% UTF`-16 to UTF`-32 conversion. If target is specified as pointer
|
|
to buffer [%-*@3 t], the buffer must contain enough space for the
|
|
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% &]
|
|
[s4; &]
|
|
[s5;:Upp`:`:ToUtf32`(Upp`:`:dword`*`,const char`*`,int`): [@(0.0.255) void]_[* ToUtf32]([_^Upp`:`:dword^ d
|
|
word]_`*[*@3 t], [@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf32`(const char`*`,int`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dwor
|
|
d]>_[* ToUtf32]([@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
|
|
[s5;:Upp`:`:ToUtf32`(const char`*`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>_[* T
|
|
oUtf32]([@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s])&]
|
|
[s5;:Upp`:`:ToUtf32`(const Upp`:`:String`&`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
|
|
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
|
|
[s2;%% UTF`-8 to UTF`-32 conversion. If target is specified as pointer
|
|
to buffer [%-*@3 t], the buffer must contain enough space for the
|
|
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
|
|
it must be zero`-terminated.&]
|
|
[s3;%% ]] |