ultimatepp/uppsrc/Core/src.tpp/Utf_en-us.tpp
cxl a90ad2a80f uppsrc: Topic Language Separator change (more)
git-svn-id: svn://ultimatepp.org/upp/trunk@11510 f0d560ea-af0d-0410-9eb7-867de7ffcac7
2017-12-02 13:00:28 +00:00

220 lines
No EOL
13 KiB
C++

topic "Unicode UTF[8,16,32] support";
[2 $$0,0#00000000000000000000000000000000:Default]
[i448;a25;kKO9;2 $$1,0#37138531426314131252341829483380:class]
[l288;2 $$2,2#27521748481378242620020725143825:desc]
[0 $$3,0#96390100711032703541132217272105:end]
[H6;0 $$4,0#05600065144404261032431302351956:begin]
[i448;a25;kKO9;2 $$5,0#37138531426314131252341829483370:item]
[l288;a4;*@5;1 $$6,6#70004532496200323422659154056402:requirement]
[l288;i1121;b17;O9;~~~.1408;2 $$7,0#10431211400427159095818037425705:param]
[i448;b42;O9;2 $$8,8#61672508125594000341940100500538:tparam]
[b42;2 $$9,9#13035079074754324216151401829390:normal]
[{_}
[ {{10000@(113.42.0) [s0;%% [*@7;4 Unicode UTF`[8,16,32`] support]]}}&]
[s0;%% &]
[s5;:IsUtf8Lead`(int`): [@(0.0.255) bool]_[* IsUtf8Lead]([@(0.0.255) int]_[*@3 c])&]
[s2;%% Tests whether [%-*@3 c ]is a lead UTF`-8 byte.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:FetchUtf8`(const char`*`&`,const char`*`,bool`&`): [_^Upp`:`:dword^ dword]_
[* FetchUtf8]([@(0.0.255) const]_[@(0.0.255) char]_`*`&[*@3 s], [@(0.0.255) const]_[@(0.0.255) c
har]_`*[*@3 lim], [@(0.0.255) bool`&]_[*@3 ok])&]
[s5;:Upp`:`:FetchUtf8`(const char`*`&`,const char`*`): [_^Upp`:`:dword^ dword]_[* FetchUt
f8]([@(0.0.255) const]_[@(0.0.255) char]_`*`&[*@3 s], [@(0.0.255) const]_[@(0.0.255) char]_
`*[*@3 lim])&]
[s2;%% Reads a single UTF`-32 codepoint from UTF`-8 string [%-*@3 s]
with end at [%-*@3 lim]. [%-*@3 s] must be less than [%-*@3 lim]. [%-*@3 s]
is advanced accordingly. [%-*@3 ok] is set to false if the UTF`-8 is
invalid `- in that case, the error`-escape of a single byte is returned
(but [%-*@3 ok] is NOT set to true if a valid UTF`-8 character is read).&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:CheckUtf8`(const char`*`,int`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) con
st]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:CheckUtf8`(const char`*`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) const]_[@(0.0.255) c
har]_`*[*@3 s])&]
[s5;:CheckUtf8`(const String`&`): [@(0.0.255) bool]_[* CheckUtf8]([@(0.0.255) const]_[_^String^ S
tring][@(0.0.255) `&]_[*@3 src])&]
[s2;%% Checks whether the string contains a valid UTF`-8 sequence. If
the source is specified as pointer [%-*@3 s] without [%-*@3 len], it
must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:dword`*`,int`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) c
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:dword`*`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) const
]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [@(0.0.255) int]_[* Utf8Len
]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])&]
[s2;%% Returns the size in bytes of UTF`-32 Unicode text in UTF`-8.
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3; &]
[s4; &]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:wchar`*`,int`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) c
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:wchar`*`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) const
]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
[s5;:Upp`:`:Utf8Len`(const Upp`:`:WString`&`): [@(0.0.255) int]_[* Utf8Len]([@(0.0.255) con
st]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
[s2;%% Returns the size in bytes of UTF`-16 Unicode text in UTF`-8.
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:Utf8Len`(Upp`:`:dword`): [@(0.0.255) int]_[* Utf8Len]([_^Upp`:`:dword^ dword]_[*@3 c
ode])&]
[s2;%% Returns the size in bytes of single codepoint in UTF`-8.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf8`(char`*`,const Upp`:`:wchar`*`,int`): [@(0.0.255) void]_[* ToUtf8]([@(0.0.255) c
har]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s],
[@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:wchar`*`,int`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:wchar`*`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:WString`&`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
onst]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
[s2;%% UTF`-16 to UTF`-8 conversion. If target is specified as pointer
to buffer [%-*@3 t], the buffer must contain enough space for the
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf8`(char`*`,const Upp`:`:dword`*`,int`): [@(0.0.255) void]_[* ToUtf8]([@(0.0.255) c
har]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s],
[@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:dword`*`,int`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:dword`*`): [_^Upp`:`:String^ String]_[* ToUtf8]([@(0.0.255) c
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf8`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [_^Upp`:`:String^ String]_
[* ToUtf8]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])
&]
[s2;%% UTF`-32 to UTF`-8 conversion. If target is specified as pointer
to buffer [%-*@3 t], the buffer must contain enough space for the
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf8`(Upp`:`:dword`): [_^Upp`:`:String^ String]_[* ToUtf8]([_^Upp`:`:dword^ d
word]_[*@3 code])&]
[s2;%% Converts single codepoint to UTF`-8.&]
[s3; &]
[s4; &]
[s5;:Upp`:`:Utf16Len`(const Upp`:`:dword`*`,int`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) c
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf16Len`(const Upp`:`:dword`*`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) con
st]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
[s5;:Upp`:`:Utf16Len`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [@(0.0.255) int]_[* Utf16L
en]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s])&]
[s2;%% Returns the size in wchars of UTF`-32 Unicode text in UTF`-16.
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3; &]
[s4; &]
[s5;:Upp`:`:Utf16Len`(Upp`:`:dword`): [@(0.0.255) int]_[* Utf16Len]([_^Upp`:`:dword^ dword]_
[*@3 code])&]
[s2;%% Returns the size in wchars of single codepoint in UTF`-16.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:Utf16Len`(const char`*`,int`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) const]_
[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf16Len`(const char`*`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) const]_[@(0.0.255) c
har]_`*[*@3 s])&]
[s5;:Upp`:`:Utf16Len`(const Upp`:`:String`&`): [@(0.0.255) int]_[* Utf16Len]([@(0.0.255) co
nst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
[s2;%% Returns the size in wchars of UTF`-8 Unicode text in UTF`-16.
If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf16`(Upp`:`:wchar`*`,const Upp`:`:dword`*`,int`): [@(0.0.255) void]_[* To
Utf16]([_^Upp`:`:wchar^ wchar]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s
], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf16`(const Upp`:`:dword`*`,int`): [_^Upp`:`:WString^ WString]_[* ToUtf16](
[@(0.0.255) const]_[_^Upp`:`:dword^ dword]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf16`(const Upp`:`:dword`*`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
onst]_[_^Upp`:`:dword^ dword]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf16`(const Upp`:`:Vector`<Upp`:`:dword`>`&`): [_^Upp`:`:WString^ WStrin
g]_[* ToUtf16]([@(0.0.255) const]_[_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>`&_[*@3 s
])&]
[s2;%% [%- UTF`-32 to UTF`-16 conversion.] If target is specified as
pointer to buffer [%-*@3 t], the buffer must contain enough space
for the output. If source is specified as pointer [%-*@3 s] without
[%-*@3 len], it must be zero`-terminated.&]
[s3; &]
[s4; &]
[s5;:Upp`:`:ToUtf16`(Upp`:`:dword`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([_^Upp`:`:dword^ d
word]_[*@3 code])&]
[s2;%% Converts single codepoint to UTF`-16.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf16`(Upp`:`:wchar`*`,const char`*`,int`): [@(0.0.255) void]_[* ToUtf16]([_^Upp`:`:wchar^ w
char]_`*[*@3 t], [@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf16`(const char`*`,int`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
onst]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf16`(const char`*`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) co
nst]_[@(0.0.255) char]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf16`(const Upp`:`:String`&`): [_^Upp`:`:WString^ WString]_[* ToUtf16]([@(0.0.255) c
onst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
[s2;%% UTF`-8 to UTF`-16 conversion. If target is specified as pointer
to buffer [%-*@3 t], the buffer must contain enough space for the
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:Utf32Len`(const Upp`:`:wchar`*`,int`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) c
onst]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf32Len`(const Upp`:`:wchar`*`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) con
st]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
[s5;:Upp`:`:Utf32Len`(const Upp`:`:WString`&`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) c
onst]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
[s2;%% Returns the size in dwords of UTF`-16 Unicode text in UTF`-32.
Note that this is the same as the number of Unicode codepoints
in the text. If source is specified as pointer [%-*@3 s] without
[%-*@3 len], it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:Utf32Len`(const char`*`,int`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) const]_
[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:Utf32Len`(const char`*`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) const]_[@(0.0.255) c
har]_`*[*@3 s])&]
[s5;:Upp`:`:Utf32Len`(const Upp`:`:String`&`): [@(0.0.255) int]_[* Utf32Len]([@(0.0.255) co
nst]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
[s2;%% Returns the size in dwords of UTF`-8 Unicode text in UTF`-32.
Note that this is the same as the number of Unicode codepoints
in the text. If source is specified as pointer [%-*@3 s] without
[%-*@3 len], it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf32`(Upp`:`:dword`*`,const Upp`:`:wchar`*`,int`): [@(0.0.255) void]_[* To
Utf32]([_^Upp`:`:dword^ dword]_`*[*@3 t], [@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s
], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf32`(const Upp`:`:wchar`*`,int`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s],
[@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf32`(const Upp`:`:wchar`*`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:wchar^ wchar]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf32`(const Upp`:`:WString`&`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:WString^ WString][@(0.0.255) `&]_[*@3 s])&]
[s2;%% UTF`-16 to UTF`-32 conversion. If target is specified as pointer
to buffer [%-*@3 t], the buffer must contain enough space for the
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% &]
[s4; &]
[s5;:Upp`:`:ToUtf32`(Upp`:`:dword`*`,const char`*`,int`): [@(0.0.255) void]_[* ToUtf32]([_^Upp`:`:dword^ d
word]_`*[*@3 t], [@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf32`(const char`*`,int`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dwor
d]>_[* ToUtf32]([@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s], [@(0.0.255) int]_[*@3 len])&]
[s5;:Upp`:`:ToUtf32`(const char`*`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ dword]>_[* T
oUtf32]([@(0.0.255) const]_[@(0.0.255) char]_`*[*@3 s])&]
[s5;:Upp`:`:ToUtf32`(const Upp`:`:String`&`): [_^Upp`:`:Vector^ Vector]<[_^Upp`:`:dword^ d
word]>_[* ToUtf32]([@(0.0.255) const]_[_^Upp`:`:String^ String][@(0.0.255) `&]_[*@3 s])&]
[s2;%% UTF`-8 to UTF`-32 conversion. If target is specified as pointer
to buffer [%-*@3 t], the buffer must contain enough space for the
output. If source is specified as pointer [%-*@3 s] without [%-*@3 len],
it must be zero`-terminated.&]
[s3;%% ]]