mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 14:16:07 -06:00
94 lines
2.3 KiB
C++
94 lines
2.3 KiB
C++
#include <Core/Core.h>
|
|
|
|
using namespace Upp;
|
|
|
|
void ValidateUtf8(const String& src, int begin, int end, int pos)
|
|
{
|
|
// Checks for malformed, imcomplete and overlong UTF-8 sequences.
|
|
// Replaces each malformed/illegal byte with the replacement character. (Recommended method)
|
|
// The length of the original line and decoded/replaced line MUST be equal for these sequences.
|
|
|
|
String dest;
|
|
StringStream ss(src);
|
|
bool check_utf8 = pos >= begin && pos < end;
|
|
while(!ss.IsEof()) {
|
|
int c = check_utf8 ? ss.GetUtf8() : ss.Get();
|
|
dest.Cat(c < 0 ? 0xFFFD : c);
|
|
}
|
|
if(check_utf8)
|
|
ASSERT(dest.GetLength() == src.GetLength());
|
|
}
|
|
|
|
|
|
void CheckLine(const String& l)
|
|
{
|
|
StringStream ss(l);
|
|
WString q;
|
|
while(!ss.IsEof())
|
|
q.Cat(ss.GetUtf8());
|
|
if(ss.IsOK() != CheckUtf8(l)) {
|
|
DDUMP(AsCString(l));
|
|
DDUMP(ss.IsOK());
|
|
DDUMP(CheckUtf8(l));
|
|
}
|
|
ASSERT(ss.IsOK() == CheckUtf8(l));
|
|
if(ss.IsOK())
|
|
ASSERT(ToUtf8(q) == l);
|
|
}
|
|
|
|
CONSOLE_APP_MAIN
|
|
{
|
|
StdLogSetup(LOG_COUT|LOG_FILE);
|
|
FileIn in(GetDataFile("utf8_stress_test.txt"));
|
|
|
|
/* in case something does not work
|
|
char h[] = "2.1.5 5 bytes (U-00200000): \"øˆ€€€\" |";
|
|
String q(h, sizeof(h));
|
|
DDUMPHEX(q);
|
|
CheckUtf8(q);
|
|
CheckLine(q);
|
|
*/
|
|
|
|
while(!in.IsEof()) {
|
|
String l = in.GetLine();
|
|
CheckLine(l);
|
|
}
|
|
|
|
for(int pass = 0; pass < 2; pass++)
|
|
for(int i = 0; i < 0x110000; i++) {
|
|
if(i >= 0xee00 && i <= 0xeeff) // skip error escapes
|
|
continue;
|
|
WString q = "Test ";
|
|
q.Cat(i);
|
|
if(pass)
|
|
q.Cat(" ..");
|
|
String s = ToUtf8(q);
|
|
StringStream ss(s);
|
|
WString qq;
|
|
while(!ss.IsEof())
|
|
qq.Cat(ss.GetUtf8());
|
|
if(qq != q) {
|
|
DDUMPHEX(i);
|
|
DDUMPHEX(s);
|
|
DDUMPC(qq);
|
|
DDUMPC(q);
|
|
}
|
|
ASSERT(qq == q);
|
|
}
|
|
|
|
// This autotest uses Marcus Kuhn's UTf-8 stress test text.
|
|
// StdLogSetup(LOG_COUT);
|
|
String text = LoadDataFile("utf8_stress_test.txt");
|
|
ASSERT(!IsNull(text));
|
|
StringStream ss(text);
|
|
int pos = 0;
|
|
int begin = text.Find("3 Malformed sequences");
|
|
int end = text.Find("5.3 Noncharacter code positions");
|
|
ASSERT(begin >= 0 && begin < end);
|
|
while(!ss.IsEof()) {
|
|
String line = ss.GetLine();
|
|
ValidateUtf8(line, begin, end, pos++);
|
|
}
|
|
|
|
LOG("============= OK");
|
|
}
|