diff --git a/uppbox/ConvertDic/ConvertDic.cpp b/uppbox/ConvertDic/ConvertDic.cpp
new file mode 100644
index 000000000..d7547a373
--- /dev/null
+++ b/uppbox/ConvertDic/ConvertDic.cpp
@@ -0,0 +1,192 @@
+// NOTE(review): the header name and all template arguments in this file were
+// absent from the reviewed text and are reconstructed from usage - confirm.
+#include <Core/Core.h>
+
+// Tracing is compiled out: LLOG expands to nothing.
+#define LLOG(x)
+
+using namespace Upp;
+
+// One suffix rule parsed from a five-field "SFX" line of the .aff file.
+struct Sfx : Moveable<Sfx> {
+	// Byte set for one position of the rule condition; when neg is true the
+	// position matches bytes that are NOT in the set.
+	struct Letter : public Bits {
+		bool neg;
+
+		Letter() { neg = false; }
+	};
+	Vector<Letter> cond; // conditions for the last cond.GetCount() bytes of a word
+	int len;             // number of trailing bytes removed from the word
+	String text;         // text appended after stripping (empty when the field is "0")
+
+	Sfx() { len = 0; }
+};
+
+// Suffix rules that share one flag character.
+struct Sfxl : Moveable<Sfxl> {
+	bool cross;      // cleared by a four-field "SFX <flag> N <digit...>" line
+	Vector<Sfx> sfx;
+	Sfxl() { cross = true; }
+};
+
+// NOTE(review): presumably marks Sfx::Letter as a valid Vector element - confirm.
+template struct Moveable<Sfx::Letter>;
+
+VectorMap< int, Sfxl > suffix;           // flag character -> suffix rules
+VectorMap< int, Vector<String> > prefix; // flag character -> prefix strings
+
+// Returns true when each of the last f.cond.GetCount() bytes of w satisfies the
+// condition at the same position. A rule with no conditions matches every word.
+bool Match(const String& w, const Sfx& f)
+{
+	int n = f.cond.GetCount();
+	if(w.GetLength() < n)
+		return false;
+	const char *s = w.End() - n;
+	for(int i = 0; i < n; i++) {
+		const Sfx::Letter& l = f.cond[i];
+		// Mismatch: byte in a negated set, or byte missing from a plain set.
+		if(l.Get((byte)s[i]) == l.neg)
+			return false;
+	}
+	return true;
+}
+
+// Parses the condition field of a suffix rule into f.cond. A field starting
+// with '.' adds no condition; "[abc]" is a byte set, "[^abc]" a negated set,
+// and any other byte stands for itself.
+static void ParseCondition(const char *s, Sfx& f)
+{
+	if(*s == '.')
+		return;
+	while(*s) {
+		if(*s == '[') {
+			Sfx::Letter& l = f.cond.Add();
+			s++;
+			if(*s == '^') {
+				l.neg = true;
+				s++;
+			}
+			while(*s && *s != ']')
+				l.Set((byte)*s++);
+			// Skip ']' only when present, so that an unterminated set cannot
+			// advance the pointer past the terminating NUL.
+			if(*s)
+				s++;
+		}
+		else
+			f.cond.Add().Set((byte)*s++);
+	}
+}
+
+// Expands the words of <arg>.dic with the affix rules of <arg>.aff and writes
+// every generated form, one per line, to <arg>.txt.
+CONSOLE_APP_MAIN
+{
+	if(CommandLine().GetCount() != 1) {
+		Cout() << "Usage: ConvertDic \n";
+		SetExitCode(1);
+		return;
+	}
+	String d = CommandLine()[0];
+	SetDefaultCharset(CHARSET_ISO8859_2);
+	byte outcharset = CHARSET_UTF8; // charset of the lines written to the .txt file
+
+	FileIn in(d + ".aff");
+	if(!in) {
+		Cout() << "Unable to open " << d << ".aff for reading\n";
+		SetExitCode(1);
+		return;
+	}
+	// The first two lines of the .aff file are skipped unparsed.
+	in.GetLine();
+	in.GetLine();
+	// Key 0 lands at index 0 and holds one empty string: the "no prefix" case.
+	prefix.Add(0).Add();
+	while(!in.IsEof()) {
+		String ln = in.GetLine();
+		int q = ln.Find('#'); // drop a trailing comment
+		if(q >= 0)
+			ln.Trim(q);
+		Vector<String> p = Split(ln, ' ');
+		if(p.GetCount() == 0)
+			continue;
+		if(p[0] == "PFX" && p.GetCount() == 5)
+			prefix.GetAdd(p[1][0]).Add(p[3]);
+		if(p[0] == "SFX") {
+			if(p.GetCount() == 5) {
+				Sfx& f = suffix.GetAdd(p[1][0]).sfx.Add();
+				// Only the length of the strip field is kept; "0" strips nothing.
+				f.len = p[2][0] == '0' ? 0 : p[2].GetLength();
+				f.text = p[3];
+				if(f.text == "0")
+					f.text.Clear();
+				ParseCondition(p[4], f);
+			}
+			// isdigit is undefined for negative values, hence the byte cast.
+			if(p.GetCount() == 4 && isdigit((byte)p[3][0]) && p[2][0] == 'N')
+				suffix.GetAdd(p[1][0]).cross = false;
+		}
+	}
+
+	FileOut out(d + ".txt");
+	if(!out) {
+		Cout() << "Unable to open " << d << ".txt for writing\n";
+		SetExitCode(1);
+		return;
+	}
+	in.Open(d + ".dic");
+	if(!in) {
+		Cout() << "Unable to open " << d << ".dic for reading\n";
+		SetExitCode(1);
+		return;
+	}
+	in.GetLine(); // the first line of the .dic file is skipped
+	while(!in.IsEof()) {
+		String w = in.GetLine();
+		LLOG(w);
+		String aff; // flag characters following '/'
+		int q = w.Find('/');
+		if(q >= 0) {
+			aff = w.Mid(q + 1);
+			w.Trim(q);
+		}
+		Vector<int> pi; // indices into prefix
+		Vector<int> si; // indices into suffix
+		for(const char *s = aff; *s; s++) {
+			int x = suffix.Find(*s);
+			if(x >= 0)
+				si.Add(x);
+			x = prefix.Find(*s);
+			if(x >= 0)
+				pi.Add(x);
+		}
+		pi.Add(0); // always emit the forms without a prefix
+
+		for(int a = 0; a < pi.GetCount(); a++) {
+			Vector<String>& p = prefix[pi[a]];
+			for(int b = 0; b < p.GetCount(); b++) {
+				String pfx = p[b];
+				LLOG("\tPrefix: " << pfx);
+				out.PutLine(ToCharset(outcharset, pfx + w));
+				LLOG("> " << pfx + w);
+				for(int c = 0; c < si.GetCount(); c++) {
+					Sfxl& u = suffix[si[c]];
+					// Non-cross suffix groups combine only with the empty prefix.
+					if(!u.cross && !pfx.IsEmpty())
+						continue;
+					for(int r = 0; r < u.sfx.GetCount(); r++) {
+						const Sfx& f = u.sfx[r];
+						if(Match(w, f) && w.GetLength() > f.len) {
+							LLOG("\tSuffix: " << f.text);
+							String o = pfx + w.Mid(0, w.GetLength() - f.len) + f.text;
+							LLOG("> " << o);
+							out.PutLine(ToCharset(outcharset, o));
+						}
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/uppbox/ConvertDic/ConvertDic.upp
b/uppbox/ConvertDic/ConvertDic.upp
new file mode 100644
index 000000000..ed07413ee
--- /dev/null
+++ b/uppbox/ConvertDic/ConvertDic.upp
@@ -0,0 +1,9 @@
+uses
+	Core;
+
+file
+	ConvertDic.cpp;
+
+mainconfig
+	"" = "";
+
diff --git a/uppbox/MakeSpellScd/MakeSpellScd.cpp b/uppbox/MakeSpellScd/MakeSpellScd.cpp
new file mode 100644
index 000000000..45e305d6d
--- /dev/null
+++ b/uppbox/MakeSpellScd/MakeSpellScd.cpp
@@ -0,0 +1,145 @@
+// NOTE(review): the header name and all template arguments in this file were
+// absent from the reviewed text and are reconstructed from usage - confirm.
+#include <Core/Core.h>
+
+#define LLOG(x) // LOG(x)
+#define LLOGHEXDUMP(a, b) // LOGHEXDUMP(a, b)
+#define CREATEINFO
+
+using namespace Upp;
+
+// Converts UTF-8 text to the upper-cased result of ToAscii; used as sort key
+// and as the block label.
+String Utf8ToUpperAscii(const String& x)
+{
+	return ToUpper(ToAscii(FromUtf8(x)).ToString());
+}
+
+// Sort predicate: orders by Utf8ToUpperAscii first and breaks ties by comparing
+// the original strings.
+bool ScdOrder(const String& a, const String& b)
+{
+	int q = SgnCompare(Utf8ToUpperAscii(a), Utf8ToUpperAscii(b));
+	if(q)
+		return q < 0;
+	return a < b;
+}
+
+// Directory entry describing one compressed block.
+struct Block : Moveable<Block> {
+	String first; // Utf8ToUpperAscii of the first word in the block
+	int ctrl_len; // byte size of the compressed prefix-length stream
+	int text_len; // byte size of the compressed word-tail stream
+};
+
+// Reads the word list named by the first command line argument, sorts it with
+// ScdOrder and writes the compressed dictionary to the file named by the second
+// argument. On failure to open a file it reports the problem, sets exit code 1
+// and returns.
+//
+// Output layout: byte 255, the block count, then for each block the length byte
+// and bytes of Block::first followed by ctrl_len and text_len; the compressed
+// streams of all blocks follow the directory.
+void Make()
+{
+	FileIn in(CommandLine()[0]);
+	if(!in) {
+		Cout() << "Unable to open " << CommandLine()[0] << " for reading\n";
+		SetExitCode(1);
+		return;
+	}
+	FileOut out(CommandLine()[1]);
+	if(!out) {
+		Cout() << "Unable to open " << CommandLine()[1] << " for writing\n";
+		SetExitCode(1);
+		return;
+	}
+
+#ifdef CREATEINFO
+	// Per-block statistics are written next to the output file.
+	FileOut info(ForceExt(CommandLine()[1], ".info.txt"));
+	if(!info) { // check the info stream itself
+		Cout() << "Unable to open info file for writing\n";
+		SetExitCode(1);
+		return;
+	}
+	info << " First | len | lenz | text | textz \r\n"
+	        "------------------------------------------------------\r\n";
+#endif
+
+	SetDefaultCharset(CHARSET_UTF8);
+
+	Vector<String> w;
+	while(!in.IsEof())
+		w.Add(in.GetLine());
+
+	Cout() << w.GetCount() << " words loaded, now sorting...\n";
+
+	// NOTE(review): no word-length limit is enforced here (a former assertion
+	// referred to an undeclared variable); confirm whether the .scd reader
+	// limits the length of a word.
+	Sort(w, ScdOrder);
+
+	Cout() << "Sorted, now compressing..\n";
+
+	Vector<Block> block;
+	String data; // compressed streams of all blocks, in directory order
+	int i = 0;
+
+	while(i < w.GetCount()) {
+		Block& t = block.Add();
+		t.first = Utf8ToUpperAscii(w[i]);
+		String ctrl; // per word: length of the prefix shared with the previous word
+		String text; // per word: the bytes after that prefix plus a 0 terminator
+		String pw;   // previous word of this block (empty for the first one)
+		// The block is closed once its text stream reaches 65000 bytes.
+		while(i < w.GetCount() && text.GetCount() < 65000) {
+			String cw = w[i];
+			int j = 0;
+			// The shared prefix is capped at 31 bytes.
+			while(j < pw.GetCount() && j < cw.GetCount() && j < 31 && pw[j] == cw[j])
+				j++;
+			ctrl.Cat(j);
+			text.Cat(cw.Mid(j));
+			text.Cat(0);
+			pw = cw;
+			i++;
+		}
+		String ztext = ZCompress(text);
+		String zctrl = ZCompress(ctrl);
+#ifdef CREATEINFO
+		info << Format("%-9.9s |%9d |%9d |%9d |%9d\r\n", t.first,
+		               ctrl.GetCount(), zctrl.GetCount(), text.GetCount(), ztext.GetCount());
+#endif
+		t.ctrl_len = zctrl.GetCount();
+		t.text_len = ztext.GetCount();
+		data.Cat(zctrl);
+		data.Cat(ztext);
+	}
+
+	Cout() << "Compressed, writing file directory..\n";
+	out.Put(255);
+	out.PutL(block.GetCount());
+	for(int k = 0; k < block.GetCount(); k++) {
+		Block& t = block[k];
+		out.Put(t.first.GetCount());
+		out.Put(t.first);
+		out.PutL(t.ctrl_len);
+		out.PutL(t.text_len);
+	}
+
+	Cout() << "Writing data..\n";
+	out.Put(data);
+}
+
+// Entry point: expects exactly two arguments (input and output file names).
+CONSOLE_APP_MAIN
+{
+	if(CommandLine().GetCount() != 2) {
+		Cout() << "Usage: MakeSpellScd \n";
+		SetExitCode(1);
+		return;
+	}
+	Make();
+	Cout() << "* Finished\n";
+}
diff --git a/uppbox/MakeSpellScd/MakeSpellScd.upp b/uppbox/MakeSpellScd/MakeSpellScd.upp
new file mode 100644
index 000000000..7003c0559
--- /dev/null
+++ b/uppbox/MakeSpellScd/MakeSpellScd.upp
@@ -0,0 +1,13 @@
+description "Utility for creation of RichEdit spelling dictionary files (.scd)\377";
+
+optimize_speed;
+
+uses
+	Core;
+
+file
+	MakeSpellScd.cpp;
+
+mainconfig
+	"" = "";
+
diff --git a/uppbox/MakeSpellScd/init b/uppbox/MakeSpellScd/init
new file mode 100644
index 000000000..fd8113dac
--- /dev/null
+++ b/uppbox/MakeSpellScd/init
@@ -0,0 +1,4 @@
+#ifndef _MakeSpellScd_icpp_init_stub
+#define _MakeSpellScd_icpp_init_stub
+#include "Core/init"
+#endif