MakeSpellScd and ConvertDic moved to uppbox

git-svn-id: svn://ultimatepp.org/upp/trunk@1704 f0d560ea-af0d-0410-9eb7-867de7ffcac7
This commit is contained in:
cxl 2009-11-17 09:08:15 +00:00
parent a0053979af
commit 6f29fb7477
5 changed files with 298 additions and 0 deletions

View file

@ -0,0 +1,146 @@
#include <Core/Core.h>
#define LLOG(x)
using namespace Upp;
// One suffix rule read from an "SFX" line of the .aff file.
struct Sfx : Moveable <Sfx> {
	// One position of the rule's condition: a set of byte values (stored in
	// the Bits base) plus a flag that inverts the meaning of the set.
	struct Letter : public Bits {
		bool neg; // true when the class was written as [^...]

		Letter() : neg(false) {}
	};

	Vector<Letter> cond; // condition classes, matched against the end of the word
	int            len;  // number of trailing characters removed from the word
	String         text; // text appended after the removal (empty for "0")
};
// All suffix rules sharing one flag character.
struct Sfxl : Moveable <Sfxl> {
	bool         cross; // when false, the rules are applied only with the empty prefix
	Vector <Sfx> sfx;   // the individual rules of this flag

	Sfxl() : cross(true) {}
};
// NOTE(review): explicit instantiation of Moveable for the nested Letter type;
// presumably needed so that Vector<Sfx::Letter> accepts it - confirm against
// the U++ Moveable documentation.
template struct Moveable<Sfx::Letter>;
// Suffix rule groups, keyed by the flag character taken from the .aff file.
VectorMap< int, Sfxl > suffix;
// Prefix strings, keyed by the flag character; main() adds key 0 holding a
// single empty prefix.
VectorMap< int, Vector<String> > prefix;
bool Match(const String& w, const Sfx& f)
{
if(w.GetLength() < f.cond.GetCount())
return false;
const char *s = w.End() - f.cond.GetCount();
for(int i = 0; i < f.cond.GetCount(); i++) {
const Sfx::Letter& l = f.cond[i];
if(l.Get((byte)s[i]) == l.neg) return false;
}
return true;
}
// ConvertDic: expands an affix-compressed dictionary into a plain word list.
//
// Takes one argument, a base name. Reads <base>.aff (PFX/SFX rules) and
// <base>.dic (one "word/flags" entry per line) and writes every generated
// word form to <base>.txt, one per line, converted to UTF-8.
// Exits with code 1 on bad usage or when a file cannot be opened.
CONSOLE_APP_MAIN
{
	if(CommandLine().GetCount() != 1) {
		// The argument is a base name; ".aff", ".dic" and ".txt" are appended to it.
		Cout() << "Usage: ConvertDic <basename>\n"
		          "Reads <basename>.aff and <basename>.dic, writes <basename>.txt\n";
		exit(1);
	}
	String d = CommandLine()[0];
	SetDefaultCharset(CHARSET_ISO8859_2);
	byte outcharset = CHARSET_UTF8;

	FileIn in(d + ".aff");
	if(!in) {
		Cout() << "Unable to open " << d << ".aff for reading\n";
		exit(1);
	}
	// The first two lines of the .aff file are skipped unparsed
	// (presumably the charset and TRY headers - confirm against the format).
	in.GetLine();
	in.GetLine();
	// Key 0 holds the single empty prefix, so every word is also emitted unprefixed.
	prefix.Add(0).Add();
	while(!in.IsEof()) {
		String ln = in.GetLine();
		int q = ln.Find('#'); // strip trailing comment
		if(q >= 0)
			ln.Trim(q);
		Vector<String> p = Split(ln, ' ');
		if(p.GetCount() == 0)
			continue;
		// Prefix rule: only the added text (4th field) is used.
		if(p[0] == "PFX" && p.GetCount() == 5)
			prefix.GetAdd(p[1][0]).Add(p[3]);
		if(p[0] == "SFX") {
			if(p.GetCount() == 5) {
				// Suffix rule: SFX <flag> <strip> <add> <condition>
				Sfx& f = suffix.GetAdd(p[1][0]).sfx.Add();
				f.len = p[2][0] == '0' ? 0 : p[2].GetLength();
				f.text = p[3];
				if(f.text == "0")
					f.text.Clear();
				const char *s = p[4];
				if(*s != '.') { // "." means no condition at all
					while(*s) {
						if(*s == '[') {
							Sfx::Letter& l = f.cond.Add();
							s++;
							if(*s == '^') {
								l.neg = true;
								s++;
							}
							while(*s && *s != ']')
								l.Set((byte)*s++);
							// Skip the closing bracket only when it is really there;
							// an unterminated class must not step past the terminator.
							if(*s == ']')
								s++;
						}
						else
							f.cond.Add().Set((byte)*s++);
					}
				}
			}
			// Suffix group header: SFX <flag> <Y|N> <count>; 'N' disables
			// combining this group with non-empty prefixes.
			// The cast keeps isdigit() defined for characters above 127.
			if(p.GetCount() == 4 && isdigit((byte)p[3][0]) && p[2][0] == 'N')
				suffix.GetAdd(p[1][0]).cross = false;
		}
	}

	if(!in.Open(d + ".dic")) {
		Cout() << "Unable to open " << d << ".dic for reading\n";
		exit(1);
	}
	FileOut out(d + ".txt");
	if(!out) {
		Cout() << "Unable to open " << d << ".txt for writing\n";
		exit(1);
	}
	in.GetLine(); // the first line of the .dic file is skipped (presumably the word count)
	while(!in.IsEof()) {
		String w = in.GetLine();
		LLOG(w);
		// Split "word/flags" into the word and its flag characters.
		String aff;
		int q = w.Find('/');
		if(q >= 0) {
			aff = w.Mid(q + 1);
			w.Trim(q);
		}
		// Indices (into the global maps) of the prefix / suffix groups named by the flags.
		Vector<int> pi;
		Vector<int> si;
		for(const char *s = aff; *s; s++) {
			int x = suffix.Find(*s);
			if(x >= 0)
				si.Add(x);
			x = prefix.Find(*s);
			if(x >= 0)
				pi.Add(x);
		}
		pi.Add(0); // always include the empty prefix (index 0)
		for(int gi = 0; gi < pi.GetCount(); gi++) {
			Vector<String>& p = prefix[pi[gi]];
			for(int pj = 0; pj < p.GetCount(); pj++) {
				String pfx = p[pj];
				LLOG("\tPrefix: " << pfx);
				out.PutLine(ToCharset(outcharset, pfx + w));
				LLOG("> " << pfx + w);
				for(int sk = 0; sk < si.GetCount(); sk++) {
					Sfxl& u = suffix[si[sk]];
					// Groups with cross == false combine only with the empty prefix.
					if(!u.cross && !pfx.IsEmpty())
						continue;
					for(int r = 0; r < u.sfx.GetCount(); r++) {
						const Sfx& f = u.sfx[r];
						// The rule must match and must leave at least one character of the word.
						if(Match(w, f) && w.GetLength() > f.len) {
							LLOG("\tSuffix: " << f.text);
							String o = pfx + w.Mid(0, w.GetLength() - f.len) + f.text;
							LLOG("> " << o);
							out.PutLine(ToCharset(outcharset, o));
						}
					}
				}
			}
		}
	}
}

View file

@ -0,0 +1,9 @@
uses
Core;
file
ConvertDic.cpp;
mainconfig
"" = "";

View file

@ -0,0 +1,126 @@
#include <Core/Core.h>
#define LLOG(x) // LOG(x)
#define LLOGHEXDUMP(a, b) // LOGHEXDUMP(a, b)
#define CREATEINFO
using namespace Upp;
// Builds the sort/index key of a word: the UTF-8 input is decoded, passed
// through ToAscii and then upper-cased.
String Utf8ToUpperAscii(const String& x)
{
	String ascii = ToAscii(FromUtf8(x)).ToString();
	return ToUpper(ascii);
}
bool ScdOrder(const String& a, const String& b)
{
int q = SgnCompare(Utf8ToUpperAscii(a), Utf8ToUpperAscii(b));
if(q)
return q < 0;
return a < b;
};
// Directory entry describing one compressed block of the output file.
struct Block : Moveable<Block> {
	String first;    // upper-case ASCII key of the first word stored in the block
	int    ctrl_len; // size in bytes of the compressed control stream
	int    text_len; // size in bytes of the compressed text stream
};
// Builds the dictionary file from a plain word list.
//
// Reads CommandLine()[0] (one UTF-8 word per line), sorts the words with
// ScdOrder and packs them into blocks. Inside a block each word is stored as
// a control byte (number of leading bytes shared with the previous word,
// capped at 31) followed by the remaining bytes and a zero terminator; the
// control and text streams are compressed separately with ZCompress.
//
// Output layout written to CommandLine()[1]:
//   byte 255, PutL(block count),
//   per block: byte key length, key bytes, PutL(ctrl_len), PutL(text_len),
//   then all compressed ctrl/text streams in block order.
//
// With CREATEINFO defined, per-block statistics go to "<output>.info.txt".
// On any file open failure a message is printed, the exit code is set to 1
// and the function returns without writing the dictionary.
void Make()
{
	FileIn in(CommandLine()[0]);
	if(!in) {
		Cout() << "Unable to open " << CommandLine()[0] << " for reading\n";
		SetExitCode(1);
		return;
	}
	FileOut out(CommandLine()[1]);
	if(!out) {
		Cout() << "Unable to open " << CommandLine()[1] << " for writing\n";
		SetExitCode(1);
		return;
	}
#ifdef CREATEINFO
	FileOut info(ForceExt(CommandLine()[1], ".info.txt"));
	if(!info) { // check the info stream itself, not 'out' again
		Cout() << "Unable to open info file for writing\n";
		SetExitCode(1);
		return;
	}
	info << " First | len | lenz | text | textz \r\n"
	        "------------------------------------------------------\r\n";
#endif
	SetDefaultCharset(CHARSET_UTF8);
	Vector<String> w;
	while(!in.IsEof())
		w.Add(in.GetLine());
	Cout() << w.GetCount() << " words loaded, now sorting...\n";
	// A former ASSERT/LLOG pair on 'maxlen' was dropped here: the variable is
	// not declared anywhere, so it only compiled when ASSERT expanded to nothing.
	Sort(w, ScdOrder);
	Cout() << "Sorted, now compressing..\n";
	Vector<Block> block;
	String data;
	int i = 0;
	while(i < w.GetCount()) {
		Block& t = block.Add();
		t.first = Utf8ToUpperAscii(w[i]);
		String ctrl;
		String text;
		String pw; // previous word of this block; empty at block start
		// Fill the block until its uncompressed text reaches about 65000 bytes.
		while(i < w.GetCount() && text.GetCount() < 65000) {
			String cw = w[i];
			// Length of the prefix shared with the previous word, at most 31.
			int j = 0;
			while(j < pw.GetCount() && j < cw.GetCount() && j < 31 && pw[j] == cw[j])
				j++;
			ctrl.Cat(j);
			text.Cat(cw.Mid(j));
			text.Cat(0);
			pw = cw;
			i++;
		}
		String ztext = ZCompress(text);
		String zctrl = ZCompress(ctrl);
#ifdef CREATEINFO
		info << Format("%-9.9s |%9d |%9d |%9d |%9d\r\n", t.first,
		               ctrl.GetCount(), zctrl.GetCount(), text.GetCount(), ztext.GetCount());
#endif
		t.ctrl_len = zctrl.GetCount();
		t.text_len = ztext.GetCount();
		data.Cat(zctrl);
		data.Cat(ztext);
	}
	Cout() << "Compressed, writing file directory..\n";
	out.Put(255);
	out.PutL(block.GetCount());
	for(int b = 0; b < block.GetCount(); b++) {
		Block& t = block[b];
		out.Put(t.first.GetCount());
		out.Put(t.first);
		out.PutL(t.ctrl_len);
		out.PutL(t.text_len);
	}
	Cout() << "Writing data..\n";
	out.Put(data);
}
// MakeSpellScd entry point: expects exactly two arguments (input word list,
// output file), otherwise prints the usage line and exits with code 1.
CONSOLE_APP_MAIN
{
	bool args_ok = CommandLine().GetCount() == 2;
	if(!args_ok) {
		Cout() << "Usage: MakeSpellScd <inputfile> <outputfile>\n";
		exit(1);
	}
	Make();
	Cout() << "* Finished\n";
}

View file

@ -0,0 +1,13 @@
description "Utility for creation of RichEdit spelling dictionary files (.scd)\377";
optimize_speed;
uses
Core;
file
MakeSpellScd.cpp;
mainconfig
"" = "";

4
uppbox/MakeSpellScd/init Normal file
View file

@ -0,0 +1,4 @@
#ifndef _MakeSpellScd_icpp_init_stub
#define _MakeSpellScd_icpp_init_stub
#include "Core/init"
#endif