#include "Core.h"

namespace Upp {

#define LLOG(x) // LOG(x)

// Directory entry of one compressed block of a new-format dictionary file.
struct SpellBlock : Moveable<SpellBlock> {
	String first;    // key read from the block header; block selection compares it
	                 // with the upper-cased ASCII form of the word being checked
	int    offset;   // file position of the block's compressed control stream
	int    ctrl_len; // compressed size of the control stream
	int    text_len; // compressed size of the text stream (stored right after control)
};

// Per-language dictionary state. Holds either an old-format dictionary image
// ('data' non-empty) or the block directory of a new-format file ('path'/'block'),
// plus the user's own word list.
struct Speller {
	// Old format: whole file image; voc[] and line[] point into this buffer.
	String        data;
	byte          charset;  // charset used to encode words in the old format
	const char   *voc[256]; // zero-terminated fragments; only the first 256 - dict are set
	int           dict;     // control bytes below this value are trim lengths,
	                        // bytes at or above it index voc[] (offset by dict)

	// Byte range of one encoded word list inside 'data'.
	struct Line : Moveable<Line> {
		const byte *begin;
		const byte *end;
	};
	// Keyed by the 32-bit code stored in the file; CheckOld builds the same
	// code from the first three lower-cased characters of the word.
	VectorMap<int, Line> line;

	Index<WString> user; // user-added words

	// New format: dictionary file path and its block directory.
	String            path;
	Array<SpellBlock> block;

	bool     SetOld(const String& data);
	void     Clear()                        { data.Clear(); path.Clear(); }
	operator bool() const                   { return !data.IsEmpty() || path.GetCount(); }
	bool     CheckOld(const WString& wstr) const;

	String Get(int offset, int len); // declared only; no definition in this file
};

// Parses an old-format dictionary image.
//
// Layout as read here: 1 byte charset, 1 reserved byte, 1 byte 'dict',
// then (256 - dict) zero-terminated strings, then a sequence of chunks
// { int32le code, int32le len, len bytes }.
//
// Returns false (and leaves 'data' empty) when the image is truncated or a
// chunk length does not fit into the remaining data.
bool Speller::SetOld(const String& _data)
{
	data = _data;
	const char *s = data;
	const char *end = data.End();
	if(end - s < 3) { // the three header bytes must all be present
		data.Clear();
		return false;
	}
	charset = *s++;
	s++; // reserved for prefixes
	// Read as unsigned: a sign-extended value would make 256 - dict exceed
	// the size of voc[].
	dict = (byte)*s++;
	for(int i = 0; i < 256 - dict; i++) {
		if(s >= end) {
			data.Clear();
			return false;
		}
		voc[i] = s;
		while(*s) // stops at the String's own terminator at the latest
			s++;
		s++;
	}
	line.Clear();
	while(s < end) {
		if(end - s <= 8) { // same acceptance rule as before, without forming s + 8
			line.Clear();
			data.Clear();
			return false;
		}
		int code = Peek32le(s);
		s += 4;
		int len = Peek32le(s);
		s += 4;
		if(len < 0 || len > end - s) { // chunk must lie completely inside the image
			line.Clear();
			data.Clear();
			return false;
		}
		Line& l = line.Add(code);
		l.begin = (const byte *)s;
		s += len;
		l.end = (const byte *)s;
	}
	return true;
}

// Checks a word against the old-format dictionary and the user word list.
//
// Words shorter than 2 characters are always accepted. Words of 64 or more
// characters are looked up in the user list only.
bool Speller::CheckOld(const WString& wstr) const
{
	int len = wstr.GetLength();
	if(len < 2)
		return true;
	if(len < 64) {
		String w = FromUnicode(wstr, charset);
		String wl = FromUnicode(ToLower(wstr), charset);
		if(len == 2) { // pad so that a third character exists for the line code
			w.Cat(127);
			wl.Cat(127);
		}
		int i = line.Find(ToLower(wl[0], charset) +
		                  (ToLower(wl[1], charset) << 8) +
		                  (ToLower(wl[2], charset) << 16));
		if(i >= 0) {
			const byte *s = line[i].begin;
			const byte *e = line[i].end;
			String q; // word currently being reconstructed
			while(s < e)
				if(*s < dict) {
					// A trim byte ends the current word: test it, then keep
					// only the prefix shared with the next word.
					if(q == w || q == wl)
						return true;
					q.Trim(*s++);
				}
				else {
					const char *x = voc[(int)*s++ - dict];
					q.Cat(x);
				}
			if(q == w || q == wl) // last word of the line has no trailing trim byte
				return true;
		}
	}
	return user.Find(wstr) >= 0;
}

// Path of the user word list for 'lang': "<language text>.usp" in the config folder.
static String sUserFile(int lang)
{
	return ConfigFile(LNGAsText(lang) + ".usp");
}

// Additional dictionary search path, set by SetSpellPath.
String spell_path;

void SetSpellPath(const String& p)
{
	spell_path = p;
}

// Reads 'len' bytes at 'offset' from 'in' and returns them zlib-decompressed.
static String sZet(FileIn& in, int offset, int len)
{
	in.Seek(offset);
	return ZDecompress(in.Get(len));
}

// Appends 'dir' and each of its parent folders to 'pp', every entry followed
// by ';'. Stops when GetFileFolder returns its argument unchanged.
void DoSpellerPath(String& pp, String dir)
{
	for(;;) {
		pp << dir << ';';
		String d = GetFileFolder(dir);
		if(d == dir)
			break;
		dir = d;
	}
}

// Returns the speller for 'lang', loading it on first use, or NULL when no
// dictionary file (".udc" preferred, then ".scd") can be found or opened.
// Loaded spellers are kept in a function-local static map.
Speller *sGetSpeller(int lang)
{
	static ArrayMap<int, Speller> speller;
	int q = speller.Find(lang);
	if(q < 0) {
		String pp = spell_path;
		// Terminate the user-supplied path before appending more entries;
		// otherwise it fuses with the first "scd" directory below.
		if(pp.GetCount() && pp[pp.GetCount() - 1] != ';')
			pp << ';';
		DoSpellerPath(pp, GetExeDirFile("scd"));
		DoSpellerPath(pp, ConfigFile("scd"));
		pp << spell_path << ';' << getenv("LIB") << ';' << getenv("PATH") << ';';
#ifdef PLATFORM_POSIX
		pp << "/usr/share/upp;/usr/local/share/upp;/usr/share/upp/scd;/usr/local/share/upp/scd";
#endif
		String path = GetFileOnPath(ToLower(LNGAsText(lang)) + ".udc", pp);
		if(IsNull(path))
			path = GetFileOnPath(ToLower(LNGAsText(lang)) + ".scd", pp);
		if(IsNull(path))
			return NULL;
		FileIn in(path);
		if(!in)
			return NULL;
		q = speller.GetCount();
		Speller& f = speller.Add(lang);
		// Load the user's own words, one UTF-8 word per line.
		FileIn user(sUserFile(lang));
		while(!user.IsEof()) {
			String s = user.GetLine();
			if(!s.IsEmpty())
				f.user.Add(FromUtf8(s));
		}
		if(in.Get() != 255) // first byte 255 marks the new block format
			f.SetOld(LoadFile(path));
		else {
			f.path = path;
			int n = in.GetL();
			LLOG("Found scd file " << path << " blocks " << n);
			if(n > 0 && n < 100000) {
				// Directory: for each block a length-prefixed key and two sizes.
				for(int i = 0; i < n; i++) {
					SpellBlock& b = f.block.Add();
					b.first = in.Get(in.Get());
					b.ctrl_len = in.GetL();
					b.text_len = in.GetL();
				}
				if(in.IsEof()) // truncated directory
					f.block.Clear();
				else {
					// Block data follows the directory back to back.
					int off = (int)in.GetPos();
					for(int i = 0; i < n; i++) {
						SpellBlock& b = f.block[i];
						b.offset = off;
						off += b.ctrl_len + b.text_len;
					}
				}
			}
		}
	}
	return &speller[q];
}

// Checks 'wrd' against the dictionary of 'lang' without using the result cache.
//
// Returns true when no dictionary is available for 'lang', when the word (or
// its lower-cased form) is in the dictionary, or when it is in the user list.
// If 'withdia' is not NULL, dictionary words whose lower-cased ASCII form
// equals the lower-cased UTF-8 form of 'wrd' are appended to it while scanning.
bool SpellWordRaw(const WString& wrd, int lang, Vector<String> *withdia)
{
	Speller *f = sGetSpeller(lang);
	if(!f)
		return true;
	if(f->data.GetCount())
		return f->CheckOld(wrd);
	String awrd = ToUpper(ToAscii(wrd).ToString());
	String t1 = ToUtf8(wrd);
	String t2 = ToUtf8(ToLower(wrd));
	int count = f->block.GetCount();
	// Skip blocks whose successor's key is still below the word.
	int i = 0;
	while(i + 1 < count && awrd > f->block[i + 1].first)
		i++;
	// Scan blocks until one starts beyond the word.
	for(; i < count; i++) {
		LLOG("Spell block " << i << ": " << f->block[i].first);
		const SpellBlock& b = f->block[i];
		if(b.first > awrd) {
			LLOG(" --- end");
			break;
		}
		FileIn in(f->path);
		String ctrl = sZet(in, b.offset, b.ctrl_len);
		String text = sZet(in, b.offset + b.ctrl_len, b.text_len);
		in.Close();
		String w;
		const char *s = ctrl;
		const char *e = ctrl.End();
		const char *t = text;
		const char *te = text.End();
		// Each control byte gives the prefix length kept from the previous
		// word; the zero-terminated suffix comes from the text stream.
		while(s < e && t < te) {
			w.Trim(*s++);
			while(*t)
				w.Cat(*t++);
			if(w == t1 || w == t2)
				return true;
			if(withdia && t2 == ToLower(ToAscii(w.ToWString()).ToString()))
				withdia->Add(w);
			t++;
		}
	}
	return f->user.Find(wrd) >= 0;
}

// Cache key: a word together with its language.
struct SpellKey : Moveable<SpellKey> {
	int     lang;
	WString wrd;

	unsigned GetHashValue() const { return CombineHash(lang, wrd); }
	bool operator==(const SpellKey& b) const { return lang == b.lang && wrd == b.wrd; }
};

// Cache maker: computes the SpellWordRaw result for a key on a cache miss.
struct SpellMaker : LRUCache<bool, SpellKey>::Maker {
	SpellKey k;

	SpellKey Key() const         { return k; }
	int      Make(bool& r) const { r = SpellWordRaw(k.wrd, k.lang); return 1; }
};

static LRUCache<bool, SpellKey> speller_cache;

// Cached spell check of 'ws' in language 'lang'.
bool SpellWord(const WString& ws, int lang)
{
	speller_cache.Shrink(2000);
	SpellMaker m;
	m.k.lang = lang;
	m.k.wrd = ws;
	return speller_cache.Get(m);
}

// Cached spell check of the 'len' characters starting at 'ws'.
bool SpellWord(const wchar *ws, int len, int lang)
{
	return SpellWord(WString(ws, len), lang);
}

// Adds 'w' to the user word list of 'lang' (in memory and appended to the
// user file) unless it already passes the spell check, then clears the result
// cache. Does nothing when no speller is available for 'lang'.
void SpellerAdd(const WString& w, int lang)
{
	if(!SpellWord(w, lang)) {
		Speller *f = sGetSpeller(lang);
		if(f) {
			FileAppend fa(sUserFile(lang));
			fa.PutLine(ToUtf8(w));
			f->user.Add(w);
			speller_cache.Clear();
		}
	}
}

}