mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 14:16:07 -06:00
This might bring some incompatibilities in code that expects wchar to be 16-bit, which especially involves dealing with Win32 (and to a lesser extent MacOS) APIs, so if your application does that, please check all instances of WCHAR (UniChar on MacOS) or even wchar, especially type casts. To support host APIs, char16 is introduced (but there is no 16-bit String variant). Use ToSystemCharsetW and FromSystemCharsetW to convert texts for the Win32 API. - Support for drawing non-BMP characters in GUI - Vastly improved character font replacement code (when drawing characters missing from the requested font, a replacement font is used) - Last instances of Win32 ANSI calls (those ending with A) are removed - UTF handling routines are refactored and their naming is unified - RTF is now able to handle non-BMP characters (RTF is used as the clipboard format for RichText) Other minor changes: - fixed TryRealloc issue - improved MemoryCheck - Removed MemoryAlloc48/MemoryFree48 - In theide, background parsing should less often cause delays in the main thread
433 lines
9.5 KiB
C++
433 lines
9.5 KiB
C++
#include "Core.h"
|
|
|
|
namespace Upp {
|
|
|
|
#define LLOG(x) // LOG(x)
|
|
|
|
// Directory entry describing one block of a block-format dictionary file.
struct SpellBlock : Moveable<SpellBlock> {
	String first;    // key read from the block directory; compared with the upper-cased ASCII form of a word
	int    offset;   // file position where the block's control stream starts
	int    ctrl_len; // number of stored bytes of the control stream
	int    text_len; // number of stored bytes of the text stream (it follows the control stream)
};
|
|
|
|
struct Speller {
|
|
String data;
|
|
byte charset;
|
|
const char *voc[256];
|
|
int dict;
|
|
struct Line : Moveable<Line> {
|
|
const byte *begin;
|
|
const byte *end;
|
|
};
|
|
VectorMap<dword, Line> line;
|
|
Index<WString> user;
|
|
|
|
String path;
|
|
Array<SpellBlock> block;
|
|
|
|
bool SetOld(const String& data);
|
|
void Clear() { data.Clear(); path.Clear(); }
|
|
operator bool() const { return !data.IsEmpty() || path.GetCount(); }
|
|
bool CheckOld(const WString& wstr) const;
|
|
|
|
String Get(int offset, int len);
|
|
};
|
|
|
|
bool Speller::SetOld(const String& _data)
|
|
{
|
|
data = _data;
|
|
const char *s = data;
|
|
if(s >= data.End()) {
|
|
data.Clear();
|
|
return false;
|
|
}
|
|
charset = *s++;
|
|
s++;// reserved for prefixes
|
|
dict = *s++;
|
|
for(int i = 0; i < 256 - dict; i++) {
|
|
if(s >= data.End()) {
|
|
data.Clear();
|
|
return false;
|
|
}
|
|
voc[i] = s;
|
|
while(*s) s++;
|
|
s++;
|
|
}
|
|
line.Clear();
|
|
while(s < data.End()) {
|
|
if(s + 8 >= data.End()) {
|
|
data.Clear();
|
|
return false;
|
|
}
|
|
int code = Peek32le(s);
|
|
s += 4;
|
|
int len = Peek32le(s);
|
|
s += 4;
|
|
Line& l = line.Add(code);
|
|
l.begin = (const byte *)s;
|
|
s += len;
|
|
l.end = (const byte *)s;
|
|
};
|
|
return true;
|
|
}
|
|
|
|
bool Speller::CheckOld(const WString& wstr) const
|
|
{
|
|
int len = wstr.GetLength();
|
|
if(len < 2)
|
|
return true;
|
|
if(len < 64) {
|
|
String w = FromUnicode(wstr, charset);
|
|
String wl = FromUnicode(ToLower(wstr), charset);
|
|
int i;
|
|
if(len == 2) {
|
|
w.Cat(127);
|
|
wl.Cat(127);
|
|
}
|
|
i = line.Find(ToLower(wl[0], charset) +
|
|
(ToLower(wl[1], charset) << 8) +
|
|
(ToLower(wl[2], charset) << 16));
|
|
if(i >= 0) {
|
|
const byte *s = line[i].begin;
|
|
const byte *e = line[i].end;
|
|
String q;
|
|
while(s < e)
|
|
if(*s < dict) {
|
|
if(q == w || q == wl)
|
|
return true;
|
|
q.Trim(*s++);
|
|
}
|
|
else {
|
|
ASSERT(*s >= dict);
|
|
const char *x = voc[(int)*s++ - dict];
|
|
q.Cat(x);
|
|
}
|
|
if(q == w || q == wl)
|
|
return true;
|
|
}
|
|
}
|
|
return user.Find(wstr) >= 0;;
|
|
}
|
|
|
|
// File name of the user dictionary for 'lang': "<language text>.usp" passed through ConfigFile.
static String sUserFile(int lang)
{
	String name = LNGAsText(lang);
	name << ".usp";
	return ConfigFile(name);
}
|
|
|
|
String spell_path;
|
|
|
|
void SetSpellPath(const String& p)
|
|
{
|
|
spell_path = p;
|
|
}
|
|
|
|
// Reads 'len' bytes at file position 'offset' from 'in' and returns them after ZDecompress.
static String sZet(FileIn& in, int offset, int len)
{
	in.Seek(offset);
	String packed = in.Get(len);
	return ZDecompress(packed);
}
|
|
|
|
// Appends 'dir' and each of its ancestor folders to 'pp', every entry followed by ';'.
// The walk ends when GetFileFolder returns its argument unchanged.
void DoSpellerPath(String& pp, String dir)
{
	pp << dir << ';';
	String parent = GetFileFolder(dir);
	while(!(parent == dir)) {
		dir = parent;
		pp << dir << ';';
		parent = GetFileFolder(dir);
	}
}
|
|
|
|
// Returns the speller for 'lang', loading it on first use.
//
// The dictionary "<language>.udc" (lower-cased language text) is searched on a
// path list built from spell_path, the executable and configuration "speller"
// folders with all their ancestors, the LIB and PATH environment variables and,
// on POSIX, a few fixed system folders. Returns NULL when no file is found or it
// cannot be opened; such failures are not remembered, so the search is repeated
// on the next call. Loaded spellers are kept in a function-local static map.
Speller *sGetSpeller(int lang)
{
	static ArrayMap<int, Speller> speller;
	int q = speller.Find(lang);
	if(q >= 0)
		return &speller[q];

	String pp = spell_path;
	// Terminate the user supplied path; without the separator it would be
	// glued to the first directory appended by DoSpellerPath below.
	if(pp.GetCount() && pp[pp.GetCount() - 1] != ';')
		pp << ';';
#ifdef PLATFORM_COCOA
	String Contents = GetFileFolder(GetFileFolder(GetExeFilePath()));
	DoSpellerPath(pp, Contents + "/Resources");
	DoSpellerPath(pp, Contents + "/SharedSupport");
#endif
	DoSpellerPath(pp, GetExeDirFile("speller"));
	DoSpellerPath(pp, ConfigFile("speller"));
	pp << spell_path << ';' << getenv("LIB") << ';' << getenv("PATH") << ';';
#ifdef PLATFORM_POSIX
	pp << "/usr/local/share/upp/speller;/usr/local/share/upp;/usr/share/upp/speller;/usr/share/upp";
#endif
	String path = GetFileOnPath(ToLower(LNGAsText(lang)) + ".udc", pp);
	if(IsNull(path))
		return NULL;
	FileIn in(path);
	if(!in)
		return NULL;

	q = speller.GetCount();
	Speller& f = speller.Add(lang);

	// User dictionary: one word per line, empty lines ignored.
	FileIn user(sUserFile(lang));
	while(!user.IsEof()) {
		String s = user.GetLine();
		if(!s.IsEmpty())
			f.user.Add(ToUtf32(s));
	}

	if(in.Get() != 255) // first byte other than 255: legacy format, loaded as a whole
		f.SetOld(LoadFile(path));
	else {
		f.path = path;
		int n = in.Get32();
		LLOG("Found dictionary file " << path << " blocks " << n);
		if(n > 0 && n < 100000) {
			bool complete = true;
			for(int i = 0; i < n; i++) {
				// NOTE(review): a negative value is taken to mean that no byte
				// could be read (end of file) - confirm against Stream::Get.
				int first_len = in.Get();
				if(first_len < 0) {
					complete = false;
					break;
				}
				SpellBlock& b = f.block.Add();
				b.first = in.Get(first_len);
				b.ctrl_len = in.Get32();
				b.text_len = in.Get32();
			}
			if(!complete || in.IsEof())
				f.block.Clear(); // truncated directory: behave as an empty dictionary
			else {
				// Block data follows the directory; blocks are stored back to back.
				int off = (int)in.GetPos();
				for(int i = 0; i < n; i++) {
					SpellBlock& b = f.block[i];
					b.offset = off;
					off += b.ctrl_len + b.text_len;
				}
			}
		}
	}
	return &speller[q];
}
|
|
|
|
bool SpellWordRaw(const WString& wrd, int lang, Vector<String> *withdia)
|
|
{
|
|
Speller *f = sGetSpeller(lang);
|
|
if(!f)
|
|
return true;
|
|
if(f->data.GetCount())
|
|
return f->CheckOld(wrd);
|
|
String awrd = ToUpper(ToAscii(wrd).ToString());
|
|
String t1 = ToUtf8(wrd);
|
|
String t2 = ToUtf8(ToLower(wrd));
|
|
for(int i = 0;; i++) {
|
|
if(i + 1 >= f->block.GetCount() || awrd <= f->block[i + 1].first) {
|
|
for(;;) {
|
|
if(i >= f->block.GetCount())
|
|
return f->user.Find(wrd) >= 0;;
|
|
LLOG("Spell block " << i << ": " << f->block[i].first);
|
|
const SpellBlock& b = f->block[i++];
|
|
if(b.first > awrd) {
|
|
LLOG(" --- end");
|
|
return f->user.Find(wrd) >= 0;;
|
|
}
|
|
FileIn in(f->path);
|
|
String ctrl = sZet(in, b.offset, b.ctrl_len);
|
|
String text = sZet(in, b.offset + b.ctrl_len, b.text_len);
|
|
in.Close();
|
|
String w;
|
|
const char *s = ctrl;
|
|
const char *e = ctrl.End();
|
|
const char *t = text;
|
|
const char *te = text.End();
|
|
while(s < e && t < te) {
|
|
w.Trim(*s++);
|
|
while(*t)
|
|
w.Cat(*t++);
|
|
if(w == t1 || w == t2)
|
|
return true;
|
|
if(withdia && t2 == ToLower(ToAscii(w.ToWString()).ToString()))
|
|
withdia->Add(w);
|
|
t++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return f->user.Find(wrd) >= 0;;
|
|
}
|
|
|
|
bool AllSpellerWords(int lang, Gate<String> iter)
|
|
{
|
|
Speller *f = sGetSpeller(lang);
|
|
if(!f)
|
|
return false;
|
|
for(int i = 0; i < f->block.GetCount(); i++) {
|
|
const SpellBlock& b = f->block[i++];
|
|
FileIn in(f->path);
|
|
String ctrl = sZet(in, b.offset, b.ctrl_len);
|
|
String text = sZet(in, b.offset + b.ctrl_len, b.text_len);
|
|
in.Close();
|
|
String w;
|
|
const char *s = ctrl;
|
|
const char *e = ctrl.End();
|
|
const char *t = text;
|
|
const char *te = text.End();
|
|
while(s < e && t < te) {
|
|
w.Trim(*s++);
|
|
while(*t)
|
|
w.Cat(*t++);
|
|
if(iter(w))
|
|
return true;
|
|
t++;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Cache key: a word together with the language it is checked in.
struct SpellKey : Moveable<SpellKey> {
	int     lang;
	WString wrd;

	bool operator==(const SpellKey& b) const
	{
		return lang == b.lang && wrd == b.wrd;
	}

	hash_t GetHashValue() const
	{
		return CombineHash(lang, wrd);
	}
};
|
|
|
|
// LRUCache maker that computes the spelling result for its key with SpellWordRaw.
struct SpellMaker : LRUCache<bool, SpellKey>::Maker {
	SpellKey k;

	SpellKey Key() const
	{
		return k;
	}

	// Stores the check result in 'r'; the value returned to the cache is always 1.
	int Make(bool& r) const
	{
		r = SpellWordRaw(k.wrd, k.lang);
		return 1;
	}
};
|
|
|
|
static LRUCache<bool, SpellKey> speller_cache;
|
|
|
|
bool SpellWord(const WString& ws, int lang)
|
|
{
|
|
speller_cache.ShrinkCount(2000);
|
|
SpellMaker m;
|
|
m.k.lang = lang;
|
|
m.k.wrd = ws;
|
|
return speller_cache.Get(m);
|
|
}
|
|
|
|
bool SpellWord(const wchar *ws, int len, int lang)
|
|
{
|
|
return SpellWord(WString(ws, len), lang);
|
|
}
|
|
|
|
void SpellerAdd(const WString& w, int lang)
|
|
{
|
|
if(!SpellWord(w, w.GetCount(), lang)) {
|
|
Speller *f = sGetSpeller(lang);
|
|
if(f) {
|
|
FileAppend fa(sUserFile(lang));
|
|
fa.PutLine(ToUtf8(w));
|
|
f->user.Add(w);
|
|
speller_cache.Clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
struct WordDistanceTester {
|
|
byte pchars[256];
|
|
byte wchars[256];
|
|
byte pairs[256 * 256];
|
|
int used_pairs[256]; // store used positions in pairs to be able to clear them at exit
|
|
int used_pairs_count; // count of pairs
|
|
|
|
int Get(const String& w, const String& p);
|
|
|
|
WordDistanceTester();
|
|
};
|
|
|
|
// Starts with all tables zeroed and no recorded pairs.
WordDistanceTester::WordDistanceTester()
	: used_pairs_count(0)
{
	Zero(pchars);
	Zero(wchars);
	Zero(pairs);
	Zero(used_pairs);
}
|
|
|
|
int WordDistanceTester::Get(const String& p, const String& w)
|
|
{
|
|
if(abs(p.GetLength() - w.GetLength()) > 4 || w.GetCount() > 200 || p.GetCount() > 200)
|
|
return INT_MAX;
|
|
|
|
VectorMap<int, int> chars;
|
|
for(int i = 0; i < p.GetCount(); i++)
|
|
pchars[(byte)p[i]]++;
|
|
|
|
int score = 0;
|
|
String pp;
|
|
VectorMap<int, int> found;
|
|
for(int i = 0; i < w.GetCount(); i++) {
|
|
int c = (byte)w[i];
|
|
if(pchars[c]) {
|
|
pchars[c]--;
|
|
wchars[c]++;
|
|
score++;
|
|
LLOG("Letter " << (char)w[i]);
|
|
}
|
|
}
|
|
|
|
int first = 0;
|
|
used_pairs_count = 0;
|
|
for(int i = 0; i < p.GetCount(); i++) {
|
|
int c = p[i];
|
|
if(wchars[c]) {
|
|
wchars[c]--;
|
|
if(first) {
|
|
LLOG("Pair " << (char)first << (char)c);
|
|
int pairi = MAKEWORD(first, c);
|
|
pairs[pairi]++;
|
|
used_pairs[used_pairs_count++] = pairi;
|
|
}
|
|
first = c;
|
|
}
|
|
}
|
|
|
|
for(int i = 1; i < w.GetCount(); i++) {
|
|
int pairi = MAKEWORD(w[i - 1], w[i]);
|
|
if(pairs[pairi]) {
|
|
pairs[pairi]--;
|
|
score++;
|
|
}
|
|
}
|
|
|
|
score -= abs(p.GetLength() - w.GetLength());
|
|
|
|
Zero(pchars);
|
|
Zero(wchars);
|
|
for(int i = 0; i < used_pairs_count; i++)
|
|
pairs[used_pairs[i]] = 0;
|
|
|
|
return -score;
|
|
}
|
|
|
|
// Returns up to 'n' dictionary words of 'lang' that are closest to 'w',
// best match first. An empty list is returned for n < 1.
//
// Candidates are the words whose length differs from that of 'w' by less than
// 5; they are ranked with WordDistanceTester::Get on upper-cased ASCII forms.
// The list is filled up to 'n' entries with whatever candidates are seen, and
// after that a candidate only enters by displacing the current worst entry.
Vector<String> SpellerFindCloseWords(int lang, const String& w, int n)
{
	Vector<String> r;
	Vector<int> min_distance; // distances of 'r', kept in ascending order
	if(n < 1)
		return r;
	String aw = ToUpper(ToAscii(w));
	One<WordDistanceTester> tester;
	tester.Create();
	AllSpellerWords(lang, [&] (String h) -> bool {
		if(abs(h.GetLength() - w.GetLength()) < 5) {
			int d = tester->Get(Utf8ToUpperAscii(h), aw);
			if(d == INT_MAX) // Get refused to compare the two strings
				return false;
			if(min_distance.GetCount() == 0 ||
			   (min_distance.GetCount() < n && d > min_distance.Top())) {
				// Still room and the candidate is worse than everything held
				// so far (or the list is empty): it goes to the end.
				min_distance.Add(d);
				r.Add(h);
			}
			else
			if(d <= min_distance.Top()) {
				// Insert before the entries that are worse; on a tie with the
				// last entry the new word goes in front of it.
				int ii = min_distance.GetCount() - 1;
				while(ii > 0 && d < min_distance[ii - 1])
					ii--;
				min_distance.Insert(ii, d);
				r.Insert(ii, h);
				if(r.GetCount() > n) {
					r.Trim(n);
					min_distance.Trim(n);
				}
			}
		}
		return false; // never stop the iteration early
	});
	return r;
}
|
|
|
|
};
|