diff --git a/uppsrc/Core/CharSet.cpp b/uppsrc/Core/CharSet.cpp index cb99d73a4..ee50a6132 100644 --- a/uppsrc/Core/CharSet.cpp +++ b/uppsrc/Core/CharSet.cpp @@ -2296,4 +2296,25 @@ String ToLowerAscii(const String& s, byte charset) return r; } +bool IsDoubleWidth(int c) +{ + // This function is taken from Markus Kuhn's wcwidth implementation. + // For license and implementation details, see: + // https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + + return c >= 0x1100 + && (c <= 0x115F // Hangul Jamo init. consonants + || c == 0x2329 + || c == 0x232A + || (c >= 0x2E80 && c <= 0xA4CF && c != 0x303F) // CJK ... Yi + || (c >= 0xAC00 && c <= 0xD7A3) // Hangul syllables + || (c >= 0xF900 && c <= 0xFAFF) // CJK compatibility ideographs + || (c >= 0xFE10 && c <= 0xFE19) // Vertical forms + || (c >= 0xFE30 && c <= 0xFE6F) // CJK compatibility forms + || (c >= 0xFF00 && c <= 0xFF60) // Fullwidth forms + || (c >= 0xFFE0 && c <= 0xFFE6) + || (c >= 0x20000 && c <= 0x2FFFD) + || (c >= 0x30000 && c <= 0x3FFFD) + ); +} } diff --git a/uppsrc/Core/CharSet.h b/uppsrc/Core/CharSet.h index f74ed812a..a467275c5 100644 --- a/uppsrc/Core/CharSet.h +++ b/uppsrc/Core/CharSet.h @@ -190,7 +190,7 @@ inline bool IsPunct(int c) { return c != ' ' && !IsAlNum(c); } inline bool IsSpace(int c) { return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\v' || c == '\t'; } inline bool IsXDigit(int c) { return IsDigit(c) || c >= 'A' && c <= 'F' || c >= 'a' && c <= 'f'; } -inline bool IsCJKIdeograph(int c) { return c >= 0x2e80 && c <= 0xdfaf || c >= 0xf900 && c <= 0xfaff; } +bool IsDoubleWidth(int c); word UnicodeCombine(word chr, word combine); @@ -253,6 +253,8 @@ bool SaveFileBOMUtf8(const char *path, const String& data); // Deprecated +inline bool IsCJKIdeograph(int c) { return c >= 0x2e80 && c <= 0xdfaf || c >= 0xf900 && c <= 0xfaff; } + int ToUnicode(int chr, byte charset); int FromUnicode(wchar wchr, byte charset, int defchar = DEFAULTCHAR); diff --git a/uppsrc/Core/src.tpp/CharSet_en-us.tpp b/uppsrc/Core/src.tpp/CharSet_en-us.tpp index e6d4ba313..96adbe035 100644 --- a/uppsrc/Core/src.tpp/CharSet_en-us.tpp +++ b/uppsrc/Core/src.tpp/CharSet_en-us.tpp @@ -393,10 +393,11 @@ character.&] [s5;:IsXDigit`(int`): [@(0.0.255) bool]_[* IsXDigit]([@(0.0.255) int]_[*@3 c])&] [s2;%% Returns true [%-*@3 c].is hexadecimal digit (0`-9, a`-f, A`-F).&] [s3;%% &] -[s4;%% &] -[s5;:IsCJKIdeograph`(int`): [@(0.0.255) bool]_[* IsCJKIdeograph]([@(0.0.255) int]_[*@3 c])&] -[s2;%% Returns true if [%-*@3 c].is in UNICODE code`-point for CJK -ideogram.&] +[s4; &] +[s5;:Upp`:`:IsDoubleWidth`(int`): [@(0.0.255) bool]_[* IsDoubleWidth]([@(0.0.255) int]_[*@3 c +])&] +[s2;%% Returns true if [%-*@3 c] is a double`-width UNICODE character +(like CJK ideograph).&] [s3;%% &] [s4;%% &] [s5;:UnicodeCombine`(word`,word`): [_^word^ word]_[* UnicodeCombine]([_^word^ word]_[*@3 chr], diff --git a/uppsrc/CtrlLib/LineEdit.cpp b/uppsrc/CtrlLib/LineEdit.cpp index ccb54fc97..a96bc1a8c 100644 --- a/uppsrc/CtrlLib/LineEdit.cpp +++ b/uppsrc/CtrlLib/LineEdit.cpp @@ -480,7 +480,7 @@ void LineEdit::Paint0(Draw& w) { x = fsz.cx * gp; } else - if(IsCJKIdeograph(chr)) { + if(IsDoubleWidth(chr)) { x += 2 * fsz.cx; gp += 2; } @@ -508,7 +508,7 @@ void LineEdit::Paint0(Draw& w) { h.ink = color[INK_SELECTED]; } int x = gp * fsz.cx - scx; - bool cjk = IsCJKIdeograph(h.chr); + bool cjk = IsDoubleWidth(h.chr); int xx = x + (gp + 1 + cjk) * fsz.cx; if(h.chr == '\t') { int ngp = (gp + tabsize) / tabsize * tabsize; @@ -679,7 +679,7 @@ int64 LineEdit::GetGPos(int ln, int cl) const { if(*s == '\t') gl = (gl + tabsize) / tabsize * tabsize; else - gl += 1 + IsCJKIdeograph(*s); + gl += 1 + IsDoubleWidth(*s); if(cl < gl) break; s++; } @@ -704,7 +704,7 @@ Point LineEdit::GetColumnLine(int64 pos) const { if(*s == '\t') p.x = (p.x + tabsize) / tabsize * tabsize; else - p.x += 1 + IsCJKIdeograph(*s); + p.x += 1 + IsDoubleWidth(*s); s++; } return p; @@ -1125,7 +1125,7 @@ void LineEdit::SetHBar() if(*s == '\t') pos = (pos + tabsize) / tabsize * tabsize; else - pos += 1 + IsCJKIdeograph(*s); + pos += 1 + IsDoubleWidth(*s); s++; } mpos = max(mpos, pos);