about | summary | refs | log | tree | commit | diff
path: root/src/utf8.cc
diff options
context:
space:
mode:
author Johannes Hofmann <Johannes.Hofmann@gmx.de> 2009-11-29 21:40:02 +0100
committer Johannes Hofmann <Johannes.Hofmann@gmx.de> 2009-11-29 21:40:02 +0100
commit a3daa9910dfbfc0fc6b57ec37ad712fbc19b1e01 (patch)
tree 1e2aa63886a04d08ba18fe37441bb55580365e4f /src/utf8.cc
parent ba9c7b7e9afdfcc01b5a35c4c387642925b1bf9a (diff)
respect UTF-8 when splitting long lines in plain.cc (noticed by corvid)
When splitting long lines in plain text to avoid X11 coordinate overflows, we need to make sure that multibyte UTF-8 chars are not split. Additionally, combining chars like accents should stay together with their base char.
Diffstat (limited to 'src/utf8.cc')
-rw-r--r-- src/utf8.cc 8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/utf8.cc b/src/utf8.cc
index 47d8112b..0138c616 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -92,3 +92,11 @@ bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
}
return ret;
}
+
+bool_t a_Utf8_combining_char(int unicode) /* Tests whether the code point `unicode` lies in one of four combining-mark ranges; returns the result of that range test. */
+{
+ return ((unicode >= 0x0300 && unicode <= 0x036f) || /* U+0300..U+036F: Combining Diacritical Marks */
+ (unicode >= 0x1dc0 && unicode <= 0x1dff) || /* U+1DC0..U+1DFF: Combining Diacritical Marks Supplement */
+ (unicode >= 0x20d0 && unicode <= 0x20ff) || /* U+20D0..U+20FF: Combining Diacritical Marks for Symbols */
+ (unicode >= 0xfe20 && unicode <= 0xfe2f)); /* U+FE20..U+FE2F: Combining Half Marks */
+} /* NOTE(review): only the four inclusive ranges above are checked; any code point outside them (including negative values) yields false. */