From a3daa9910dfbfc0fc6b57ec37ad712fbc19b1e01 Mon Sep 17 00:00:00 2001 From: Johannes Hofmann Date: Sun, 29 Nov 2009 21:40:02 +0100 Subject: respect UTF-8 when splitting long lines in plain.cc (noticed by corvid) When splitting long lines in plain text to avoid X11 coordinate overflows we need to make sure that multibyte UTF-8 chars are not split. Additionally combining chars like accents should stay together with their base char. --- src/utf8.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/utf8.cc') diff --git a/src/utf8.cc b/src/utf8.cc index 47d8112b..0138c616 100644 --- a/src/utf8.cc +++ b/src/utf8.cc @@ -92,3 +92,11 @@ bool_t a_Utf8_ideographic(const char *s, const char *end, int *len) } return ret; } + +bool_t a_Utf8_combining_char(int unicode) +{ + return ((unicode >= 0x0300 && unicode <= 0x036f) || + (unicode >= 0x1dc0 && unicode <= 0x1dff) || + (unicode >= 0x20d0 && unicode <= 0x20ff) || + (unicode >= 0xfe20 && unicode <= 0xfe2f)); +} -- cgit v1.2.3