From a3daa9910dfbfc0fc6b57ec37ad712fbc19b1e01 Mon Sep 17 00:00:00 2001
From: Johannes Hofmann <Johannes.Hofmann@gmx.de>
Date: Sun, 29 Nov 2009 21:40:02 +0100
Subject: respect UTF-8 when splitting long lines in plain.cc (noticed by
 corvid)

When splitting long lines in plain text to avoid X11 coordinate
overflows, we need to make sure that multibyte UTF-8 chars are not
split.
Additionally, combining chars like accents should stay together with
their base char.
---
 src/utf8.cc | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/utf8.cc')

diff --git a/src/utf8.cc b/src/utf8.cc
index 47d8112b..0138c616 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -92,3 +92,11 @@ bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
    }
    return ret;
 }
+
+bool_t a_Utf8_combining_char(int unicode) /* is 'unicode' a combining mark? */
+{  /* NOTE(review): only the four inclusive ranges below are tested. */
+   return ((unicode >= 0x0300 && unicode <= 0x036f) || /* Diacritical Marks */
+           (unicode >= 0x1dc0 && unicode <= 0x1dff) || /* ... Supplement */
+           (unicode >= 0x20d0 && unicode <= 0x20ff) || /* ... for Symbols */
+           (unicode >= 0xfe20 && unicode <= 0xfe2f));  /* Combining Half Marks */
+}
-- 
cgit v1.2.3