aboutsummaryrefslogtreecommitdiff
path: root/src/utf8.cc
diff options
context:
space:
mode:
author corvid <corvid@lavabit.com> 2009-05-17 12:04:13 -0400
committer corvid <corvid@lavabit.com> 2009-05-17 12:04:13 -0400
commit 5ff1a29d501f97a01c983ef62f4e6b76b7814785 (patch)
tree 4fcf1ba4d94e1d76bc65ac7ea3addea0dfa2a129 /src/utf8.cc
parent f15858539330c89f607d039fc51979c432c5192c (diff)
New utility function: a_Utf8_end_of_char()
Diffstat (limited to 'src/utf8.cc')
-rw-r--r-- src/utf8.cc 20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/utf8.cc b/src/utf8.cc
index 76a752c9..261024fb 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -16,6 +16,26 @@
// C++ functions with C linkage ----------------------------------------------
/*
+ * Return index of the last byte of the UTF-8-encoded character that str + i
+ * points to or into; i is returned as-is for NULL/empty str or an ASCII byte.
+ */
+uint_t a_Utf8_end_of_char(const char *str, uint_t i)
+{
+ /* utf8fwd(p+1,...)-1 almost works, but not for the last character in a
+ * string, and that fn makes assumptions that do not suit us. Simpleminded
+ * alternative: step over 10xxxxxx bytes. internal_bytes counts those seen
+ * (str[i] too when its 0x40 bit is clear); the < 4 test stops after 3 total.
+ */
+ if (str && *str && (str[i] & 0x80)) {
+ int internal_bytes = (str[i] & 0x40) ? 0 : 1;
+
+ while (((str[i + 1] & 0xc0) == 0x80) && (++internal_bytes < 4))
+ i++;
+ }
+ return i;
+}
+
+/*
* Write UTF-8 encoding of ucs into buf and return number of bytes written.
*/
int a_Utf8_encode(unsigned int ucs, char *buf)