summary refs log tree commit diff
path: root/lout/unicode.cc
diff options
context:
space:
mode:
author Sebastian Geerken <devnull@localhost> 2012-12-13 22:43:15 +0100
committer Sebastian Geerken <devnull@localhost> 2012-12-13 22:43:15 +0100
commit f5380a56b1a6b83fea9b1c97140d4b1c8fe4ba49 (patch)
tree 93a792dc006a046b4187acf95c6c8b79df97a920 /lout/unicode.cc
parent 1471c240d49b60ef081f50230f2eee8852793716 (diff)
New function nextUtf8Char; usage in dw::Hyphenator and (partly) dw::Textblock.
Diffstat (limited to 'lout/unicode.cc')
-rw-r--r-- lout/unicode.cc 66
1 files changed, 57 insertions, 9 deletions
diff --git a/lout/unicode.cc b/lout/unicode.cc
index 38d71494..7d2502dc 100644
--- a/lout/unicode.cc
+++ b/lout/unicode.cc
@@ -1,4 +1,7 @@
#include "unicode.hh"
+#include "misc.hh"
+
+using namespace lout::misc;
namespace lout {
@@ -47,22 +50,19 @@ bool isAlpha (int ch)
return ch < 0x500 && (alpha[ch / 8] & (1 << (ch & 7)));
}
-int decodeUtf8 (char *s)
+int decodeUtf8 (const char *s)
{
if((s[0] & 0x80) == 0)
return s[0];
else {
- int mask = 0xe0, bits = 0xc0, done = 0, ch = 0, i = 0;
+ int mask = 0xe0, bits = 0xc0, done = 0, ch = 0;
for(int j = 1; !done && j < 7;
j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) {
- if(((unsigned char)s[i] & mask) == bits) {
+ if(((unsigned char)s[0] & mask) == bits) {
done = 1;
- ch = (unsigned char)s[i] & ~mask & 0xff;
- i++;
- for(int k = 0; k < j; k++) {
- ch = (ch << 6) | ((unsigned char)s[i] & 0x3f);
- i++;
- }
+ ch = (unsigned char)s[0] & ~mask & 0xff;
+ for(int k = 0; k < j; k++)
+ ch = (ch << 6) | ((unsigned char)s[k + 1] & 0x3f);
}
}
@@ -70,6 +70,54 @@ int decodeUtf8 (char *s)
}
}
+/*
+ * Return a pointer to the byte following the UTF-8 character that starts
+ * at "s". The length is derived from the lead byte alone; the following
+ * bytes are not examined. Returns NULL when "s" is NULL, or when the lead
+ * byte matches none of the tested patterns.
+ */
+static const char *_nextUtf8Char (const char *s)
+{
+   if (s == NULL)
+      return NULL;
+
+   const unsigned char lead = (unsigned char)s[0];
+
+   // High bit clear: the character occupies a single byte.
+   if ((lead & 0x80) == 0)
+      return s + 1;
+
+   // Try the lead-byte patterns 110xxxxx, 1110xxxx, ... in turn. A match
+   // with "extra" trailing bytes means the character is extra + 1 bytes
+   // long.
+   int mask = 0xe0, bits = 0xc0;
+   for (int extra = 1; extra < 7; extra++) {
+      if ((lead & mask) == bits)
+         return s + extra + 1;
+      mask = 0x80 | (mask >> 1);
+      bits = 0x80 | (bits >> 1);
+   }
+
+   // No pattern matched (e.g. a byte of the form 10xxxxxx in lead
+   // position).
+   assertNotReached();
+   return NULL;
+}
+
+/*
+ * Advance to the next UTF-8 character of the NUL-terminated string "s".
+ *
+ * Returns a pointer to the character following the one at "s", or NULL
+ * when "s" is NULL, when _nextUtf8Char() returns NULL, or when the end of
+ * the string is reached.
+ */
+const char *nextUtf8Char (const char *s)
+{
+   const char *r = _nextUtf8Char (s);
+   if (r == NULL)
+      return NULL;
+
+   // Walk byte by byte up to and including r, stopping at the first NUL.
+   // This never reads past the terminator: if "s" is empty, or its last
+   // character is truncated (the lead byte announces more bytes than the
+   // string holds), r lies beyond the terminator and must not be
+   // dereferenced.
+   for (const char *p = s; p <= r; p++)
+      if (*p == 0)
+         return NULL;
+
+   return r;
+}
+
+/*
+ * Variant of nextUtf8Char for a buffer of "len" bytes starting at "s".
+ *
+ * Returns the start of the following character, or NULL if "len" is not
+ * positive, if _nextUtf8Char() returns NULL, or if the following character
+ * would start at or beyond offset "len".
+ */
+const char *nextUtf8Char (const char *s, int len)
+{
+   if (len <= 0)
+      return NULL;
+
+   const char *next = _nextUtf8Char (s);
+
+   // The pointer difference is only evaluated for a non-NULL result.
+   const bool inside = next != NULL && next - s < len;
+   return inside ? next : NULL;
+}
+
} // namespace unicode
} // namespace lout