about | summary | refs | log | tree | commit | diff
path: root/lout
diff options
context:
space:
mode:
Diffstat (limited to 'lout')
-rw-r--r-- lout/unicode.cc | 66
-rw-r--r-- lout/unicode.hh | 6
2 files changed, 62 insertions, 10 deletions
diff --git a/lout/unicode.cc b/lout/unicode.cc
index 38d71494..7d2502dc 100644
--- a/lout/unicode.cc
+++ b/lout/unicode.cc
@@ -1,4 +1,7 @@
#include "unicode.hh"
+#include "misc.hh"
+
+using namespace lout::misc;
namespace lout {
@@ -47,22 +50,19 @@ bool isAlpha (int ch)
return ch < 0x500 && (alpha[ch / 8] & (1 << (ch & 7)));
}
-int decodeUtf8 (char *s)
+int decodeUtf8 (const char *s)
{
if((s[0] & 0x80) == 0)
return s[0];
else {
- int mask = 0xe0, bits = 0xc0, done = 0, ch = 0, i = 0;
+ int mask = 0xe0, bits = 0xc0, done = 0, ch = 0;
for(int j = 1; !done && j < 7;
j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) {
- if(((unsigned char)s[i] & mask) == bits) {
+ if(((unsigned char)s[0] & mask) == bits) {
done = 1;
- ch = (unsigned char)s[i] & ~mask & 0xff;
- i++;
- for(int k = 0; k < j; k++) {
- ch = (ch << 6) | ((unsigned char)s[i] & 0x3f);
- i++;
- }
+ ch = (unsigned char)s[0] & ~mask & 0xff;
+ for(int k = 0; k < j; k++)
+ ch = (ch << 6) | ((unsigned char)s[k + 1] & 0x3f);
}
}
@@ -70,6 +70,54 @@ int decodeUtf8 (char *s)
}
}
+/**
+ * Compute where the character after the one starting at s would begin,
+ * judging only by the first byte of s.
+ *
+ * Returns NULL when s is NULL. A first byte with the high bit clear
+ * (this includes a NUL byte) advances by one. Otherwise the first byte is
+ * matched against the lead patterns 110xxxxx, 1110xxxx, 11110xxx,
+ * 111110xx, 1111110x and 11111110, advancing by 2 to 7 bytes. The bytes
+ * that are stepped over are never read, so the result may lie beyond the
+ * end of the data; callers are responsible for bounds checking.
+ */
+static const char *_nextUtf8Char (const char *s)
+{
+   if (s == NULL)
+      return NULL;
+
+   const unsigned char lead = (unsigned char)s[0];
+
+   // Single-byte character: high bit clear.
+   if (lead < 0x80)
+      return s + 1;
+
+   // Each round widens mask and bits by one leading 1 bit; "extra" is the
+   // number of bytes that follow the lead byte for the current pattern.
+   int mask = 0xe0, bits = 0xc0;
+   for (int extra = 1; extra < 7; extra++) {
+      if ((lead & mask) == bits)
+         return s + extra + 1;
+      mask = 0x80 | (mask >> 1);
+      bits = 0x80 | (bits >> 1);
+   }
+
+   // Only 0x80..0xbf and 0xff fall through to here.
+   // NOTE(review): assertNotReached() is defined outside this view; if it
+   // aborts, a stray continuation byte in the input terminates the program
+   // -- confirm that callers only pass validated UTF-8.
+   assertNotReached();
+   return NULL;
+}
+
+/**
+ * Advance to the next character of a NUL-terminated UTF-8 string.
+ *
+ * Returns a pointer to the character following the one at s, or NULL when
+ * s is NULL, when s already points at the terminating NUL, when the
+ * sequence at s is cut short by the terminator, or when the character at
+ * s is the last one in the string.
+ */
+const char *nextUtf8Char (const char *s)
+{
+   const char *r = _nextUtf8Char (s);
+   if (r == NULL)
+      return NULL;
+
+   // _nextUtf8Char() sizes its step from the lead byte alone. For an empty
+   // string or a truncated multi-byte sequence, r therefore points past
+   // the terminator; make sure no NUL lies in [s, r) before reading r[0].
+   for (const char *p = s; p < r; p++)
+      if (*p == 0)
+         return NULL;
+
+   return r[0] == 0 ? NULL : r;
+}
+
+/**
+ * Advance to the next character of a UTF-8 buffer of len bytes that
+ * starts at s.
+ *
+ * Returns NULL when len is zero or negative, when _nextUtf8Char() yields
+ * NULL, or when the next character would begin at or beyond s + len;
+ * otherwise returns the pointer to the next character.
+ */
+const char *nextUtf8Char (const char *s, int len)
+{
+   if (len < 1)
+      return NULL;
+
+   const char *next = _nextUtf8Char (s);
+
+   // A NULL from the helper is passed through unchanged; any other result
+   // is accepted only while it still lies inside the len-byte buffer.
+   const bool inside = next != NULL && next - s < len;
+   return inside ? next : NULL;
+}
+
} // namespace unicode
} // namespace lout
diff --git a/lout/unicode.hh b/lout/unicode.hh
index 123e7aa3..42d06911 100644
--- a/lout/unicode.hh
+++ b/lout/unicode.hh
@@ -11,7 +11,11 @@ namespace unicode {
bool isAlpha (int ch);
-int decodeUtf8 (char *s);
+int decodeUtf8 (const char *s);
+// Next character of a NUL-terminated UTF-8 string; NULL if s is NULL or no further character follows.
+const char *nextUtf8Char (const char *s);
+// Same for a buffer of len bytes at s; NULL if len <= 0 or the next character would start at or past s + len.
+const char *nextUtf8Char (const char *s, int len);
} // namespace lout