diff options
Diffstat (limited to 'lout')
-rw-r--r-- | lout/unicode.cc | 66 | ||||
-rw-r--r-- | lout/unicode.hh | 6 |
2 files changed, 62 insertions, 10 deletions
diff --git a/lout/unicode.cc b/lout/unicode.cc index 38d71494..7d2502dc 100644 --- a/lout/unicode.cc +++ b/lout/unicode.cc @@ -1,4 +1,7 @@ #include "unicode.hh" +#include "misc.hh" + +using namespace lout::misc; namespace lout { @@ -47,22 +50,19 @@ bool isAlpha (int ch) return ch < 0x500 && (alpha[ch / 8] & (1 << (ch & 7))); } -int decodeUtf8 (char *s) +int decodeUtf8 (const char *s) { if((s[0] & 0x80) == 0) return s[0]; else { - int mask = 0xe0, bits = 0xc0, done = 0, ch = 0, i = 0; + int mask = 0xe0, bits = 0xc0, done = 0, ch = 0; for(int j = 1; !done && j < 7; j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) { - if(((unsigned char)s[i] & mask) == bits) { + if(((unsigned char)s[0] & mask) == bits) { done = 1; - ch = (unsigned char)s[i] & ~mask & 0xff; - i++; - for(int k = 0; k < j; k++) { - ch = (ch << 6) | ((unsigned char)s[i] & 0x3f); - i++; - } + ch = (unsigned char)s[0] & ~mask & 0xff; + for(int k = 0; k < j; k++) + ch = (ch << 6) | ((unsigned char)s[k + 1] & 0x3f); } } @@ -70,6 +70,54 @@ int decodeUtf8 (char *s) } } +static const char *_nextUtf8Char (const char *s) +{ + if (s == NULL) + return NULL; + + const char *r; + if((s[0] & 0x80) == 0) + r = s + 1; + else { + int mask = 0xe0, bits = 0xc0, done = 0; + for(int j = 1; !done && j < 7; + j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) { + if(((unsigned char)s[0] & mask) == bits) { + done = 1; + r = s + j + 1; + } + } + + if(!done) { + assertNotReached(); + return NULL; + } + } + + return r; +} + +const char *nextUtf8Char (const char *s) +{ + const char *r = _nextUtf8Char (s); + if (r != NULL && r[0] == 0) + return NULL; + else + return r; +} + +const char *nextUtf8Char (const char *s, int len) +{ + if (len <= 0) + return NULL; + + const char *r = _nextUtf8Char (s); + if (r != NULL && r - s >= len) + return NULL; + else + return r; +} + } // namespace lout } // namespace unicode diff --git a/lout/unicode.hh b/lout/unicode.hh index 123e7aa3..42d06911 100644 --- a/lout/unicode.hh +++ b/lout/unicode.hh @@ -11,7 +11,11 @@ namespace unicode { bool isAlpha (int ch); -int decodeUtf8 (char *s); +int decodeUtf8 (const char *s); + +const char *nextUtf8Char (const char *s); + +const char *nextUtf8Char (const char *s, int len); } // namespace lout |