diff options
author | Sebastian Geerken <devnull@localhost> | 2012-12-13 22:43:15 +0100 |
---|---|---|
committer | Sebastian Geerken <devnull@localhost> | 2012-12-13 22:43:15 +0100 |
commit | f5380a56b1a6b83fea9b1c97140d4b1c8fe4ba49 (patch) | |
tree | 93a792dc006a046b4187acf95c6c8b79df97a920 /lout/unicode.cc | |
parent | 1471c240d49b60ef081f50230f2eee8852793716 (diff) |
New function nextUtf8Char; usage in dw::Hyphenator and (partly) dw::Textblock.
Diffstat (limited to 'lout/unicode.cc')
-rw-r--r-- | lout/unicode.cc | 66 |
1 files changed, 57 insertions, 9 deletions
diff --git a/lout/unicode.cc b/lout/unicode.cc index 38d71494..7d2502dc 100644 --- a/lout/unicode.cc +++ b/lout/unicode.cc @@ -1,4 +1,7 @@ #include "unicode.hh" +#include "misc.hh" + +using namespace lout::misc; namespace lout { @@ -47,22 +50,19 @@ bool isAlpha (int ch) return ch < 0x500 && (alpha[ch / 8] & (1 << (ch & 7))); } -int decodeUtf8 (char *s) +int decodeUtf8 (const char *s) { if((s[0] & 0x80) == 0) return s[0]; else { - int mask = 0xe0, bits = 0xc0, done = 0, ch = 0, i = 0; + int mask = 0xe0, bits = 0xc0, done = 0, ch = 0; for(int j = 1; !done && j < 7; j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) { - if(((unsigned char)s[i] & mask) == bits) { + if(((unsigned char)s[0] & mask) == bits) { done = 1; - ch = (unsigned char)s[i] & ~mask & 0xff; - i++; - for(int k = 0; k < j; k++) { - ch = (ch << 6) | ((unsigned char)s[i] & 0x3f); - i++; - } + ch = (unsigned char)s[0] & ~mask & 0xff; + for(int k = 0; k < j; k++) + ch = (ch << 6) | ((unsigned char)s[k + 1] & 0x3f); } } @@ -70,6 +70,54 @@ int decodeUtf8 (char *s) } } +static const char *_nextUtf8Char (const char *s) +{ + if (s == NULL) + return NULL; + + const char *r; + if((s[0] & 0x80) == 0) + r = s + 1; + else { + int mask = 0xe0, bits = 0xc0, done = 0; + for(int j = 1; !done && j < 7; + j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) { + if(((unsigned char)s[0] & mask) == bits) { + done = 1; + r = s + j + 1; + } + } + + if(!done) { + assertNotReached(); + return NULL; + } + } + + return r; +} + +const char *nextUtf8Char (const char *s) +{ + const char *r = _nextUtf8Char (s); + if (r != NULL && r[0] == 0) + return NULL; + else + return r; +} + +const char *nextUtf8Char (const char *s, int len) +{ + if (len <= 0) + return NULL; + + const char *r = _nextUtf8Char (s); + if (r != NULL && r - s >= len) + return NULL; + else + return r; +} + } // namespace lout } // namespace unicode |