summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dw/hyphenator.cc10
-rw-r--r--dw/textblock.cc28
-rw-r--r--lout/unicode.cc66
-rw-r--r--lout/unicode.hh6
-rw-r--r--test/Makefile.am7
-rw-r--r--test/unicode_test.cc15
6 files changed, 103 insertions, 29 deletions
diff --git a/dw/hyphenator.cc b/dw/hyphenator.cc
index 4fe9433b..cc92b77f 100644
--- a/dw/hyphenator.cc
+++ b/dw/hyphenator.cc
@@ -16,6 +16,7 @@
using namespace lout::object;
using namespace lout::container::typed;
using namespace lout::misc;
+using namespace lout::unicode;
namespace dw {
@@ -204,7 +205,7 @@ bool Hyphenator::isCharPartOfActualWord (char *s)
(unsigned char)s[1] == 0x9f /* ß */ ));
#endif
- return lout::unicode::isAlpha (lout::unicode::decodeUtf8 (s));
+ return isAlpha (decodeUtf8 (s));
}
/**
@@ -315,9 +316,10 @@ void Hyphenator::hyphenateSingleWord(core::Platform *platform,
// No hyphens in the first two chars or the last two.
// Characters are not bytes, so UTF-8 characters must be counted.
- int numBytes1Start = platform->nextGlyph (wordLc, 0);
- int numBytes2Start = platform->nextGlyph (wordLc, numBytes1Start);
- for (int i = 0; i < numBytes2Start; i++)
+ const char *bytesStart = nextUtf8Char (nextUtf8Char (wordLc));
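+ // TODO Handle bytesStart == NULL: nextUtf8Char returns NULL when the word
+ // has fewer than three characters, and the subtraction below is then invalid.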
+ int numBytesStart = bytesStart - wordLc;
+ for (int i = 0; i < numBytesStart; i++)
points.set (i + 1, 0);
int len = strlen (wordLc);
diff --git a/dw/textblock.cc b/dw/textblock.cc
index ba000e1a..816b2680 100644
--- a/dw/textblock.cc
+++ b/dw/textblock.cc
@@ -21,6 +21,7 @@
#include "textblock.hh"
#include "../lout/msg.h"
#include "../lout/misc.hh"
+#include "../lout/unicode.hh"
#include <stdio.h>
#include <math.h>
@@ -34,6 +35,7 @@ static dw::core::style::Tooltip *hoverTooltip = NULL;
using namespace lout;
+using namespace lout::unicode;
namespace dw {
@@ -1425,13 +1427,12 @@ void Textblock::addText (const char *text, size_t len,
// Count dividing characters.
int numParts = 1;
- for (int i = 0; i < (int)len;
- i < (int)len && (i = layout->nextGlyph (text, i))) {
+ for (const char *s = text; s; s = nextUtf8Char (s, text + len - s)) {
int foundDiv = -1;
for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) {
int lDiv = strlen (divChars[j].s);
- if (i <= (int)len - lDiv) {
- if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0)
+ if (s <= text + len - lDiv) {
+ if (memcmp (s, divChars[j].s, lDiv * sizeof (char)) == 0)
foundDiv = j;
}
}
@@ -1466,13 +1467,12 @@ void Textblock::addText (const char *text, size_t len,
partStart[0] = 0;
partEnd[numParts - 1] = len;
- for (int i = 0; i < (int)len;
- i < (int)len && (i = layout->nextGlyph (text, i))) {
+ for (const char *s = text; s; s = nextUtf8Char (s, text + len - s)) {
int foundDiv = -1;
for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) {
int lDiv = strlen (divChars[j].s);
- if (i <= (int)len - lDiv) {
- if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0)
+ if (s <= text + len - lDiv) {
+ if (memcmp (s, divChars[j].s, lDiv * sizeof (char)) == 0)
foundDiv = j;
}
}
@@ -1490,8 +1490,8 @@ void Textblock::addText (const char *text, size_t len,
unbreakableForMinWidth[n] =
divChars[foundDiv].unbreakableForMinWidth;
canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
- partEnd[n] = i;
- partStart[n + 1] = i + lDiv;
+ partEnd[n] = s - text;
+ partStart[n + 1] = s - text + lDiv;
n++;
totalLenCharRemoved += lDiv;
} else {
@@ -1505,8 +1505,8 @@ void Textblock::addText (const char *text, size_t len,
unbreakableForMinWidth[n] =
divChars[foundDiv].unbreakableForMinWidth;
canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
- partEnd[n] = i;
- partStart[n + 1] = i;
+ partEnd[n] = s - text;
+ partStart[n + 1] = s - text;
n++;
}
@@ -1517,8 +1517,8 @@ void Textblock::addText (const char *text, size_t len,
unbreakableForMinWidth[n] =
divChars[foundDiv].unbreakableForMinWidth;
canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
- partEnd[n] = i + lDiv;
- partStart[n + 1] = i + lDiv;
+ partEnd[n] = s - text + lDiv;
+ partStart[n + 1] = s - text + lDiv;
n++;
}
}
diff --git a/lout/unicode.cc b/lout/unicode.cc
index 38d71494..7d2502dc 100644
--- a/lout/unicode.cc
+++ b/lout/unicode.cc
@@ -1,4 +1,7 @@
#include "unicode.hh"
+#include "misc.hh"
+
+using namespace lout::misc;
namespace lout {
@@ -47,22 +50,19 @@ bool isAlpha (int ch)
return ch < 0x500 && (alpha[ch / 8] & (1 << (ch & 7)));
}
-int decodeUtf8 (char *s)
+int decodeUtf8 (const char *s)
{
if((s[0] & 0x80) == 0)
return s[0];
else {
- int mask = 0xe0, bits = 0xc0, done = 0, ch = 0, i = 0;
+ int mask = 0xe0, bits = 0xc0, done = 0, ch = 0;
for(int j = 1; !done && j < 7;
j++, mask = 0x80 | (mask >> 1), bits = 0x80 | (bits >> 1)) {
- if(((unsigned char)s[i] & mask) == bits) {
+ if(((unsigned char)s[0] & mask) == bits) {
done = 1;
- ch = (unsigned char)s[i] & ~mask & 0xff;
- i++;
- for(int k = 0; k < j; k++) {
- ch = (ch << 6) | ((unsigned char)s[i] & 0x3f);
- i++;
- }
+ ch = (unsigned char)s[0] & ~mask & 0xff;
+ for(int k = 0; k < j; k++)
+ ch = (ch << 6) | ((unsigned char)s[k + 1] & 0x3f);
}
}
@@ -70,6 +70,54 @@ int decodeUtf8 (char *s)
}
}
+/**
+ * Return a pointer to the byte following the UTF-8 sequence that starts at
+ * s, judged solely by the lead byte s[0]; the continuation bytes themselves
+ * are not inspected, and neither a terminator nor a length limit is honoured
+ * here (the public wrappers take care of that).
+ *
+ * A NULL argument yields NULL. A byte with the high bit clear advances by
+ * one. Otherwise the lead byte is matched against the patterns 110xxxxx,
+ * 1110xxxx, ... (up to six continuation bytes). If no pattern matches
+ * (bytes 0x80..0xbf and 0xff), assertNotReached () is invoked and NULL is
+ * returned.
+ */
+static const char *_nextUtf8Char (const char *s)
+{
+   if (s == NULL)
+      return NULL;
+
+   // Single-byte (ASCII) character.
+   if ((s[0] & 0x80) == 0)
+      return s + 1;
+
+   const unsigned char lead = (unsigned char)s[0];
+   int mask = 0xe0, bits = 0xc0;
+
+   // numCont is the number of continuation bytes announced by the pattern
+   // currently tested; mask/bits grow by one leading 1-bit per iteration.
+   for (int numCont = 1; numCont < 7; numCont++) {
+      if ((lead & mask) == bits)
+         return s + numCont + 1;
+
+      mask = 0x80 | (mask >> 1);
+      bits = 0x80 | (bits >> 1);
+   }
+
+   // Not a valid lead byte.
+   assertNotReached();
+   return NULL;
+}
+
+/**
+ * Advance by one character in a NUL-terminated UTF-8 string.
+ *
+ * Returns a pointer to the first byte of the character following the one
+ * at s. Returns NULL if s is NULL, if s already points at the terminator,
+ * if the terminator is found inside the sequence announced by the lead
+ * byte (truncated character), or if the character at s is the last one of
+ * the string.
+ */
+const char *nextUtf8Char (const char *s)
+{
+   const char *r = _nextUtf8Char (s);
+   if (r == NULL)
+      return NULL;
+
+   // _nextUtf8Char only looks at the lead byte, so r may lie beyond the
+   // terminator (empty string, or a truncated multi-byte sequence). Never
+   // step across a NUL byte: reading r[0] would be out of bounds then.
+   for (const char *p = s; p < r; p++)
+      if (*p == 0)
+         return NULL;
+
+   return r[0] == 0 ? NULL : r;
+}
+
+/**
+ * Advance by one character in a UTF-8 buffer of which len bytes remain,
+ * counted from s.
+ *
+ * Returns a pointer to the next character, or NULL if len is not positive,
+ * if s is NULL, or if the character at s reaches or passes the end of the
+ * remaining len bytes (i. e. there is no further character).
+ */
+const char *nextUtf8Char (const char *s, int len)
+{
+   if (len > 0) {
+      const char *next = _nextUtf8Char (s);
+
+      // A NULL result is passed on unchanged; a non-NULL result is only
+      // valid while it still points inside the remaining bytes.
+      if (next == NULL || next - s < len)
+         return next;
+   }
+
+   return NULL;
+}
+
} // namespace unicode
} // namespace lout
diff --git a/lout/unicode.hh b/lout/unicode.hh
index 123e7aa3..42d06911 100644
--- a/lout/unicode.hh
+++ b/lout/unicode.hh
@@ -11,7 +11,11 @@ namespace unicode {
bool isAlpha (int ch);
-int decodeUtf8 (char *s);
+int decodeUtf8 (const char *s);
+
+const char *nextUtf8Char (const char *s);
+
+const char *nextUtf8Char (const char *s, int len);
} // namespace lout
diff --git a/test/Makefile.am b/test/Makefile.am
index 9589e09b..d8dd785e 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -23,7 +23,8 @@ noinst_PROGRAMS = \
cookies \
liang \
trie \
- notsosimplevector
+ notsosimplevector \
+ unicode-test
dw_anchors_test_SOURCES = dw_anchors_test.cc
dw_anchors_test_LDADD = \
@@ -180,3 +181,7 @@ trie_LDADD = \
notsosimplevector_SOURCES = notsosimplevector.cc
notsosimplevector_LDADD = $(top_builddir)/lout/liblout.a
+
+unicode_test_SOURCES = unicode_test.cc
+
+unicode_test_LDADD = $(top_builddir)/lout/liblout.a
diff --git a/test/unicode_test.cc b/test/unicode_test.cc
new file mode 100644
index 00000000..3cd43710
--- /dev/null
+++ b/test/unicode_test.cc
@@ -0,0 +1,15 @@
+#include <string.h>
+#include <stdio.h>
+#include "../lout/unicode.hh"
+
+using namespace lout::unicode;
+
+/**
+ * Walk through a sample string containing ASCII, Latin, Cyrillic and
+ * punctuation characters, and print, for each character, its byte offset,
+ * the value returned by decodeUtf8, and the rest of the string.
+ */
+int main (int argc, char *argv[])
+{
+   const char *text = "abcäöüабв−‐";
+   const char *cur = text;
+
+   // nextUtf8Char returns NULL after the last character.
+   while (cur != NULL) {
+      printf ("%3d -> U+%04x ('%s')\n", (int)(cur - text), decodeUtf8(cur),
+              cur);
+      cur = nextUtf8Char (cur, strlen (cur));
+   }
+
+   return 0;
+}