aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: sgeerken <devnull@localhost> 2012-07-11 14:15:04 +0200
committer: sgeerken <devnull@localhost> 2012-07-11 14:15:04 +0200
commit: 3cef8224ed1a1a3267adf5a5d3f1bdcf4c1b5eca (patch)
tree: 2689b169db86211ed86f158e4e2e3eb37326b380
parent: de3847bff7e3d0215d4f8cfae210315871a6f4d5 (diff)
At least at the beginning, UTF-8 characters are counted correctly. Ückendorf is now hyphenated correctly.
-rw-r--r-- dw/hyphenator.cc 12
1 files changed, 9 insertions, 3 deletions
diff --git a/dw/hyphenator.cc b/dw/hyphenator.cc
index 7a1f5df7..4acee185 100644
--- a/dw/hyphenator.cc
+++ b/dw/hyphenator.cc
@@ -262,7 +262,6 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks)
char work[strlen (word) + 3];
strcpy (work, ".");
strcat (work, wordLc + startActualWord);
- delete wordLc;
strcat (work, ".");
int l = strlen (work);
@@ -290,8 +289,13 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks)
}
// No hyphens in the first two chars or the last two.
- points.set (1, 0);
- points.set (2, 0);
+ // Characters are not bytes, so UTF-8 characters must be counted.
+ int numBytes1 = platform->nextGlyph (wordLc + startActualWord, 0);
+ int numBytes2 = platform->nextGlyph (wordLc + startActualWord, numBytes1);
+ for (int i = 0; i < numBytes2; i++)
+ points.set (i + 1, 0);
+
+ // TODO: Characters, not bytes (as above).
points.set (points.size() - 2, 0);
points.set (points.size() - 3, 0);
@@ -306,6 +310,8 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks)
}
}
+ delete wordLc;
+
*numBreaks = breakPos.size ();
if (*numBreaks == 0)
return NULL;