diff options
author | sgeerken <devnull@localhost> | 2012-07-11 14:15:04 +0200 |
---|---|---|
committer | sgeerken <devnull@localhost> | 2012-07-11 14:15:04 +0200 |
commit | 3cef8224ed1a1a3267adf5a5d3f1bdcf4c1b5eca (patch) | |
tree | 2689b169db86211ed86f158e4e2e3eb37326b380 /dw/hyphenator.cc | |
parent | de3847bff7e3d0215d4f8cfae210315871a6f4d5 (diff) |
At least at the beginning, UTF-8 characters are counted correctly. Ückendorf is now hyphenated correctly.
Diffstat (limited to 'dw/hyphenator.cc')
-rw-r--r-- | dw/hyphenator.cc | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/dw/hyphenator.cc b/dw/hyphenator.cc index 7a1f5df7..4acee185 100644 --- a/dw/hyphenator.cc +++ b/dw/hyphenator.cc @@ -262,7 +262,6 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks) char work[strlen (word) + 3]; strcpy (work, "."); strcat (work, wordLc + startActualWord); - delete wordLc; strcat (work, "."); int l = strlen (work); @@ -290,8 +289,13 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks) } // No hyphens in the first two chars or the last two. - points.set (1, 0); - points.set (2, 0); + // Characters are not bytes, so UTF-8 characters must be counted. + int numBytes1 = platform->nextGlyph (wordLc + startActualWord, 0); + int numBytes2 = platform->nextGlyph (wordLc + startActualWord, numBytes1); + for (int i = 0; i < numBytes2; i++) + points.set (i + 1, 0); + + // TODO: Characters, not bytes (as above). points.set (points.size() - 2, 0); points.set (points.size() - 3, 0); @@ -306,6 +310,8 @@ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks) } } + delete wordLc; + *numBreaks = breakPos.size (); if (*numBreaks == 0) return NULL; |