diff options
-rw-r--r-- | dw/hyphenator.cc | 97 | ||||
-rw-r--r-- | dw/hyphenator.hh | 7 | ||||
-rw-r--r-- | dw/textblock.cc | 3 | ||||
-rw-r--r-- | dw/textblock.hh | 5 | ||||
-rw-r--r-- | dw/textblock_linebreaking.cc | 162 | ||||
-rw-r--r-- | test/liang.cc | 3 |
6 files changed, 200 insertions, 77 deletions
diff --git a/dw/hyphenator.cc b/dw/hyphenator.cc index 5c84fe99..dddf6d3e 100644 --- a/dw/hyphenator.cc +++ b/dw/hyphenator.cc @@ -22,17 +22,18 @@ HashTable <TypedPair <TypedPointer <core::Platform>, ConstString>, new HashTable <TypedPair <TypedPointer <core::Platform>, ConstString>, Hyphenator> (true, true); -Hyphenator::Hyphenator (core::Platform *platform, const char *filename) +Hyphenator::Hyphenator (core::Platform *platform, + const char *patFile, const char *excFile) { this->platform = platform; tree = NULL; // As long we are not sure whether a pattern file can be read. - FILE *file = fopen (filename, "r"); - if (file) { + FILE *patF = fopen (patFile, "r"); + if (patF) { tree = new HashTable <Integer, Collection <Integer> > (true, true); - while (!feof (file)) { + while (!feof (patF)) { char buf[LEN + 1]; - char *s = fgets (buf, LEN, file); + char *s = fgets (buf, LEN, patF); if (s) { // TODO Better exit with an error, when the line is too long. int l = strlen (s); @@ -41,7 +42,26 @@ Hyphenator::Hyphenator (core::Platform *platform, const char *filename) insertPattern (s); } } - fclose (file); + fclose (patF); + } + + exceptions = NULL; // Again, only instanciated when needed. + + FILE *excF = fopen (excFile, "r"); + if (excF) { + exceptions = new HashTable <ConstString, Vector <Integer> > (true, true); + while (!feof (excF)) { + char buf[LEN + 1]; + char *s = fgets (buf, LEN, excF); + if (s) { + // TODO Better exit with an error, when the line is too long. + int l = strlen (s); + if (s[l - 1] == '\n') + s[l - 1] = 0; + insertException (s); + } + } + fclose (excF); } } @@ -49,10 +69,12 @@ Hyphenator::~Hyphenator () { if (tree) delete tree; + if (exceptions) + delete exceptions; } Hyphenator *Hyphenator::getHyphenator (core::Platform *platform, - const char *language) + const char *lang) { // TODO Not very efficient. Other key than TypedPair? // (Keeping the parts of the pair on the stack does not help, since @@ -60,19 +82,22 @@ Hyphenator *Hyphenator::getHyphenator (core::Platform *platform, TypedPair <TypedPointer <core::Platform>, ConstString> *pair = new TypedPair <TypedPointer <core::Platform>, ConstString> (new TypedPointer <core::Platform> (platform), - new ConstString (language)); + new ConstString (lang)); Hyphenator *hyphenator = hyphenators->get (pair); if (hyphenator) delete pair; else { // TODO Much hard-coded! - char filename [256]; - sprintf (filename, "/usr/local/lib/dillo/hyphenation/%s.pat", language); + char patFile [256]; + sprintf (patFile, "/usr/local/lib/dillo/hyphenation/%s.pat", lang); + char excFile [256]; + sprintf (excFile, "/usr/local/lib/dillo/hyphenation/%s.exc", lang); - //printf ("Loading hyphenation patterns '%s' ...\n", filename); + printf ("Loading hyphenation patterns for language '%s' from '%s' and " + "exceptions from '%s' ...\n", lang, patFile, excFile); - hyphenator = new Hyphenator (platform, filename); + hyphenator = new Hyphenator (platform, patFile, excFile); hyphenators->put (pair, hyphenator); } @@ -120,6 +145,29 @@ void Hyphenator::insertPattern (char *s) t->put (new Integer (0), points); } +void Hyphenator::insertException (char *s) +{ + Vector<Integer> *breaks = new Vector<Integer> (1, true); + + int len = strlen (s); + for (int i = 0; i < len - 1; i++) + if((unsigned char)s[i] == 0xc2 && (unsigned char)s[i + 1] == 0xad) + breaks->put (new Integer (i - 2 * breaks->size())); + + char noHyphens[len - 2 * breaks->size() + 1]; + int j = 0; + for (int i = 0; i < len; ) { + if(i < len - 1 && + (unsigned char)s[i] == 0xc2 && (unsigned char)s[i + 1] == 0xad) + i += 2; + else + noHyphens[j++] = s[i++]; + } + noHyphens[j] = 0; + + exceptions->put (new String (noHyphens), breaks); +} + /** * Simple test to avoid much costs. Passing it does not mean that the word * can be hyphenated. @@ -135,17 +183,34 @@ bool Hyphenator::isHyphenationCandidate (const char *word) */ int *Hyphenator::hyphenateWord(const char *word, int *numBreaks) { - // tree == NULL means that there is no pattern file. - if (tree == NULL || !isHyphenationCandidate (word)) { + if ((tree == NULL && exceptions ==NULL) || !isHyphenationCandidate (word)) { *numBreaks = 0; return NULL; } + char *wordLc = platform->textToLower (word, strlen (word)); + // If the word is an exception, get the stored points. - // TODO + Vector <Integer> *exceptionalBreaks; + ConstString key (wordLc); + if (exceptions != NULL && (exceptionalBreaks = exceptions->get (&key))) { + int *result = new int[exceptionalBreaks->size()]; + for (int i = 0; i < exceptionalBreaks->size(); i++) + result[i] = exceptionalBreaks->get(i)->getValue(); + delete wordLc; + *numBreaks = exceptionalBreaks->size(); + return result; + } + + // tree == NULL means that there is no pattern file. + if (tree == NULL) { + delete wordLc; + *numBreaks = 0; + return NULL; + } + char work[strlen (word) + 3]; strcpy (work, "."); - char *wordLc = platform->textToLower (word, strlen (word)); strcat (work, wordLc); delete wordLc; strcat (work, "."); diff --git a/dw/hyphenator.hh b/dw/hyphenator.hh index 905d682f..9ef3e306 100644 --- a/dw/hyphenator.hh +++ b/dw/hyphenator.hh @@ -24,10 +24,15 @@ private: lout::container::typed::HashTable <lout::object::Integer, lout::container::typed::Collection <lout::object::Integer> > *tree; + lout::container::typed::HashTable <lout::object::ConstString, + lout::container::typed::Vector + <lout::object::Integer> > *exceptions; void insertPattern (char *s); + void insertException (char *s); public: - Hyphenator (core::Platform *platform, const char *filename); + Hyphenator (core::Platform *platform, + const char *patFile, const char *excFile); ~Hyphenator(); static Hyphenator *getHyphenator (core::Platform *platform, diff --git a/dw/textblock.cc b/dw/textblock.cc index c84dfbf5..780a07ec 100644 --- a/dw/textblock.cc +++ b/dw/textblock.cc @@ -1138,6 +1138,7 @@ int Textblock::findLineOfWord (int wordIndex) { int high = lines->size () - 1, index, low = 0; + // TODO regard also not-yet-existing lines? if (wordIndex < 0 || wordIndex >= words->size ()) return -1; @@ -1497,7 +1498,7 @@ void Textblock::addWidget (core::Widget *widget, core::style::Style *style) * end of this function, the correct value is assigned. */ widget->parentRef = -1; - PRINTF ("%p becomes child of %p\n", widget, this); + printf ("%p becomes child of %p\n", widget, this); widget->setParent (this); widget->setStyle (style); diff --git a/dw/textblock.hh b/dw/textblock.hh index d324a820..7cc32d58 100644 --- a/dw/textblock.hh +++ b/dw/textblock.hh @@ -186,7 +186,8 @@ private: protected: enum { - HYPHEN_BREAK = 1000000 // to be tested and tuned + //HYPHEN_BREAK = 1000000 // to be tested and tuned + HYPHEN_BREAK = 0 }; struct Line @@ -251,6 +252,8 @@ protected: later set by a_Dw_page_add_space */ }; + void printWord (Word *word); + struct Anchor { char *name; diff --git a/dw/textblock_linebreaking.cc b/dw/textblock_linebreaking.cc index 35f8e1ae..b804fe11 100644 --- a/dw/textblock_linebreaking.cc +++ b/dw/textblock_linebreaking.cc @@ -63,6 +63,8 @@ void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth, this->totalStretchability = totalStretchability; this->totalShrinkability = totalShrinkability; + ratio = 0; // because this is used in print() + if (totalWidth == idealWidth) { badnessState = BADNESS_VALUE; badness = 0; @@ -156,34 +158,59 @@ void Textblock::BadnessAndPenalty::print () { switch (badnessState) { case TOO_LOOSE: - PRINTF ("loose"); + printf ("too loose"); break; case TOO_TIGHT: - PRINTF ("tight"); + printf ("too tight"); break; case BADNESS_VALUE: - PRINTF ("%d", badness); + printf ("%d", badness); break; } - PRINTF (" [%d + %d - %d vs. %d] + ", totalWidth, totalStretchability, - totalShrinkability, idealWidth); + printf (" [%d + %d - %d vs. %d => ratio = %d] + ", + totalWidth, totalStretchability, totalShrinkability, idealWidth, + ratio); switch (penaltyState) { case FORCE_BREAK: - PRINTF ("-inf"); + printf ("-inf"); break; case PROHIBIT_BREAK: - PRINTF ("inf"); + printf ("inf"); break; case PENALTY_VALUE: - PRINTF ("%d", penalty); + printf ("%d", penalty); + break; + } +} + +void Textblock::printWord (Word *word) +{ + switch(word->content.type) { + case core::Content::TEXT: + printf ("\"%s\"", word->content.text); break; + case core::Content::WIDGET: + printf ("<widget: %p>\n", word->content.widget); + break; + case core::Content::BREAK: + printf ("<break>\n"); + break; + default: + printf ("<?>\n"); + break; } + + printf (" [%d / %d + %d - %d => %d + %d - %d] => ", + word->size.width, word->origSpace, word->stretchability, + word->shrinkability, word->totalWidth, word->totalStretchability, + word->totalShrinkability); + word->badnessAndPenalty.print (); } /* @@ -254,6 +281,9 @@ Textblock::Line *Textblock::addLine (int firstWord, int lastWord, PRINTF (" words[%d]->totalWidth = %d\n", lastWord, lastWordOfLine->totalWidth); + printf ("[%p] ##### LINE ADDED: %d, from %d to %d #####\n", + this, lines->size (), firstWord, lastWord); + lines->increase (); if(!temporary) { // If the last line was temporary, this will be temporary, too, even @@ -396,54 +426,42 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) } else newLine = false; - if(newLine) { + if(newLine) { accumulateWordData (wordIndex); + int wordIndexEnd = wordIndex; bool lineAdded; do { - PRINTF (" searching from %d to %d\n", firstIndex, searchUntil); + printf (" searching from %d to %d\n", firstIndex, searchUntil); int breakPos = -1; for (int i = firstIndex; i <= searchUntil; i++) { Word *w = words->getRef(i); - if(word->content.type && core::Content::REAL_CONTENT) { - PRINTF (" %d (of %d): ", i, words->size ()); - - switch(w->content.type) { - case core::Content::TEXT: - PRINTF ("\"%s\"", w->content.text); - break; - case core::Content::WIDGET: - PRINTF ("<widget: %p>\n", w->content.widget); - break; - case core::Content::BREAK: - PRINTF ("<break>\n"); - break; - default: - PRINTF ("<?>\n"); - break; - } - - PRINTF (" [%d / %d + %d - %d] => ", - w->size.width, w->origSpace, w->stretchability, - w->shrinkability); - w->badnessAndPenalty.print (); - PRINTF ("\n"); - } - + printf (" %d (of %d): ", i, words->size ()); + printWord (w); + printf ("\n"); // TODO: is this condition needed: // if(w->badnessAndPenalty.lineCanBeBroken ()) ? + int c; if (breakPos == -1 || - w->badnessAndPenalty.compareTo - (&words->getRef(breakPos)->badnessAndPenalty) <= 0) + (c = w->badnessAndPenalty.compareTo + (&words->getRef(breakPos)->badnessAndPenalty)) <= 0) { // "<=" instead of "<" in the next lines tends to result in // more words per line -- theoretically. Practically, the // case "==" will never occur. + if (breakPos == -1) + printf (" => initial\n"); + else + printf (" => c = %d\n", c); + breakPos = i; + } } + + printf (" breakPos = %d\n", breakPos); if (wrapAll && searchUntil == words->size () - 1) { // Since no break and no space is added, the last word @@ -455,12 +473,18 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) BadnessAndPenalty correctedBap = lastWord->badnessAndPenalty; correctedBap.setPenaltyForceBreak (); if (correctedBap.compareTo - (&words->getRef(breakPos)->badnessAndPenalty) <= 0) + (&words->getRef(breakPos)->badnessAndPenalty) <= 0) { breakPos = searchUntil; + printf (" corrected: breakPos = %d\n", breakPos); + } } int hyphenatedWord = -1; Word *word1 = words->getRef(breakPos); + printf ("[%p] line (broken at word %d): ", this, breakPos); + word1->badnessAndPenalty.print (); + printf ("\n"); + if (word1->badnessAndPenalty.lineTight () && word1->canBeHyphenated && word1->style->x_lang[0] && @@ -478,28 +502,33 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) hyphenatedWord = breakPos + 1; } - PRINTF ("[%p] breakPos = %d, hyphenatedWord = %d\n", + printf ("[%p] breakPos = %d, hyphenatedWord = %d\n", this, breakPos, hyphenatedWord); if(hyphenatedWord == -1) { addLine (firstIndex, breakPos, tempNewLine); - PRINTF ("[%p] new line %d (%s), from %d to %d\n", + printf ("[%p] new line %d (%s), from %d to %d\n", this, lines->size() - 1, tempNewLine ? "temporally" : "permanently", firstIndex, breakPos); lineAdded = true; - PRINTF (" accumulating again from %d to %d\n", - breakPos + 1, wordIndex); } else { // TODO hyphenateWord() should return weather something has // changed at all. So that a second run, with // !word->canBeHyphenated, is unneccessary. // TODO Update: for this, searchUntil == 0 should be checked. - searchUntil += hyphenateWord (hyphenatedWord); + printf ("[%p] old searchUntil = %d ...\n", this, searchUntil); + int n = hyphenateWord (hyphenatedWord); + searchUntil += n; + if (hyphenatedWord >= wordIndex) + wordIndexEnd += n; + printf ("[%p] -> new searchUntil = %d ...\n", this, searchUntil); lineAdded = false; } - for(int i = breakPos + 1; i <= wordIndex; i++) + printf ("[%p] accumulating again from %d to %d\n", + this, breakPos + 1, wordIndexEnd); + for(int i = breakPos + 1; i <= wordIndexEnd; i++) accumulateWordData (i); } while(!lineAdded); @@ -514,7 +543,7 @@ int Textblock::hyphenateWord (int wordIndex) hyphenatedWord->style->x_lang[1], 0 }; Hyphenator *hyphenator = Hyphenator::getHyphenator (layout->getPlatform (), lang); - PRINTF ("[%p] considering to hyphenate word %d, '%s', in language '%s'\n", + printf ("[%p] considering to hyphenate word %d, '%s', in language '%s'\n", this, wordIndex, words->getRef(wordIndex)->content.text, lang); int numBreaks; int *breakPos = @@ -527,9 +556,9 @@ int Textblock::hyphenateWord (int wordIndex) calcTextSizes (origWord.content.text, strlen (origWord.content.text), origWord.style, numBreaks, breakPos, wordSize); - PRINTF ("[%p] %d words ...\n", this, words->size ()); + printf ("[%p] %d words ...\n", this, words->size ()); words->insert (wordIndex, numBreaks); - PRINTF ("[%p] ... => %d words\n", this, words->size ()); + printf ("[%p] ... => %d words\n", this, words->size ()); for (int i = 0; i < numBreaks + 1; i++) { Word *w = words->getRef (wordIndex + i); @@ -546,22 +575,27 @@ int Textblock::hyphenateWord (int wordIndex) w->content.text = layout->textZone->strndup (origWord.content.text + start, end - start); - PRINTF (" [%d] -> '%s'\n", wordIndex + i, w->content.text); + printf (" [%d] -> '%s'\n", wordIndex + i, w->content.text); - if (i < numBreaks - 1) { + // Note: there are numBreaks + 1 word parts. + if (i < numBreaks) { // TODO There should be a method fillHyphen. w->badnessAndPenalty.setPenalty (HYPHEN_BREAK); w->hyphenWidth = layout->textWidth (origWord.style->font, "\xc2\xad", 2); + printf (" [%d] + hyphen\n", wordIndex + i); } else { - if (origWord.content.space) + if (origWord.content.space) { fillSpace (w, origWord.spaceStyle); + printf (" [%d] + space\n", wordIndex + i); + } else + printf (" [%d] + nothing\n", wordIndex + i); } accumulateWordData (wordIndex + i); } - PRINTF (" finished\n"); + printf (" finished\n"); //delete origword->content.text; TODO: Via textZone? origWord.style->unref (); @@ -635,9 +669,9 @@ void Textblock::accumulateWordForLine (int lineIndex, int wordIndex) void Textblock::accumulateWordData (int wordIndex) { - PRINTF ("[%p] ACCUMULATE_WORD_DATA: %d\n", this, wordIndex); - Word *word = words->getRef (wordIndex); + printf ("[%p] ACCUMULATE_WORD_DATA (%d): ...\n", this, wordIndex); + int availWidth = calcAvailWidth (); // todo: variable? parameter? if (wordIndex == 0 || @@ -647,7 +681,21 @@ void Textblock::accumulateWordData (int wordIndex) word->totalWidth = word->size.width + word->hyphenWidth; word->totalStretchability = 0; word->totalShrinkability = 0; + printf(" (first word of line)\n"); } else { + if (lines->size () == 0) + printf(" (word %d word of not-yet-existing line %d)\n", + wordIndex, 0); + else if (wordIndex > lines->getLastRef()->lastWord) + printf(" (word %d word of not-yet-existing line %d)\n", + wordIndex - (lines->getLastRef()->lastWord + 1), + lines->size()); + else { + int line = findLineOfWord (wordIndex); + printf(" (word %d word of line %d)\n", + wordIndex - lines->getRef(line)->firstWord, line); + } + Word *prevWord = words->getRef (wordIndex - 1); word->totalWidth = prevWord->totalWidth @@ -659,13 +707,13 @@ void Textblock::accumulateWordData (int wordIndex) prevWord->totalShrinkability + prevWord->shrinkability; } - PRINTF(" line width: %d of %d\n", word->totalWidth, availWidth); - PRINTF(" spaces: + %d - %d\n", - word->totalStretchability, word->totalShrinkability); - word->badnessAndPenalty.calcBadness (word->totalWidth, availWidth, word->totalStretchability, word->totalShrinkability); + + printf (" => "); + printWord (word); + printf ("\n"); } int Textblock::calcAvailWidth () diff --git a/test/liang.cc b/test/liang.cc index d878420c..59aaa6d5 100644 --- a/test/liang.cc +++ b/test/liang.cc @@ -9,7 +9,7 @@ void hyphenateWord (dw::core::Platform *p, const char *word) int *breakPos = h->hyphenateWord (word, &numBreaks); for (int i = 0; i < numBreaks + 1; i++) { if (i != 0) - putchar ('-'); + printf ("\xc2\xad"); int start = (i == 0 ? 0 : breakPos[i - 1]); int end = (i == numBreaks ? strlen (word) : breakPos[i]); for (int j = start; j < end; j++) @@ -35,6 +35,7 @@ int main (int argc, char *argv[]) "übertragungsverordnung"); hyphenateWord (&p, "„Grundstücksverkehrsgenehmigungszuständigkeits" "übertragungsverordnung“"); + hyphenateWord (&p, "Grundstücksverkehrsgenehmigungszuständigkeit"); return 0; } |