about summary refs log tree commit diff
diff options
context:
space:
mode:
author sgeerken <devnull@localhost> 2012-07-09 00:46:52 +0200
committer sgeerken <devnull@localhost> 2012-07-09 00:46:52 +0200
commit 528de63c0e0258a9f626f2c79d89d6d49086d176 (patch)
tree 268e82b699bd3f53d7d58684036560f7d1d148ef
parent cbe5e631bdc8837d8ef3eea9053dacf70e974de7 (diff)
Hyphenation exceptions. Some bug fixes.
-rw-r--r-- dw/hyphenator.cc 97
-rw-r--r-- dw/hyphenator.hh 7
-rw-r--r-- dw/textblock.cc 3
-rw-r--r-- dw/textblock.hh 5
-rw-r--r-- dw/textblock_linebreaking.cc 162
-rw-r--r-- test/liang.cc 3
6 files changed, 200 insertions, 77 deletions
diff --git a/dw/hyphenator.cc b/dw/hyphenator.cc
index 5c84fe99..dddf6d3e 100644
--- a/dw/hyphenator.cc
+++ b/dw/hyphenator.cc
@@ -22,17 +22,18 @@ HashTable <TypedPair <TypedPointer <core::Platform>, ConstString>,
new HashTable <TypedPair <TypedPointer <core::Platform>, ConstString>,
Hyphenator> (true, true);
-Hyphenator::Hyphenator (core::Platform *platform, const char *filename)
+Hyphenator::Hyphenator (core::Platform *platform,
+ const char *patFile, const char *excFile)
{
this->platform = platform;
tree = NULL; // As long we are not sure whether a pattern file can be read.
- FILE *file = fopen (filename, "r");
- if (file) {
+ FILE *patF = fopen (patFile, "r");
+ if (patF) {
tree = new HashTable <Integer, Collection <Integer> > (true, true);
- while (!feof (file)) {
+ while (!feof (patF)) {
char buf[LEN + 1];
- char *s = fgets (buf, LEN, file);
+ char *s = fgets (buf, LEN, patF);
if (s) {
// TODO Better exit with an error, when the line is too long.
int l = strlen (s);
@@ -41,7 +42,26 @@ Hyphenator::Hyphenator (core::Platform *platform, const char *filename)
insertPattern (s);
}
}
- fclose (file);
+ fclose (patF);
+ }
+
+ exceptions = NULL; // Again, only instanciated when needed.
+
+ FILE *excF = fopen (excFile, "r");
+ if (excF) {
+ exceptions = new HashTable <ConstString, Vector <Integer> > (true, true);
+ while (!feof (excF)) {
+ char buf[LEN + 1];
+ char *s = fgets (buf, LEN, excF);
+ if (s) {
+ // TODO Better exit with an error, when the line is too long.
+ int l = strlen (s);
+ if (s[l - 1] == '\n')
+ s[l - 1] = 0;
+ insertException (s);
+ }
+ }
+ fclose (excF);
}
}
@@ -49,10 +69,12 @@ Hyphenator::~Hyphenator ()
{
if (tree)
delete tree;
+ if (exceptions)
+ delete exceptions;
}
Hyphenator *Hyphenator::getHyphenator (core::Platform *platform,
- const char *language)
+ const char *lang)
{
// TODO Not very efficient. Other key than TypedPair?
// (Keeping the parts of the pair on the stack does not help, since
@@ -60,19 +82,22 @@ Hyphenator *Hyphenator::getHyphenator (core::Platform *platform,
TypedPair <TypedPointer <core::Platform>, ConstString> *pair =
new TypedPair <TypedPointer <core::Platform>,
ConstString> (new TypedPointer <core::Platform> (platform),
- new ConstString (language));
+ new ConstString (lang));
Hyphenator *hyphenator = hyphenators->get (pair);
if (hyphenator)
delete pair;
else {
// TODO Much hard-coded!
- char filename [256];
- sprintf (filename, "/usr/local/lib/dillo/hyphenation/%s.pat", language);
+ char patFile [256];
+ sprintf (patFile, "/usr/local/lib/dillo/hyphenation/%s.pat", lang);
+ char excFile [256];
+ sprintf (excFile, "/usr/local/lib/dillo/hyphenation/%s.exc", lang);
- //printf ("Loading hyphenation patterns '%s' ...\n", filename);
+ printf ("Loading hyphenation patterns for language '%s' from '%s' and "
+ "exceptions from '%s' ...\n", lang, patFile, excFile);
- hyphenator = new Hyphenator (platform, filename);
+ hyphenator = new Hyphenator (platform, patFile, excFile);
hyphenators->put (pair, hyphenator);
}
@@ -120,6 +145,29 @@ void Hyphenator::insertPattern (char *s)
t->put (new Integer (0), points);
}
+void Hyphenator::insertException (char *s)
+{
+ Vector<Integer> *breaks = new Vector<Integer> (1, true);
+
+ int len = strlen (s);
+ for (int i = 0; i < len - 1; i++)
+ if((unsigned char)s[i] == 0xc2 && (unsigned char)s[i + 1] == 0xad)
+ breaks->put (new Integer (i - 2 * breaks->size()));
+
+ char noHyphens[len - 2 * breaks->size() + 1];
+ int j = 0;
+ for (int i = 0; i < len; ) {
+ if(i < len - 1 &&
+ (unsigned char)s[i] == 0xc2 && (unsigned char)s[i + 1] == 0xad)
+ i += 2;
+ else
+ noHyphens[j++] = s[i++];
+ }
+ noHyphens[j] = 0;
+
+ exceptions->put (new String (noHyphens), breaks);
+}
+
/**
* Simple test to avoid much costs. Passing it does not mean that the word
* can be hyphenated.
@@ -135,17 +183,34 @@ bool Hyphenator::isHyphenationCandidate (const char *word)
*/
int *Hyphenator::hyphenateWord(const char *word, int *numBreaks)
{
- // tree == NULL means that there is no pattern file.
- if (tree == NULL || !isHyphenationCandidate (word)) {
+ if ((tree == NULL && exceptions ==NULL) || !isHyphenationCandidate (word)) {
*numBreaks = 0;
return NULL;
}
+ char *wordLc = platform->textToLower (word, strlen (word));
+
// If the word is an exception, get the stored points.
- // TODO
+ Vector <Integer> *exceptionalBreaks;
+ ConstString key (wordLc);
+ if (exceptions != NULL && (exceptionalBreaks = exceptions->get (&key))) {
+ int *result = new int[exceptionalBreaks->size()];
+ for (int i = 0; i < exceptionalBreaks->size(); i++)
+ result[i] = exceptionalBreaks->get(i)->getValue();
+ delete wordLc;
+ *numBreaks = exceptionalBreaks->size();
+ return result;
+ }
+
+ // tree == NULL means that there is no pattern file.
+ if (tree == NULL) {
+ delete wordLc;
+ *numBreaks = 0;
+ return NULL;
+ }
+
char work[strlen (word) + 3];
strcpy (work, ".");
- char *wordLc = platform->textToLower (word, strlen (word));
strcat (work, wordLc);
delete wordLc;
strcat (work, ".");
diff --git a/dw/hyphenator.hh b/dw/hyphenator.hh
index 905d682f..9ef3e306 100644
--- a/dw/hyphenator.hh
+++ b/dw/hyphenator.hh
@@ -24,10 +24,15 @@ private:
lout::container::typed::HashTable <lout::object::Integer,
lout::container::typed::Collection
<lout::object::Integer> > *tree;
+ lout::container::typed::HashTable <lout::object::ConstString,
+ lout::container::typed::Vector
+ <lout::object::Integer> > *exceptions;
void insertPattern (char *s);
+ void insertException (char *s);
public:
- Hyphenator (core::Platform *platform, const char *filename);
+ Hyphenator (core::Platform *platform,
+ const char *patFile, const char *excFile);
~Hyphenator();
static Hyphenator *getHyphenator (core::Platform *platform,
diff --git a/dw/textblock.cc b/dw/textblock.cc
index c84dfbf5..780a07ec 100644
--- a/dw/textblock.cc
+++ b/dw/textblock.cc
@@ -1138,6 +1138,7 @@ int Textblock::findLineOfWord (int wordIndex)
{
int high = lines->size () - 1, index, low = 0;
+ // TODO regard also not-yet-existing lines?
if (wordIndex < 0 || wordIndex >= words->size ())
return -1;
@@ -1497,7 +1498,7 @@ void Textblock::addWidget (core::Widget *widget, core::style::Style *style)
* end of this function, the correct value is assigned. */
widget->parentRef = -1;
- PRINTF ("%p becomes child of %p\n", widget, this);
+ printf ("%p becomes child of %p\n", widget, this);
widget->setParent (this);
widget->setStyle (style);
diff --git a/dw/textblock.hh b/dw/textblock.hh
index d324a820..7cc32d58 100644
--- a/dw/textblock.hh
+++ b/dw/textblock.hh
@@ -186,7 +186,8 @@ private:
protected:
enum {
- HYPHEN_BREAK = 1000000 // to be tested and tuned
+ //HYPHEN_BREAK = 1000000 // to be tested and tuned
+ HYPHEN_BREAK = 0
};
struct Line
@@ -251,6 +252,8 @@ protected:
later set by a_Dw_page_add_space */
};
+ void printWord (Word *word);
+
struct Anchor
{
char *name;
diff --git a/dw/textblock_linebreaking.cc b/dw/textblock_linebreaking.cc
index 35f8e1ae..b804fe11 100644
--- a/dw/textblock_linebreaking.cc
+++ b/dw/textblock_linebreaking.cc
@@ -63,6 +63,8 @@ void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth,
this->totalStretchability = totalStretchability;
this->totalShrinkability = totalShrinkability;
+ ratio = 0; // because this is used in print()
+
if (totalWidth == idealWidth) {
badnessState = BADNESS_VALUE;
badness = 0;
@@ -156,34 +158,59 @@ void Textblock::BadnessAndPenalty::print ()
{
switch (badnessState) {
case TOO_LOOSE:
- PRINTF ("loose");
+ printf ("too loose");
break;
case TOO_TIGHT:
- PRINTF ("tight");
+ printf ("too tight");
break;
case BADNESS_VALUE:
- PRINTF ("%d", badness);
+ printf ("%d", badness);
break;
}
- PRINTF (" [%d + %d - %d vs. %d] + ", totalWidth, totalStretchability,
- totalShrinkability, idealWidth);
+ printf (" [%d + %d - %d vs. %d => ratio = %d] + ",
+ totalWidth, totalStretchability, totalShrinkability, idealWidth,
+ ratio);
switch (penaltyState) {
case FORCE_BREAK:
- PRINTF ("-inf");
+ printf ("-inf");
break;
case PROHIBIT_BREAK:
- PRINTF ("inf");
+ printf ("inf");
break;
case PENALTY_VALUE:
- PRINTF ("%d", penalty);
+ printf ("%d", penalty);
+ break;
+ }
+}
+
+void Textblock::printWord (Word *word)
+{
+ switch(word->content.type) {
+ case core::Content::TEXT:
+ printf ("\"%s\"", word->content.text);
break;
+ case core::Content::WIDGET:
+ printf ("<widget: %p>\n", word->content.widget);
+ break;
+ case core::Content::BREAK:
+ printf ("<break>\n");
+ break;
+ default:
+ printf ("<?>\n");
+ break;
}
+
+ printf (" [%d / %d + %d - %d => %d + %d - %d] => ",
+ word->size.width, word->origSpace, word->stretchability,
+ word->shrinkability, word->totalWidth, word->totalStretchability,
+ word->totalShrinkability);
+ word->badnessAndPenalty.print ();
}
/*
@@ -254,6 +281,9 @@ Textblock::Line *Textblock::addLine (int firstWord, int lastWord,
PRINTF (" words[%d]->totalWidth = %d\n", lastWord,
lastWordOfLine->totalWidth);
+ printf ("[%p] ##### LINE ADDED: %d, from %d to %d #####\n",
+ this, lines->size (), firstWord, lastWord);
+
lines->increase ();
if(!temporary) {
// If the last line was temporary, this will be temporary, too, even
@@ -396,54 +426,42 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll)
} else
newLine = false;
- if(newLine) {
+ if(newLine) {
accumulateWordData (wordIndex);
+ int wordIndexEnd = wordIndex;
bool lineAdded;
do {
- PRINTF (" searching from %d to %d\n", firstIndex, searchUntil);
+ printf (" searching from %d to %d\n", firstIndex, searchUntil);
int breakPos = -1;
for (int i = firstIndex; i <= searchUntil; i++) {
Word *w = words->getRef(i);
- if(word->content.type && core::Content::REAL_CONTENT) {
- PRINTF (" %d (of %d): ", i, words->size ());
-
- switch(w->content.type) {
- case core::Content::TEXT:
- PRINTF ("\"%s\"", w->content.text);
- break;
- case core::Content::WIDGET:
- PRINTF ("<widget: %p>\n", w->content.widget);
- break;
- case core::Content::BREAK:
- PRINTF ("<break>\n");
- break;
- default:
- PRINTF ("<?>\n");
- break;
- }
-
- PRINTF (" [%d / %d + %d - %d] => ",
- w->size.width, w->origSpace, w->stretchability,
- w->shrinkability);
- w->badnessAndPenalty.print ();
- PRINTF ("\n");
- }
-
+ printf (" %d (of %d): ", i, words->size ());
+ printWord (w);
+ printf ("\n");
// TODO: is this condition needed:
// if(w->badnessAndPenalty.lineCanBeBroken ()) ?
+ int c;
if (breakPos == -1 ||
- w->badnessAndPenalty.compareTo
- (&words->getRef(breakPos)->badnessAndPenalty) <= 0)
+ (c = w->badnessAndPenalty.compareTo
+ (&words->getRef(breakPos)->badnessAndPenalty)) <= 0) {
// "<=" instead of "<" in the next lines tends to result in
// more words per line -- theoretically. Practically, the
// case "==" will never occur.
+ if (breakPos == -1)
+ printf (" => initial\n");
+ else
+ printf (" => c = %d\n", c);
+
breakPos = i;
+ }
}
+
+ printf (" breakPos = %d\n", breakPos);
if (wrapAll && searchUntil == words->size () - 1) {
// Since no break and no space is added, the last word
@@ -455,12 +473,18 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll)
BadnessAndPenalty correctedBap = lastWord->badnessAndPenalty;
correctedBap.setPenaltyForceBreak ();
if (correctedBap.compareTo
- (&words->getRef(breakPos)->badnessAndPenalty) <= 0)
+ (&words->getRef(breakPos)->badnessAndPenalty) <= 0) {
breakPos = searchUntil;
+ printf (" corrected: breakPos = %d\n", breakPos);
+ }
}
int hyphenatedWord = -1;
Word *word1 = words->getRef(breakPos);
+ printf ("[%p] line (broken at word %d): ", this, breakPos);
+ word1->badnessAndPenalty.print ();
+ printf ("\n");
+
if (word1->badnessAndPenalty.lineTight () &&
word1->canBeHyphenated &&
word1->style->x_lang[0] &&
@@ -478,28 +502,33 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll)
hyphenatedWord = breakPos + 1;
}
- PRINTF ("[%p] breakPos = %d, hyphenatedWord = %d\n",
+ printf ("[%p] breakPos = %d, hyphenatedWord = %d\n",
this, breakPos, hyphenatedWord);
if(hyphenatedWord == -1) {
addLine (firstIndex, breakPos, tempNewLine);
- PRINTF ("[%p] new line %d (%s), from %d to %d\n",
+ printf ("[%p] new line %d (%s), from %d to %d\n",
this, lines->size() - 1,
tempNewLine ? "temporally" : "permanently",
firstIndex, breakPos);
lineAdded = true;
- PRINTF (" accumulating again from %d to %d\n",
- breakPos + 1, wordIndex);
} else {
// TODO hyphenateWord() should return weather something has
// changed at all. So that a second run, with
// !word->canBeHyphenated, is unneccessary.
// TODO Update: for this, searchUntil == 0 should be checked.
- searchUntil += hyphenateWord (hyphenatedWord);
+ printf ("[%p] old searchUntil = %d ...\n", this, searchUntil);
+ int n = hyphenateWord (hyphenatedWord);
+ searchUntil += n;
+ if (hyphenatedWord >= wordIndex)
+ wordIndexEnd += n;
+ printf ("[%p] -> new searchUntil = %d ...\n", this, searchUntil);
lineAdded = false;
}
- for(int i = breakPos + 1; i <= wordIndex; i++)
+ printf ("[%p] accumulating again from %d to %d\n",
+ this, breakPos + 1, wordIndexEnd);
+ for(int i = breakPos + 1; i <= wordIndexEnd; i++)
accumulateWordData (i);
} while(!lineAdded);
@@ -514,7 +543,7 @@ int Textblock::hyphenateWord (int wordIndex)
hyphenatedWord->style->x_lang[1], 0 };
Hyphenator *hyphenator =
Hyphenator::getHyphenator (layout->getPlatform (), lang);
- PRINTF ("[%p] considering to hyphenate word %d, '%s', in language '%s'\n",
+ printf ("[%p] considering to hyphenate word %d, '%s', in language '%s'\n",
this, wordIndex, words->getRef(wordIndex)->content.text, lang);
int numBreaks;
int *breakPos =
@@ -527,9 +556,9 @@ int Textblock::hyphenateWord (int wordIndex)
calcTextSizes (origWord.content.text, strlen (origWord.content.text),
origWord.style, numBreaks, breakPos, wordSize);
- PRINTF ("[%p] %d words ...\n", this, words->size ());
+ printf ("[%p] %d words ...\n", this, words->size ());
words->insert (wordIndex, numBreaks);
- PRINTF ("[%p] ... => %d words\n", this, words->size ());
+ printf ("[%p] ... => %d words\n", this, words->size ());
for (int i = 0; i < numBreaks + 1; i++) {
Word *w = words->getRef (wordIndex + i);
@@ -546,22 +575,27 @@ int Textblock::hyphenateWord (int wordIndex)
w->content.text =
layout->textZone->strndup (origWord.content.text + start,
end - start);
- PRINTF (" [%d] -> '%s'\n", wordIndex + i, w->content.text);
+ printf (" [%d] -> '%s'\n", wordIndex + i, w->content.text);
- if (i < numBreaks - 1) {
+ // Note: there are numBreaks + 1 word parts.
+ if (i < numBreaks) {
// TODO There should be a method fillHyphen.
w->badnessAndPenalty.setPenalty (HYPHEN_BREAK);
w->hyphenWidth =
layout->textWidth (origWord.style->font, "\xc2\xad", 2);
+ printf (" [%d] + hyphen\n", wordIndex + i);
} else {
- if (origWord.content.space)
+ if (origWord.content.space) {
fillSpace (w, origWord.spaceStyle);
+ printf (" [%d] + space\n", wordIndex + i);
+ } else
+ printf (" [%d] + nothing\n", wordIndex + i);
}
accumulateWordData (wordIndex + i);
}
- PRINTF (" finished\n");
+ printf (" finished\n");
//delete origword->content.text; TODO: Via textZone?
origWord.style->unref ();
@@ -635,9 +669,9 @@ void Textblock::accumulateWordForLine (int lineIndex, int wordIndex)
void Textblock::accumulateWordData (int wordIndex)
{
- PRINTF ("[%p] ACCUMULATE_WORD_DATA: %d\n", this, wordIndex);
-
Word *word = words->getRef (wordIndex);
+ printf ("[%p] ACCUMULATE_WORD_DATA (%d): ...\n", this, wordIndex);
+
int availWidth = calcAvailWidth (); // todo: variable? parameter?
if (wordIndex == 0 ||
@@ -647,7 +681,21 @@ void Textblock::accumulateWordData (int wordIndex)
word->totalWidth = word->size.width + word->hyphenWidth;
word->totalStretchability = 0;
word->totalShrinkability = 0;
+ printf(" (first word of line)\n");
} else {
+ if (lines->size () == 0)
+ printf(" (word %d word of not-yet-existing line %d)\n",
+ wordIndex, 0);
+ else if (wordIndex > lines->getLastRef()->lastWord)
+ printf(" (word %d word of not-yet-existing line %d)\n",
+ wordIndex - (lines->getLastRef()->lastWord + 1),
+ lines->size());
+ else {
+ int line = findLineOfWord (wordIndex);
+ printf(" (word %d word of line %d)\n",
+ wordIndex - lines->getRef(line)->firstWord, line);
+ }
+
Word *prevWord = words->getRef (wordIndex - 1);
word->totalWidth = prevWord->totalWidth
@@ -659,13 +707,13 @@ void Textblock::accumulateWordData (int wordIndex)
prevWord->totalShrinkability + prevWord->shrinkability;
}
- PRINTF(" line width: %d of %d\n", word->totalWidth, availWidth);
- PRINTF(" spaces: + %d - %d\n",
- word->totalStretchability, word->totalShrinkability);
-
word->badnessAndPenalty.calcBadness (word->totalWidth, availWidth,
word->totalStretchability,
word->totalShrinkability);
+
+ printf (" => ");
+ printWord (word);
+ printf ("\n");
}
int Textblock::calcAvailWidth ()
diff --git a/test/liang.cc b/test/liang.cc
index d878420c..59aaa6d5 100644
--- a/test/liang.cc
+++ b/test/liang.cc
@@ -9,7 +9,7 @@ void hyphenateWord (dw::core::Platform *p, const char *word)
int *breakPos = h->hyphenateWord (word, &numBreaks);
for (int i = 0; i < numBreaks + 1; i++) {
if (i != 0)
- putchar ('-');
+ printf ("\xc2\xad");
int start = (i == 0 ? 0 : breakPos[i - 1]);
int end = (i == numBreaks ? strlen (word) : breakPos[i]);
for (int j = start; j < end; j++)
@@ -35,6 +35,7 @@ int main (int argc, char *argv[])
"übertragungsverordnung");
hyphenateWord (&p, "„Grundstücksverkehrsgenehmigungszuständigkeits"
"übertragungsverordnung“");
+ hyphenateWord (&p, "Grundstücksverkehrsgenehmigungszuständigkeit");
return 0;
}