diff options
author | sgeerke <devnull@localhost> | 2012-06-24 12:06:15 +0200 |
---|---|---|
committer | sgeerke <devnull@localhost> | 2012-06-24 12:06:15 +0200 |
commit | 39985d2c5158742fc9e00e7dd90559f9f5c8c75f (patch) | |
tree | 1950c633fd73c1b7ef5056ed18b9dcd527552d3b | |
parent | e02ec1bd1405715f050a71d40a9e3c212dd43123 (diff) |
First part of hyphenation! And some minor changes in "lout".
-rw-r--r-- | doc/dw-line-breaking.doc | 205 | ||||
-rw-r--r-- | doc/index.doc | 20 | ||||
-rw-r--r-- | doc/lout.doc | 66 | ||||
-rw-r--r-- | doc/rounding-errors.doc | 4 | ||||
-rw-r--r-- | dw/Makefile.am | 1 | ||||
-rw-r--r-- | dw/fltkviewbase.cc | 6 | ||||
-rw-r--r-- | dw/tablecell.cc | 4 | ||||
-rw-r--r-- | dw/tablecell.hh | 2 | ||||
-rw-r--r-- | dw/textblock.cc | 826 | ||||
-rw-r--r-- | dw/textblock.hh | 142 | ||||
-rw-r--r-- | dw/textblock_linebreaking.cc | 688 | ||||
-rw-r--r-- | dw/types.hh | 6 | ||||
-rw-r--r-- | lout/container.hh | 4 | ||||
-rw-r--r-- | lout/identity.hh | 5 | ||||
-rw-r--r-- | lout/misc.hh | 36 | ||||
-rw-r--r-- | lout/object.hh | 4 | ||||
-rw-r--r-- | lout/signal.hh | 4 | ||||
-rw-r--r-- | test/KHM1.html | 56 | ||||
-rw-r--r-- | test/KHM1b.html | 14 | ||||
-rw-r--r-- | test/KHM1c.html | 10 | ||||
-rw-r--r-- | test/Makefile.am | 7 | ||||
-rw-r--r-- | test/liang.cc | 184 |
22 files changed, 1653 insertions, 641 deletions
diff --git a/doc/dw-line-breaking.doc b/doc/dw-line-breaking.doc new file mode 100644 index 00000000..43d7d448 --- /dev/null +++ b/doc/dw-line-breaking.doc @@ -0,0 +1,205 @@ +/** \page dw-line-breaking Changes in Line-Breaking + +<div style="border: 2px solid #ff0000; padding: 0.5em 1em; +background-color: #ffe0e0"><b>Warning:</b> Unsorted collection of +notes. Should be incorporated into dw::Textblock.</div> + +<ul> +<li>Motivation: optimized line breaking and introduction of penalties.</li> +<li>Definition of word: a hyphenated word consists of multiple words.</li> +<li>Splitting up: dw/textblock.cc and dw/textblock_linebreaking.cc</li> +<li>Test HTML files in the <i>test</i> directory.</li> +<li>Adjust dw::Textblock::HYPHEN_BREAK.</li> +</ul> + +<h2>Literature</h2> + +Although dillo does not (yet?) implement the algorithm TeX uses for +line breaking, this document shares much of the notation used by the +article <i>Breaking Paragraphs Into Lines</i> by Donald E. Knuth and +Michael F. Plass; originally published in: Software -- Practice and +Experience <b>11</b> (1981), 1119-1184; reprinted in: <i>Digital +Typography</i> by Donald E. Knuth, CSLI Publications 1999. In any case, it is an +interesting read. + + +<h2>Criteria for Line-Breaking</h2> + +Currently, a word (represented by dw::Textblock::Word) has the +following attributes related to line-breaking: + +<ul> + +<li> the width of the word itself, represented by dw::Textblock::Word::size; +<li> the width of the space following the word, represented by +dw::Textblock::Word::origSpace. +</ul> + +[TODO: When is breaking between two words not permitted?] + +In a more mathematical notation, the \f$i\f$th word has got a width +\f$w_i\f$ and a space \f$s_i\f$. + +With hyphenation, the criteria are refined. Hyphenation should only be +used when otherwise line breaking results in very large spaces. 
So, we +define: + +<ul> +<li>the badness \f$\beta\f$ of a line, which grows the more the +spaces between the words differ from the ideal space; +<li>a penalty \f$p\f$ for any possible break point. +</ul> + +The goal is to find those break points, where \f$\beta + p\f$ is +minimal. + +Examples for the penalty \f$p\f$: + +<ul> +<li>0 for normal line breaks (between words); +<li>\f$\infty\f$ to prevent a line break at all costs; +<li>\f$-\infty\f$ to force a line break; +<li>a value > 0 for hyphens. +</ul> + +[TODO: which value exactly for hyphens?] + +So we need the following values: + +<ul> +<li> \f$w_i\f$ (the width of the word \f$i\f$ itself); +<li> \f$s_i\f$ (the width of the space following the word \f$i\f$); +<li> the stretchability \f$y_i\f$, a value denoting how much the space +after word \f$i\f$ can be stretched (typically \f${1\over 2} s_i\f$); +<li> the shrinkability \f$z_i\f$, a value denoting how much the space +after word \f$i\f$ can be shrunk (typically \f${1\over 3} s_i\f$); +<li> the penalty \f$p_i\f$, if the line is broken after word \f$i\f$; +<li> a width \f$h_i\f$, which is added, when the line is broken after +word \f$i\f$. +</ul> + +\f$h_i\f$ is the width of the hyphen, if the word \f$i\f$ is a part of +the hyphenated word (except the last part); otherwise 0. + +Let \f$l\f$ be the (ideal) width (length) of the line. Furthermore, +all words from \f$a\f$ to \f$b\f$ are added to the line. \f$a\f$ is +fixed: we do not modify the previous lines anymore; but our task is to +find a suitable \f$b\f$. + +We define: + +\f[W_a^b = \sum_{i=a}^{b} w_i + \sum_{i=a}^{b-1} s_i + h_b\f] + +\f[Y_a^b = \sum_{i=a}^{b-1} y_i\f] + +\f[Z_a^b = \sum_{i=a}^{b-1} z_i\f] + + +\f$W_a^b\f$ is the total width, \f$Y_a^b\f$ the total stretchability, and +\f$Z_a^b\f$ the total shrinkability. 
+ +Furthermore the <i>adjustment ratio</i> \f$r_a^b\f$: + +<ul> +<li>in the ideal case that \f$W_a^b = l\f$: \f$r_a^b = 0\f$; +<li>if \f$W_a^b < l\f$: \f$r_a^b = (l - W_a^b) / Y_a^b\f$ (\f$r_a^b > 0\f$ in +this case); +<li>if \f$W_a^b > l\f$: \f$r_a^b = (l - W_a^b) / Z_a^b\f$ (\f$r_a^b < 0\f$ in +this case). +</ul> + +The badness \f$\beta_a^b\f$ is defined as follows: + +<ul> +<li>if \f$r_a^b\f$ is undefined or \f$r_a^b < -1\f$: \f$\beta_a^b = \infty\f$; +<li>otherwise: \f$\beta_a^b = |r_a^b|^3\f$ +</ul> + +The goal is to find the value of \f$b\f$ where \f$\beta_a^b + p_b\f$ +is minimal. (\f$a\f$ is given, since we do not modify the previous +lines.) + +After a couple of words, it is not predictable whether this minimum +has already been reached. There are two cases where this is possible +for a given \f$b'\f$: + +<ul> +<li>\f$\beta_a^{b'} = \infty\f$ (line gets too tight): \f$a \le b < +b'\f$, the minimum has to be searched between these two values; +<li>\f$p_{b'} = -\infty\f$ (forced line break): \f$a \le b \le b'\f$ +(there may be another minimum of \f$\beta_a^b\f$ before; note the +\f$\le\f$ instead of \f$<\f$). +</ul> + +This also means that all lines must be finalized with a forced line +break. + +[TODO: Change: only adding complete lines, which leads to "hanging" +words; temporary lines.] + +<h2>Soft Hyphens</h2> + +Calculating the width causes some problems, since it is not required +that the width of text "AB" is identical to the width of "A" plus the +width of "B". Instead, kerning, ligatures etc. may lead to other +results. For this reason, a word is also always drawn as a whole. + +[TODO: Translate the following text to English.] + +<pre> +Als Beispiel Wort aus vier trennbaren Silben: A-B-C-D + +3 mögliche Trennstellen, daher 8 mögliche Trennungen (allerdings einige +unwahrscheinlich): + +ABCD, ABC-D, AB-CD, AB-C-D, A-BCD, A-BC-D, A-B-CD, A-B-C-D, + +w sei Wortbreite, l das Ergebnis von textWidth. 
Zwingende Bedingungen +(bezieht sich auf die Teile, wo kein Trennstrich vorkommt; bei +Trennstrichen kann man eventuell improvisieren): + +ABCD => w(A) + w(B) + w(C) + w(D) = l(ABCD) +A-BCD => w(B) + w(C) + w(D) = l(BCD) +AB-CD, A-B-CD => w(C) + w(D) = l(CD) +ABC-D, AB-C-D, A-BC-D, A-B-C-D => w(D) = l(D) + +Also einfache Berechnung: + +w(D) = l(D) +w(C) = l(CD) - w(D) +w(B) = l(BCD) - (w(C) + w(D)) +w(A) = l(ABCD) - (w(B) + w(C) + w(D)) + +Bei den Breiten inklusive Trennstrich ergibt sich eine +Überbestimmtheit (bei konsanter Trennstrichbreite pro Wort). Daher +wird einfach eine feste Trennstrichbreite angenommen: + +w(A-) = w(A) + l(-) +w(AB-) = w(A) + w(B) + l(-) + +usw. +</pre> + +<h2>Bugs</h2> + +<h3>Major</h3> + +<ul> +<li>Collapsing spaces and collapsing margins do not work yet. (Are + collapsing spaces still needed anyway?)</li> +<li>List items (and aligned table cells) have to be reviewed (usage of + dw::Textblock::line1Offset).</li> +<li>Sometimes, lines are too wide. It seems that this difference is + exacly the width of a hyphen. +</ul> + +<h3>Minor</h3> + +<ul> +<li>Should dw::core::Content::BREAK still be used? Currently, this is + redundant to dw::Textblock::BadnessAndPenalty.</li> +<li>The calculation of badness is designed for justified text. For + other alignments, it may be modified. (TODO: document this in + detail.)</li> +</ul> + +*/ diff --git a/doc/index.doc b/doc/index.doc index 9892f177..59de8cd8 100644 --- a/doc/index.doc +++ b/doc/index.doc @@ -23,23 +23,23 @@ GLib. For an overview on all this, take a look at \ref lout. GtkObject is replaced by the following: <ul> -<li> object::Object is a common base class for many classes used dillo. In - the namespace ::object, there are also some more common classes and - interfaces. +<li> lout::object::Object is a common base class for many classes used + dillo. In the namespace lout::object, there are also some more common + classes and interfaces. 
-<li> A sub class of object::Object is identity::IdentifiableObject, which - allows to determine the class at run-time (equivalent to GTK_CHECK_CAST - in GtkObject). +<li> A sub class of lout::object::Object is + lout::identity::IdentifiableObject, which allows to determine the + class at run-time (equivalent to GTK_CHECK_CAST in GtkObject). -<li> For signals, there is the namespace ::signal. +<li> For signals, there is the namespace lout::signal. </ul> -Hash tables, linked lists etc. can be found in the ::container namespace, +Hash tables, linked lists etc. can be found in the lout::container namespace, several useful macros from GLib have been implemented as inline functions -in the ::misc namespace. +in the lout::misc namespace. As an alternative to the macros defined in list.h, there is also a template -class, misc::SimpleVector, which does the same. +class, lout::misc::SimpleVector, which does the same. <h3>Changes in Dw</h3> diff --git a/doc/lout.doc b/doc/lout.doc index 0d5be679..7f00d7b8 100644 --- a/doc/lout.doc +++ b/doc/lout.doc @@ -6,7 +6,7 @@ overview. <h2>Common Base Class</h2> -Many classes are derived from object::Object, which defines some +Many classes are derived from lout::object::Object, which defines some general methods. See there for more information. 
For the case, that you need primitive C++ types, there are some @@ -14,47 +14,47 @@ wrappers: <table> <tr><th>C++ Type <th>Wrapper Class -<tr><td>void* <td>object::Pointer -<tr><td>specific pointer <td>object::TypedPointer (template class) -<tr><td>int <td>object::Integer -<tr><td>const char* <td>object::ConstString -<tr><td>char* <td>object::String +<tr><td>void* <td>lout::object::Pointer +<tr><td>specific pointer <td>lout::object::TypedPointer (template class) +<tr><td>int <td>lout::object::Integer +<tr><td>const char* <td>lout::object::ConstString +<tr><td>char* <td>lout::object::String </table> <h2>Containers</h2> -In the namespace ::container, several container classes are defined, -which all deal with instances of object::Object. +In the namespace lout::container, several container classes are defined, +which all deal with instances of lout::object::Object. <h3>Untyped Containers</h3> -In container::untyped, there are the following containers: +In lout::container::untyped, there are the following containers: <ul> -<li>container::untyped::Vector, a dynamically increases array, -<li>container::untyped::List, a linked list, -<li>container::untyped::HashTable, a hash table, and -<li>container::untyped::Stack, a stack. +<li>lout::container::untyped::Vector, a dynamically increases array, +<li>lout::container::untyped::List, a linked list, +<li>lout::container::untyped::HashTable, a hash table, and +<li>lout::container::untyped::Stack, a stack. </ul> All provide specific methods, but since they have a common base class, -container::untyped::Collection, they all provide iterators, by the -method container::untyped::Collection::iterator. +lout::container::untyped::Collection, they all provide iterators, by the +method lout::container::untyped::Collection::iterator. 
<h3>Typed Containers</h3> -container::typed provides wrappers for the container classes defined -in container::untyped, which are more type safe, by using C++ +lout::container::typed provides wrappers for the container classes defined +in lout::container::untyped, which are more type safe, by using C++ templates. <h2>Signals</h2> For how to connect objects at run-time (to reduce dependencies), take a -look at the ::signal namespace. +look at the lout::signal namespace. -There is also a base class signal::ObservedObject, which implements +There is also a base class lout::signal::ObservedObject, which implements signals for deletion. @@ -67,28 +67,28 @@ see the file for mor informations. <h2>Identifying Classes at Runtime</h2> If the class of an object must be identified at runtime, -identity::IdentifiableObject should be used as the base class, see -there for more details. +lout::identity::IdentifiableObject should be used as the base class, +see there for more details. <h2>Miscellaneous</h2> -The ::misc namespace provides several miscellaneous stuff: +The lout::misc namespace provides several miscellaneous stuff: <ul> <li> In some contexts, it is necessary to compare objects - (less/greater), for this, also misc::Comparable must be - implemented. For example., container::untyped::Vector::sort and - container::typed::Vector::sort cast the elements to - misc::Comparable. This can be mixed with object::Object. -<li> misc::SimpleVector, a simple, template based vector class (not - depending on object::Object), -<li> misc::StringBuffer, class for fast concatenation of a large number + (less/greater), for this, also lout::misc::Comparable must be + implemented. For example., lout::container::untyped::Vector::sort and + lout::container::typed::Vector::sort cast the elements to + lout::misc::Comparable. This can be mixed with lout::object::Object. 
+<li> lout::misc::SimpleVector, a simple, template based vector class (not + depending on lout::object::Object), +<li> lout::misc::StringBuffer, class for fast concatenation of a large number of strings, -<li> misc::BitSet implements a bitset. -<li> useful (template) functions (misc::min, misc::max), and -<li> some functions useful for runtime checks (misc::assert, - misc::assertNotReached). +<li> lout::misc::BitSet implements a bitset. +<li> useful (template) functions (lout::misc::min, lout::misc::max), and +<li> some functions useful for runtime checks (lout::misc::assert, + lout::misc::assertNotReached). </ul> */ diff --git a/doc/rounding-errors.doc b/doc/rounding-errors.doc index 433d6ed9..133a1fe5 100644 --- a/doc/rounding-errors.doc +++ b/doc/rounding-errors.doc @@ -15,10 +15,10 @@ implementation would result in something, for which because of rounding errors, due to the integer division. This can be avoided by transforming the formula into -\f[y_i = {(\sum_{j=0}^{j=i} x_j) a \over b} - \sum_{j=0}^{j=i} y_j\f] +\f[y_i = {(\sum_{j=0}^{j=i} x_j) a \over b} - \sum_{j=0}^{j=i-1} y_j\f] Of corse, when all \f$y_i\f$ are calculated in a sequence, -\f$\sum_{j=0}^{j=i} x_j\f$ and \f$\sum_{j=0}^{j=i} y_j\f$ can be +\f$\sum_{j=0}^{j=i} x_j\f$ and \f$\sum_{j=0}^{j=i-1} y_j\f$ can be accumulated in the same loop. */
\ No newline at end of file diff --git a/dw/Makefile.am b/dw/Makefile.am index 3014b35d..4cd53099 100644 --- a/dw/Makefile.am +++ b/dw/Makefile.am @@ -68,6 +68,7 @@ libDw_widgets_a_SOURCES = \ tablecell.cc \ tablecell.hh \ textblock.cc \ + textblock_linebreaking.cc \ textblock.hh EXTRA_DIST = preview.xbm diff --git a/dw/fltkviewbase.cc b/dw/fltkviewbase.cc index 1d385cf9..240937e2 100644 --- a/dw/fltkviewbase.cc +++ b/dw/fltkviewbase.cc @@ -531,9 +531,9 @@ FltkWidgetView::~FltkWidgetView () } void FltkWidgetView::drawText (core::style::Font *font, - core::style::Color *color, - core::style::Color::Shading shading, - int X, int Y, const char *text, int len) + core::style::Color *color, + core::style::Color::Shading shading, + int X, int Y, const char *text, int len) { FltkFont *ff = (FltkFont*)font; fl_font(ff->font, ff->size); diff --git a/dw/tablecell.cc b/dw/tablecell.cc index 5b93fe86..90dc310d 100644 --- a/dw/tablecell.cc +++ b/dw/tablecell.cc @@ -42,12 +42,12 @@ TableCell::~TableCell() { } -void TableCell::wordWrap(int wordIndex) +void TableCell::wordWrap(int wordIndex, bool wrapAll) { Textblock::Word *word; const char *p; - Textblock::wordWrap (wordIndex); + Textblock::wordWrap (wordIndex, wrapAll); if (charWordIndex == -1) { word = words->getRef (wordIndex); diff --git a/dw/tablecell.hh b/dw/tablecell.hh index 318d1f4e..4bb8633c 100644 --- a/dw/tablecell.hh +++ b/dw/tablecell.hh @@ -12,7 +12,7 @@ private: int charWordIndex, charWordPos; protected: - void wordWrap(int wordIndex); + void wordWrap (int wordIndex, bool wrapAll); int getValue (); void setMaxValue (int maxValue, int value); diff --git a/dw/textblock.cc b/dw/textblock.cc index 3778687c..df0b926e 100644 --- a/dw/textblock.cc +++ b/dw/textblock.cc @@ -18,13 +18,12 @@ */ - #include "textblock.hh" #include "../lout/msg.h" #include "../lout/misc.hh" #include <stdio.h> -#include <limits.h> +#include <math.h> /* * Local variables @@ -51,8 +50,6 @@ Textblock::Textblock (bool limitTextWidth) 
hasListitemValue = false; innerPadding = 0; line1Offset = 0; - line1OffsetEff = 0; - ignoreLine1OffsetSometimes = false; mustQueueResize = false; redrawY = 0; lastWordDrawn = -1; @@ -67,13 +64,12 @@ Textblock::Textblock (bool limitTextWidth) * TODO: Some tests would be useful. */ lines = new misc::SimpleVector <Line> (1); + nonTemporaryLines = 0; words = new misc::SimpleVector <Word> (1); anchors = new misc::SimpleVector <Anchor> (1); //DBG_OBJ_SET_NUM(this, "num_lines", num_lines); - lastLineWidth = 0; - lastLineParMax = 0; wrapRef = -1; //DBG_OBJ_SET_NUM(this, "last_line_width", last_line_width); @@ -112,6 +108,7 @@ Textblock::~Textblock () delete word->content.widget; word->style->unref (); word->spaceStyle->unref (); + word->hyphenStyle->unref (); } for (int i = 0; i < anchors->size(); i++) { @@ -139,11 +136,21 @@ Textblock::~Textblock () void Textblock::sizeRequestImpl (core::Requisition *requisition) { rewrap (); + showMissingLines (); if (lines->size () > 0) { Line *lastLine = lines->getRef (lines->size () - 1); - requisition->width = - misc::max (lastLine->maxLineWidth, lastLineWidth); + requisition->width = lastLine->maxLineWidth; + + PRINTF ("[%p] SIZE_REQUEST: lines[0]->boxAscent = %d\n", + this, lines->getRef(0)->boxAscent); + PRINTF ("[%p] SIZE_REQUEST: lines[%d]->top = %d\n", + this, lines->size () - 1, lastLine->top); + PRINTF ("[%p] SIZE_REQUEST: lines[%d]->boxAscent = %d\n", + this, lines->size () - 1, lastLine->boxAscent); + PRINTF ("[%p] SIZE_REQUEST: lines[%d]->boxDescent = %d\n", + this, lines->size () - 1, lastLine->boxDescent); + /* Note: the breakSpace of the last line is ignored, so breaks at the end of a textblock are not visible. 
*/ requisition->ascent = lines->getRef(0)->boxAscent; @@ -151,7 +158,7 @@ void Textblock::sizeRequestImpl (core::Requisition *requisition) + lastLine->boxAscent + lastLine->boxDescent - lines->getRef(0)->boxAscent; } else { - requisition->width = lastLineWidth; + requisition->width = 0; // before: lastLineWidth; requisition->ascent = 0; requisition->descent = 0; } @@ -162,6 +169,9 @@ void Textblock::sizeRequestImpl (core::Requisition *requisition) if (requisition->width < availWidth) requisition->width = availWidth; + + PRINTF("[%p] SIZE_REQUEST: %d x %d + %d\n", this, requisition->width, + requisition->ascent, requisition->descent); } /** @@ -205,23 +215,27 @@ void Textblock::getExtremesImpl (core::Extremes *extremes) int wordIndex, lineIndex; int parMax; - //DBG_MSG (widget, "extremes", 0, "getExtremesImpl"); - //DBG_MSG_START (widget); + showMissingLines (); if (lines->size () == 0) { /* empty page */ extremes->minWidth = 0; extremes->maxWidth = 0; + + PRINTF ("GET_EXTREMES: empty (but %d words)\n", words->size()); } else if (wrapRef == -1) { /* no rewrap necessary -> values in lines are up to date */ line = lines->getRef (lines->size () - 1); extremes->minWidth = line->maxParMin; - extremes->maxWidth = misc::max (line->maxParMax, lastLineParMax); - //DBG_MSG (widget, "extremes", 0, "simple case"); + extremes->maxWidth = line->maxParMax; + + PRINTF ("GET_EXTREMES: no rewrap => %d, %d\n", + line->maxParMin, line->maxParMax); } else { /* Calculate the extremes, based on the values in the line from where a rewrap is necessary. 
*/ - //DBG_MSG (widget, "extremes", 0, "complex case"); + + PRINTF ("GET_EXTREMES: complex case ...\n"); if (wrapRef == 0) { extremes->minWidth = 0; @@ -232,97 +246,65 @@ void Textblock::getExtremesImpl (core::Extremes *extremes) extremes->minWidth = line->maxParMin; extremes->maxWidth = line->maxParMax; parMax = line->parMax; - - //DBG_MSGF (widget, "extremes", 0, "parMin = %d", parMin); } - //_MSG ("*** parMin = %d\n", parMin); - int prevWordSpace = 0; for (lineIndex = wrapRef; lineIndex < lines->size (); lineIndex++) { - //DBG_MSGF (widget, "extremes", 0, "line %d", lineIndex); - //DBG_MSG_START (widget); - int parMin = 0; - line = lines->getRef (lineIndex); + int parMin = 0; for (wordIndex = line->firstWord; wordIndex <= line->lastWord; wordIndex++) { word = words->getRef (wordIndex); - getWordExtremes (word, &wordExtremes); + getWordExtremes (word, &wordExtremes); if (wordIndex == 0) { - wordExtremes.minWidth += line1OffsetEff; - wordExtremes.maxWidth += line1OffsetEff; - //DEBUG_MSG (DEBUG_SIZE_LEVEL + 1, - // " (next plus %d)\n", line1OffsetEff); + wordExtremes.minWidth += line1Offset; + wordExtremes.maxWidth += line1Offset; } + extremes->minWidth = misc::max (extremes->minWidth, + wordExtremes.minWidth); - if (extremes->minWidth < wordExtremes.minWidth) - extremes->minWidth = wordExtremes.minWidth; - - _MSG("parMax = %d, wordMaxWidth=%d, prevWordSpace=%d\n", - parMax, wordExtremes.maxWidth, prevWordSpace); if (word->content.type != core::Content::BREAK) parMax += prevWordSpace; parMax += wordExtremes.maxWidth; - if (prevWord && !canBreakAfter(prevWord)) { + if (prevWord && !prevWord->badnessAndPenalty.lineMustBeBroken ()) parMin += prevWordSpace + wordExtremes.minWidth; - } else { + else parMin = wordExtremes.minWidth; - } - if (extremes->minWidth < parMin) { + if (extremes->minWidth < parMin) extremes->minWidth = parMin; - } prevWordSpace = word->origSpace; prevWord = word; - - //DEBUG_MSG (DEBUG_SIZE_LEVEL + 1, - // " word %s: maxWidth = %d\n", - // 
word->content.text, - // word_extremes.maxWidth); } if ((words->getRef(line->lastWord)->content.type == core::Content::BREAK ) || lineIndex == lines->size () - 1 ) { - - //DEBUG_MSG (DEBUG_SIZE_LEVEL + 2, - // " parMax = %d, after word %d (%s)\n", - // parMax, line->last_word - 1, - // word->content.text); - - if (extremes->maxWidth < parMax) - extremes->maxWidth = parMax; - + extremes->maxWidth = misc::max (extremes->maxWidth, parMax); prevWordSpace = 0; parMax = 0; } - - //DBG_MSG_END (widget); } - //DEBUG_MSG (DEBUG_SIZE_LEVEL + 3, " Result: %d, %d\n", - // extremes->minWidth, extremes->maxWidth); } - //DBG_MSGF (widget, "extremes", 0, "width difference: %d + %d", - // innerPadding, getStyle()->boxDiffWidth ()); - int diff = innerPadding + getStyle()->boxDiffWidth (); extremes->minWidth += diff; extremes->maxWidth += diff; - - //DBG_MSG_END (widget); } void Textblock::sizeAllocateImpl (core::Allocation *allocation) { + PRINTF ("SIZE_ALLOCATE: %d, %d, %d x %d + %d\n", + allocation->x, allocation->y, allocation->width, + allocation->ascent, allocation->descent); + int lineIndex, wordIndex; Line *line; Word *word; @@ -461,16 +443,19 @@ void Textblock::markExtremesChange (int ref) */ void Textblock::markChange (int ref) { + /* By the way: ref == -1 may have two different causes: (i) flush() + calls "queueResize (-1, true)", when no rewrapping is necessary; + and (ii) a word may have parentRef == -1 , when it is not yet + added to a line. In the latter case, nothing has to be done + now, but addLine(...) will do everything neccessary. 
*/ if (ref != -1) { - //DBG_MSGF (page, "wrap", 0, "markChange (ref = %d)", ref); - if (wrapRef == -1) wrapRef = ref; else wrapRef = misc::min (wrapRef, ref); - - //DBG_OBJ_SET_NUM (this, "wrap_ref", wrapRef); } + + PRINTF ("[%p] MARK_CHANGE (%d) => %d\n", this, ref, wrapRef); } void Textblock::setWidth (int width) @@ -731,361 +716,6 @@ core::Iterator *Textblock::iterator (core::Content::Type mask, bool atEnd) return new TextblockIterator (this, mask, atEnd); } -/* - * ... - * - * availWidth is passed from wordWrap, to avoid calculating it twice. - */ -void Textblock::justifyLine (Line *line, int availWidth) -{ - /* To avoid rounding errors, the calculation is based on accumulated - * values (*_cum). */ - int i; - int origSpaceSum, origSpaceCum; - int effSpaceDiffCum, lastEffSpaceDiffCum; - int diff; - - diff = availWidth - lastLineWidth; - if (diff > 0) { - origSpaceSum = 0; - for (i = line->firstWord; i < line->lastWord; i++) - origSpaceSum += words->getRef(i)->origSpace; - - origSpaceCum = 0; - lastEffSpaceDiffCum = 0; - for (i = line->firstWord; i < line->lastWord; i++) { - origSpaceCum += words->getRef(i)->origSpace; - - if (origSpaceCum == 0) - effSpaceDiffCum = lastEffSpaceDiffCum; - else - effSpaceDiffCum = diff * origSpaceCum / origSpaceSum; - - words->getRef(i)->effSpace = words->getRef(i)->origSpace + - (effSpaceDiffCum - lastEffSpaceDiffCum); - //DBG_OBJ_ARRSET_NUM (this, "words.%d.effSpace", i, - // words->getRef(i)->effSpace); - - lastEffSpaceDiffCum = effSpaceDiffCum; - } - } -} - - -Textblock::Line *Textblock::addLine (int wordIndex, bool newPar) -{ - Line *lastLine; - - //DBG_MSG (page, "wrap", 0, "addLine"); - //DBG_MSG_START (page); - - lines->increase (); - //DBG_OBJ_SET_NUM(this, "num_lines", lines->size ()); - - //DEBUG_MSG (DEBUG_REWRAP_LEVEL, "--- new line %d in %p, with word %d of %d" - // "\n", lines->size () - 1, page, word_ind, words->size()); - - lastLine = lines->getRef (lines->size () - 1); - - if (lines->size () == 1) { - 
lastLine->top = 0; - lastLine->maxLineWidth = line1OffsetEff; - lastLine->maxParMin = 0; - lastLine->maxParMax = 0; - } else { - Line *prevLine = lines->getRef (lines->size () - 2); - - lastLine->top = prevLine->top + prevLine->boxAscent + - prevLine->boxDescent + prevLine->breakSpace; - lastLine->maxLineWidth = prevLine->maxLineWidth; - lastLine->maxParMin = prevLine->maxParMin; - lastLine->maxParMax = prevLine->maxParMax; - } - - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.top", lines->size () - 1, - // lastLine->top); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.maxLineWidth", lines->size () - 1, - // lastLine->maxLineWidth); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.maxParMin", lines->size () - 1, - // lastLine->maxParMin); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.maxParMax", lines->size () - 1, - // lastLine->maxParMax); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.parMin", lines->size () - 1, - // lastLine->parMin); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.parMax", lines->size () - 1, - // lastLine->parMax); - - lastLine->firstWord = wordIndex; - lastLine->boxAscent = lastLine->contentAscent = 0; - lastLine->boxDescent = lastLine->contentDescent = 0; - lastLine->marginDescent = 0; - lastLine->breakSpace = 0; - lastLine->leftOffset = 0; - - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.ascent", lines->size () - 1, - // lastLine->boxAscent); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.descent", lines->size () - 1, - // lastLine->boxDescent); - - /* update values in line */ - lastLine->maxLineWidth = misc::max (lastLine->maxLineWidth, lastLineWidth); - - if (lines->size () > 1) - lastLineWidth = 0; - else - lastLineWidth = line1OffsetEff; - - if (newPar) { - lastLine->maxParMax = misc::max (lastLine->maxParMax, lastLineParMax); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.maxParMax", lines->size () - 1, - // lastLine->maxParMax); - - if (lines->size () > 1) { - lastLineParMax = 0; - } else { - lastLineParMax = line1OffsetEff; - } - - //DBG_OBJ_SET_NUM(this, "lastLineParMax", lastLineParMax); - } - 
- lastLine->parMax = lastLineParMax; - - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.parMin", lines->size () - 1, - // lastLine->parMin); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.parMax", lines->size () - 1, - // lastLine->parMax); - - //DBG_MSG_END (page); - return lastLine; -} - -/* - * This method is called in two cases: (i) when a word is added - * (ii) when a page has to be (partially) rewrapped. It does word wrap, - * and adds new lines if necessary. - */ -void Textblock::wordWrap(int wordIndex) -{ - Line *lastLine; - Word *word; - int availWidth, lastSpace, leftOffset, len; - bool newLine = false, newPar = false, canBreakBefore = true; - core::Extremes wordExtremes; - - //DBG_MSGF (page, "wrap", 0, "wordWrap (%d): %s, width = %d", - // wordIndex, words->getRef(wordIndex)->content.text), - // words->getRef(wordIndex)->size.width); - //DBG_MSG_START (page); - - availWidth = this->availWidth - getStyle()->boxDiffWidth() - innerPadding; - if (limitTextWidth && - layout->getUsesViewport () && - availWidth > layout->getWidthViewport () - 10) - availWidth = layout->getWidthViewport () - 10; - - word = words->getRef (wordIndex); - word->effSpace = word->origSpace; - - /* Test whether line1Offset can be used. 
*/ - if (wordIndex == 0) { - if (ignoreLine1OffsetSometimes && - line1Offset + word->size.width > availWidth) { - line1OffsetEff = 0; - } else { - int indent = 0; - - if (word->content.type == core::Content::WIDGET && - word->content.widget->blockLevel() == true) { - /* don't use text-indent when nesting blocks */ - } else { - if (core::style::isPerLength(getStyle()->textIndent)) { - indent = misc::roundInt(this->availWidth * - core::style::perLengthVal (getStyle()->textIndent)); - } else { - indent = core::style::absLengthVal (getStyle()->textIndent); - } - } - line1OffsetEff = line1Offset + indent; - } - } - - if (lines->size () == 0) { - //DBG_MSG (page, "wrap", 0, "first line"); - newLine = true; - newPar = true; - lastLine = NULL; - } else { - Word *prevWord = words->getRef (wordIndex - 1); - - lastLine = lines->getRef (lines->size () - 1); - - if (prevWord->content.type == core::Content::BREAK) { - //DBG_MSG (page, "wrap", 0, "after a break"); - /* previous word is a break */ - newLine = true; - newPar = true; - } else if (!canBreakAfter (prevWord)) { - canBreakBefore = false; - // no break within nowrap - newLine = false; - newPar = false; - if (lastLineWidth + prevWord->origSpace + word->size.width > - availWidth) - markChange (lines->size () - 1); - } else if (lastLine->firstWord != wordIndex) { - // check if we need to break because nowrap sequence is following - newLine = false; - int lineWidthNeeded = lastLineWidth + prevWord->origSpace; - for (int i = wordIndex; i < words->size (); i++) { - Word *w = words->getRef (i); - - if (w->content.type == core::Content::BREAK || - (word->content.type == core::Content::WIDGET && - word->content.widget->blockLevel())) - break; - - lineWidthNeeded += w->size.width; - - if (lineWidthNeeded > availWidth) { - newLine = true; - break; - } else if (canBreakAfter (w)) { - break; - } - - lineWidthNeeded += w->origSpace; - } - } - } - - if (newLine) { - if (word->style->textAlign == core::style::TEXT_ALIGN_JUSTIFY && - 
lastLine != NULL && !newPar) { - justifyLine (lastLine, availWidth); - } - lastLine = addLine (wordIndex, newPar); - } - - lastLine->lastWord = wordIndex; - lastLine->boxAscent = misc::max (lastLine->boxAscent, word->size.ascent); - lastLine->boxDescent = misc::max (lastLine->boxDescent, word->size.descent); - - len = word->style->font->ascent; - if (word->style->valign == core::style::VALIGN_SUPER) - len += len / 2; - lastLine->contentAscent = misc::max (lastLine->contentAscent, len); - - len = word->style->font->descent; - if (word->style->valign == core::style::VALIGN_SUB) - len += word->style->font->ascent / 3; - lastLine->contentDescent = misc::max (lastLine->contentDescent, len); - - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.ascent", lines->size () - 1, - // lastLine->boxAscent); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.descent", lines->size () - 1, - // lastLine->boxDescent); - - if (word->content.type == core::Content::WIDGET) { - int collapseMarginTop = 0; - - lastLine->marginDescent = - misc::max (lastLine->marginDescent, - word->size.descent + - word->content.widget->getStyle()->margin.bottom); - - if (lines->size () == 1 && - word->content.widget->blockLevel () && - getStyle ()->borderWidth.top == 0 && - getStyle ()->padding.top == 0) { - // collapse top margins of parent element and its first child - // see: http://www.w3.org/TR/CSS21/box.html#collapsing-margins - collapseMarginTop = getStyle ()->margin.top; - } - - lastLine->boxAscent = - misc::max (lastLine->boxAscent, - word->size.ascent, - word->size.ascent - + word->content.widget->getStyle()->margin.top - - collapseMarginTop); - - } else { - lastLine->marginDescent = - misc::max (lastLine->marginDescent, lastLine->boxDescent); - - if (word->content.type == core::Content::BREAK) - lastLine->breakSpace = - misc::max (word->content.breakSpace, - lastLine->marginDescent - lastLine->boxDescent, - lastLine->breakSpace); - } - - lastSpace = (wordIndex > 0) ? 
words->getRef(wordIndex - 1)->origSpace : 0; - - if (!newLine) - lastLineWidth += lastSpace; - if (!newPar) { - lastLineParMax += lastSpace; - } - - lastLineWidth += word->size.width; - - getWordExtremes (word, &wordExtremes); - lastLineParMax += wordExtremes.maxWidth; - - if (!canBreakBefore) { - lastLineParMin += wordExtremes.minWidth + lastSpace; - /* This may also increase the accumulated minimum word width. */ - lastLine->maxParMin = misc::max (lastLine->maxParMin, lastLineParMin); - } else { - lastLineParMin = wordExtremes.minWidth; - lastLine->maxParMin = - misc::max (lastLine->maxParMin, wordExtremes.minWidth); - } - - //DBG_OBJ_SET_NUM(this, "lastLine_par_min", lastLineParMin); - //DBG_OBJ_SET_NUM(this, "lastLine_par_max", lastLineParMax); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.par_min", lines->size () - 1, - // lastLine->par_min); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.par_max", lines->size () - 1, - // lastLine->par_max); - //DBG_OBJ_ARRSET_NUM (this, "lines.%d.max_word_min", lines->size () - 1, - // lastLine->max_word_min); - - /* Align the line. - * \todo Use block's style instead once paragraphs become proper blocks. - */ - if (word->content.type != core::Content::BREAK) { - switch (word->style->textAlign) { - case core::style::TEXT_ALIGN_LEFT: - case core::style::TEXT_ALIGN_JUSTIFY: /* see some lines above */ - case core::style::TEXT_ALIGN_STRING: /* handled elsewhere (in the - * future) */ - leftOffset = 0; - break; - case core::style::TEXT_ALIGN_RIGHT: - leftOffset = availWidth - lastLineWidth; - break; - case core::style::TEXT_ALIGN_CENTER: - leftOffset = (availWidth - lastLineWidth) / 2; - break; - default: - /* compiler happiness */ - leftOffset = 0; - } - - /* For large lines (images etc), which do not fit into the viewport: */ - if (leftOffset < 0) - leftOffset = 0; - - lastLine->leftOffset = leftOffset; - } - mustQueueResize = true; - - //DBG_MSG_END (page); -} - /** * Calculate the size of a widget within the page. 
@@ -1144,93 +774,6 @@ void Textblock::calcWidgetSize (core::Widget *widget, core::Requisition *size) size->descent -= wstyle->margin.bottom; } -/** - * Rewrap the page from the line from which this is necessary. - * There are basically two times we'll want to do this: - * either when the viewport is resized, or when the size changes on one - * of the child widgets. - */ -void Textblock::rewrap () -{ - int i, wordIndex; - Word *word; - Line *lastLine; - - if (wrapRef == -1) - /* page does not have to be rewrapped */ - return; - - //DBG_MSGF (page, "wrap", 0, - // "rewrap: wrapRef = %d, in page with %d word(s)", - // wrapRef, words->size()); - //DBG_MSG_START (page); - - /* All lines up from wrapRef will be rebuild from the word list, - * the line list up from this position is rebuild. */ - lines->setSize (wrapRef); - lastLineWidth = 0; - lastLineParMin = 0; - //DBG_OBJ_SET_NUM(this, "num_lines", lines->size ()); - //DBG_OBJ_SET_NUM(this, "lastLine_width", lastLineWidth); - - /* In the word list, start at the last word plus one in the line before. */ - if (wrapRef > 0) { - /* Note: In this case, wordWrap() will immediately find the need - * to rewrap the line, since we start with the last one (plus one). - * This is also the reason, why lastLineWidth is set - * to the length of the line. 
*/ - lastLine = lines->getRef (lines->size () - 1); - - lastLineParMax = lastLine->parMax; - - // take line1OffsetEff into account, if lastLine is the first line - if (lines->size () - 1 == 0) - lastLineWidth = line1OffsetEff; - - wordIndex = lastLine->lastWord + 1; - for (i = lastLine->firstWord; i < lastLine->lastWord; i++) - lastLineWidth += (words->getRef(i)->size.width + - words->getRef(i)->origSpace); - lastLineWidth += words->getRef(lastLine->lastWord)->size.width; - } else { - lastLineParMax = 0; - - wordIndex = 0; - } - - for (; wordIndex < words->size (); wordIndex++) { - word = words->getRef (wordIndex); - - if (word->content.type == core::Content::WIDGET) - calcWidgetSize (word->content.widget, &word->size); - wordWrap (wordIndex); - - if (word->content.type == core::Content::WIDGET) { - word->content.widget->parentRef = lines->size () - 1; - //DBG_OBJ_SET_NUM (word->content.widget, "parent_ref", - // word->content.widget->parent_ref); - } - - //DEBUG_MSG(DEBUG_REWRAP_LEVEL, - // "Assigning parent_ref = %d to rewrapped word %d, " - // "in page with %d word(s)\n", - // lines->size () - 1, wordIndex, words->size()); - - /* todo_refactoring: - if (word->content.type == DW_CONTENT_ANCHOR) - p_Dw_gtk_viewport_change_anchor - (widget, word->content.anchor, - Dw_page_line_total_y_offset (page, - &page->lines[lines->size () - 1])); - */ - } - - /* Next time, the page will not have to be rewrapped. */ - wrapRef = -1; - - //DBG_MSG_END (page); -} - /* * Draw the decorations on a word. */ @@ -1314,14 +857,58 @@ void Textblock::drawText(core::View *view, core::style::Style *style, } /* - * Draw a word of text. + * Draw a word of text. 
TODO New description; */ -void Textblock::drawWord(int wordIndex, core::View *view,core::Rectangle *area, - int xWidget, int yWidgetBase) +void Textblock::drawWord (Line *line, int wordIndex1, int wordIndex2, + core::View *view, core::Rectangle *area, + int xWidget, int yWidgetBase) { - Word *word = words->getRef(wordIndex); + core::style::Style *style = words->getRef(wordIndex1)->style; + bool drawHyphen = wordIndex2 == line->lastWord + && words->getRef(wordIndex2)->hyphenWidth > 0; + + if (wordIndex1 == wordIndex2 && !drawHyphen) { + // Simple case, where copying in one buffer is not needed. + Word *word = words->getRef (wordIndex1); + drawWord0 (wordIndex1, wordIndex2, word->content.text, word->size.width, + style, view, area, xWidget, yWidgetBase); + } else { + // Concaternate all words in a new buffer. + int l = 0, totalWidth = 0; + for (int i = wordIndex1; i <= wordIndex2; i++) { + Word *w = words->getRef (i); + l += strlen (w->content.text); + totalWidth += w->size.width; + } + + char text[l + (drawHyphen ? 2 : 0) + 1]; + int p = 0; + for (int i = wordIndex1; i <= wordIndex2; i++) { + const char * t = words->getRef(i)->content.text; + strcpy (text + p, t); + p += strlen (t); + } + + if(drawHyphen) { + text[p++] = 0xc2; + text[p++] = 0xad; + text[p++] = 0; + } + + drawWord0 (wordIndex1, wordIndex2, text, totalWidth, + style, view, area, xWidget, yWidgetBase); + } +} + +/** + * TODO Comment + */ +void Textblock::drawWord0 (int wordIndex1, int wordIndex2, + const char *text, int totalWidth, + core::style::Style *style, core::View *view, + core::Rectangle *area, int xWidget, int yWidgetBase) +{ int xWorld = allocation.x + xWidget; - core::style::Style *style = word->style; int yWorldBase; /* Adjust the text baseline if the word is <SUP>-ed or <SUB>-ed. 
*/ @@ -1333,33 +920,33 @@ void Textblock::drawWord(int wordIndex, core::View *view,core::Rectangle *area, yWorldBase = yWidgetBase + allocation.y; drawText (view, style, core::style::Color::SHADING_NORMAL, xWorld, - yWorldBase, word->content.text, 0, strlen (word->content.text)); + yWorldBase, text, 0, strlen (text)); if (style->textDecoration) decorateText(view, style, core::style::Color::SHADING_NORMAL, xWorld, - yWorldBase, word->size.width); + yWorldBase, totalWidth); for (int layer = 0; layer < core::HIGHLIGHT_NUM_LAYERS; layer++) { - if (hlStart[layer].index <= wordIndex && - hlEnd[layer].index >= wordIndex) { - const int wordLen = strlen (word->content.text); + if (hlStart[layer].index <= wordIndex1 && + hlEnd[layer].index >= wordIndex2) { + const int wordLen = strlen (text); int xStart, width; int firstCharIdx = 0; int lastCharIdx = wordLen; - if (wordIndex == hlStart[layer].index) + if (wordIndex1 == hlStart[layer].index) firstCharIdx = misc::min (hlStart[layer].nChar, wordLen); - if (wordIndex == hlEnd[layer].index) + if (wordIndex2 == hlEnd[layer].index) lastCharIdx = misc::min (hlEnd[layer].nChar, wordLen); xStart = xWorld; if (firstCharIdx) - xStart += textWidth (word->content.text, 0, firstCharIdx, style); + xStart += textWidth (text, 0, firstCharIdx, style); if (firstCharIdx == 0 && lastCharIdx == wordLen) - width = word->size.width; + width = totalWidth; else - width = textWidth (word->content.text, firstCharIdx, + width = textWidth (text, firstCharIdx, lastCharIdx - firstCharIdx, style); if (width > 0) { /* Highlight text */ @@ -1376,7 +963,7 @@ void Textblock::drawWord(int wordIndex, core::View *view,core::Rectangle *area, /* Highlight the text. 
*/ drawText (view, style, core::style::Color::SHADING_INVERSE, xStart, - yWorldBase, word->content.text, firstCharIdx, + yWorldBase, text, firstCharIdx, lastCharIdx - firstCharIdx); if (style->textDecoration) @@ -1445,18 +1032,12 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area) int xWidget = lineXOffsetWidget(line); int yWidgetBase = lineYOffsetWidget (line) + line->boxAscent; - /* Here's an idea on how to optimize this routine to minimize the number - * of drawing calls: - * - * Copy the text from the words into a buffer, adding a new word - * only if: the attributes match, and the spacing is either zero or - * equal to the width of ' '. In the latter case, copy a " " into - * the buffer. Then draw the buffer. */ - for (int wordIndex = line->firstWord; - wordIndex <= line->lastWord && xWidget < area->x + area->width; + wordIndex <= line->lastWord + /* TODO && xWidget < area->x + area->width*/; wordIndex++) { Word *word = words->getRef(wordIndex); + int wordSize = word->size.width; if (xWidget + word->size.width + word->effSpace >= area->x) { if (word->content.type == core::Content::TEXT || @@ -1470,12 +1051,26 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area) if (child->intersects (area, &childArea)) child->draw (view, &childArea); } else { + /* TODO: include in drawWord: if (word->style->hasBackground ()) { drawBox (view, word->style, area, xWidget, yWidgetBase - line->boxAscent, word->size.width, line->boxAscent + line->boxDescent, false); - } - drawWord(wordIndex, view, area, xWidget, yWidgetBase); + }*/ + int wordIndex2 = wordIndex; + while (wordIndex2 < line->lastWord && + words->getRef(wordIndex2)->hyphenWidth > 0 && + word->style == words->getRef(wordIndex2 + 1)->style) + wordIndex2++; + + drawWord(line, wordIndex, wordIndex2, view, area, + xWidget, yWidgetBase); + wordSize = 0; + for (int i = wordIndex; i <= wordIndex2; i++) + wordSize += words->getRef(i)->size.width; + + wordIndex = 
wordIndex2; + word = words->getRef(wordIndex); } } if (word->effSpace > 0 && wordIndex < line->lastWord && @@ -1492,7 +1087,7 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area) } } - xWidget += word->size.width + word->effSpace; + xWidget += wordSize + word->effSpace; } } @@ -1599,6 +1194,9 @@ Textblock::Word *Textblock::findWord (int x, int y, bool *inSpace) void Textblock::draw (core::View *view, core::Rectangle *area) { + PRINTF ("DRAW: %d, %d, %d x %d\n", + area->x, area->y, area->width, area->height); + int lineIndex; Line *line; @@ -1629,10 +1227,11 @@ Textblock::Word *Textblock::addWord (int width, int ascent, int descent, word->size.width = width; word->size.ascent = ascent; word->size.descent = descent; - word->origSpace = 0; - word->effSpace = 0; + word->origSpace = word->effSpace = word->stretchability = + word->shrinkability = 0; + word->hyphenWidth = 0; + word->badnessAndPenalty.setPenaltyProhibitBreak (); word->content.space = false; - word->content.breakType = core::Content::BREAK_NO; //DBG_OBJ_ARRSET_NUM (this, "words.%d.size.width", words->size() - 1, // word->size.width); @@ -1649,6 +1248,8 @@ Textblock::Word *Textblock::addWord (int width, int ascent, int descent, word->style = style; word->spaceStyle = style; + word->hyphenStyle = style; + style->ref (); style->ref (); style->ref (); @@ -1765,23 +1366,119 @@ void Textblock::calcTextSize (const char *text, size_t len, /** - * Add a word to the page structure. + * Add a word to the page structure. If it contains soft hyphens, it is + * divided. */ void Textblock::addText (const char *text, size_t len, core::style::Style *style) { + // Count hyphens. + int numHyphens = 0; + for (size_t i = 0; i < len - 1; i++) + // (0xc2, 0xad) is the UTF-8 representation of a soft hyphen (Unicode + // 0xc2). + if((unsigned char)text[i] == 0xc2 && (unsigned char)text[i + 1] == 0xad) + numHyphens++; + + if (numHyphens == 0) { + // Simple (and often) case: no soft hyphens. 
+ core::Requisition size; + calcTextSize (text, len, style, &size); + addText0 (text, len, style, &size); + } else { + PRINTF("HYPHENATION: '"); + for (size_t i = 0; i < len; i++) + PUTCHAR(text[i]); + PRINTF("', with %d hyphen(s)\n", numHyphens); + + // Store hyphen positions. + int n = 0, hyphenPos[numHyphens]; + for (size_t i = 0; i < len - 1; i++) + if((unsigned char)text[i] == 0xc2 && + (unsigned char)text[i + 1] == 0xad) + hyphenPos[n++] = i; + + // Get text without hyphens. (There are numHyphens + 1 parts in the word, + // and 2 * numHyphens bytes less, 2 for each hyphen, are needed.) + char textWithoutHyphens[len - 2 * numHyphens]; + int start = 0; // related to "text" + for (int i = 0; i < numHyphens + 1; i++) { + int end = (i == numHyphens) ? len : hyphenPos[i]; + memmove (textWithoutHyphens + start - 2 * i, text + start, + end - start); + start = end + 2; + } + + PRINTF("H... without hyphens: '"); + for (size_t i = 0; i < len - 2 * numHyphens; i++) + PUTCHAR(textWithoutHyphens[i]); + PRINTF("'\n"); + + // Calc sizes. + core::Requisition wordSize[numHyphens + 1]; + + // The size of the last part is calculated in a simple way. + int lastStart = hyphenPos[numHyphens - 1] + 2; + calcTextSize (text + lastStart, len - lastStart, style, + &wordSize[numHyphens]); + + PRINTF("H... [%d] '", numHyphens); + for (size_t i = 0; i < len - lastStart; i++) + PUTCHAR(text[i + lastStart]); + PRINTF("' -> %d\n", wordSize[numHyphens].width); + + // The rest is more complicated. TODO Documentation. + for (int i = numHyphens - 1; i >= 0; i--) { + int start = (i == 0) ? 0 : hyphenPos[i - 1] - 2 * (i - 1); + calcTextSize (textWithoutHyphens + start, + len - 2 * numHyphens - start, style, &wordSize[i]); + + PRINTF("H... [%d] '", i); + for (size_t j = 0; j < len - 2 * numHyphens - start; j++) + PUTCHAR(textWithoutHyphens[j + start]); + PRINTF("' -> %d\n", wordSize[i].width); + + for (int j = i + 1; j < numHyphens + 1; j++) { + wordSize[i].width -= wordSize[j].width; + PRINTF("H... 
- %d = %d\n", wordSize[j].width, wordSize[i].width); + } + } + + // Finished! + for (int i = 0; i < numHyphens + 1; i++) { + int start = (i == 0) ? 0 : hyphenPos[i - 1] + 2; + int end = (i == numHyphens) ? len : hyphenPos[i]; + addText0 (text + start, end - start, style, &wordSize[i]); + + PRINTF("H... [%d] '", i); + for (int j = start; j < end; j++) + PUTCHAR(text[j]); + PRINTF("' added\n"); + + if(i < numHyphens) { + addHyphen (style); + PRINTF("H... yphen added\n"); + } + } + } +} + +/** + * Add a word (without hyphens) to the page structure. + */ +void Textblock::addText0 (const char *text, size_t len, + core::style::Style *style, core::Requisition *size) +{ Word *word; - core::Requisition size; - calcTextSize (text, len, style, &size); - word = addWord (size.width, size.ascent, size.descent, style); + word = addWord (size->width, size->ascent, size->descent, style); word->content.type = core::Content::TEXT; word->content.text = layout->textZone->strndup(text, len); //DBG_OBJ_ARRSET_STR (page, "words.%d.content.text", words->size() - 1, // word->content.text); - wordWrap (words->size () - 1); + wordWrap (words->size () - 1, false); } /** @@ -1798,6 +1495,8 @@ void Textblock::addWidget (core::Widget *widget, core::style::Style *style) * end of this function, the correct value is assigned. */ widget->parentRef = -1; + PRINTF("%p becomes child of %p\n", widget, this); + widget->setParent (this); widget->setStyle (style); @@ -1810,8 +1509,7 @@ void Textblock::addWidget (core::Widget *widget, core::style::Style *style) //DBG_OBJ_ARRSET_PTR (page, "words.%d.content.widget", words->size() - 1, // word->content.widget); - wordWrap (words->size () - 1); - word->content.widget->parentRef = lines->size () - 1; + wordWrap (words->size () - 1, false); //DBG_OBJ_SET_NUM (word->content.widget, "parent_ref", // word->content.widget->parent_ref); @@ -1879,11 +1577,17 @@ void Textblock::addSpace (core::style::Style *style) // This is important e.g. 
to be able to break between foo and bar in: // <span style="white-space:nowrap">foo </span> bar addBreakOption (style); - + if (!word->content.space) { + word->badnessAndPenalty.setPenalty (0); word->content.space = true; word->effSpace = word->origSpace = style->font->spaceWidth + style->wordSpacing; + word->stretchability = word->origSpace / 2; + if(style->textAlign == core::style::TEXT_ALIGN_JUSTIFY) + word->shrinkability = word->origSpace / 3; + else + word->shrinkability = 0; //DBG_OBJ_ARRSET_NUM (this, "words.%d.origSpace", wordIndex, // word->origSpace); @@ -1891,13 +1595,36 @@ void Textblock::addSpace (core::style::Style *style) // word->effSpace); //DBG_OBJ_ARRSET_NUM (this, "words.%d.content.space", wordIndex, // word->content.space); + word->spaceStyle->unref (); word->spaceStyle = style; style->ref (); + + accumulateWordData (wordIndex); } } } +void Textblock::addHyphen (core::style::Style *style) +{ + int wordIndex = words->size () - 1; + + if (wordIndex >= 0) { + Word *word = words->getRef(wordIndex); + + word->badnessAndPenalty.setPenalty (HYPHEN_BREAK); + //word->penalty = 0; + // TODO Optimize? Like spaces? 
+ word->hyphenWidth = layout->textWidth (style->font, "\xc2\xad", 2); + + word->hyphenStyle->unref (); + word->hyphenStyle = style; + style->ref (); + + accumulateWordData (wordIndex); + } +} + /** * Cause a paragraph break */ @@ -1969,8 +1696,9 @@ void Textblock::addParbreak (int space, core::style::Style *style) word = addWord (0, 0, 0, style); word->content.type = core::Content::BREAK; + word->badnessAndPenalty.setPenaltyForceBreak (); word->content.breakSpace = space; - wordWrap (words->size () - 1); + wordWrap (words->size () - 1, false); } /* @@ -1990,8 +1718,9 @@ void Textblock::addLinebreak (core::style::Style *style) word = addWord (0, 0, 0, style); word->content.type = core::Content::BREAK; + word->badnessAndPenalty.setPenaltyForceBreak (); word->content.breakSpace = 0; - wordWrap (words->size () - 1); + wordWrap (words->size () - 1, false); } @@ -2064,6 +1793,9 @@ void Textblock::handOverBreak (core::style::Style *style) */ void Textblock::flush () { + PRINTF ("[%p] FLUSH => %s (parentRef = %d)\n", + this, mustQueueResize ? "true" : "false", parentRef); + if (mustQueueResize) { queueResize (-1, true); mustQueueResize = false; diff --git a/dw/textblock.hh b/dw/textblock.hh index 3f98f878..b301db7d 100644 --- a/dw/textblock.hh +++ b/dw/textblock.hh @@ -1,15 +1,27 @@ #ifndef __DW_TEXTBLOCK_HH__ #define __DW_TEXTBLOCK_HH__ +#include <limits.h> + #include "core.hh" #include "../lout/misc.hh" +// These were used when improved line breaking and hyphenation were +// implemented. Should be cleaned up; perhaps reactivate RTFL again. +#define PRINTF(fmt, ...) +#define PUTCHAR(ch) + namespace dw { /** * \brief A Widget for rendering text blocks, i.e. paragraphs or sequences * of paragraphs. * + * <div style="border: 2px solid #ff0000; padding: 0.5em 1em; + * background-color: #ffe0e0"><b>Warning:</b> The recent changes (line + * breaking and hyphenation) have not yet been incorporated into this + * documentation. 
See \ref dw-line-breaking.</div> + * * <h3>Signals</h3> * * dw::Textblock uses the signals defined in @@ -130,7 +142,50 @@ namespace dw { */ class Textblock: public core::Widget { +private: + /** + * This class encapsulates the badness/penalty calculation, and so + * (i) makes changes (hopefully) simpler, and (ii) hides the + * integer arithmetics (floating point arithmetics avoided for + * performance reasons). Unfortunately, the value range of the + * badness is not well defined, so fiddling with the penalties is a + * bit difficult. + */ + class BadnessAndPenalty + { + private: + enum { TOO_LOOSE, TOO_TIGHT, BADNESS_VALUE } badnessState; + enum { FORCE_BREAK, PROHIBIT_BREAK, PENALTY_VALUE } penaltyState; + int badness, penalty; + + // for debugging: + int totalWidth, idealWidth, totalStretchability, totalShrinkability; + + int badnessInfinities (); + int penaltyInfinities (); + int badnessValue (); + int penaltyValue (); + + public: + void calcBadness (int totalWidth, int idealWidth, + int totalStretchability, int totalShrinkability); + void setPenalty (int penalty); + void setPenaltyProhibitBreak (); + void setPenaltyForceBreak (); + + bool lineTooTight (); + bool lineMustBeBroken (); + bool lineCanBeBroken (); + int compareTo (BadnessAndPenalty *other); + + void print (); + }; + protected: + enum { + HYPHEN_BREAK = 1000000 // to be tested and tuned + }; + struct Line { int firstWord; /* first word's index in word vector */ @@ -150,9 +205,10 @@ protected: int maxLineWidth; /* maximum of all line widths */ int maxParMin; /* maximum of all paragraph minima */ int maxParMax; /* maximum of all paragraph maxima */ - int parMax; /* the maximal total width down from the last - * paragraph start, to the *beginning* of the - * line */ + int parMax; /* The maximal total width down from the last + * paragraph start, to the *ene* of this + * line. (Notice that the semantics have + * changed.) 
*/ }; struct Word @@ -161,13 +217,35 @@ protected: core::Requisition size; /* Space after the word, only if it's not a break: */ short origSpace; /* from font, set by addSpace */ + short stretchability, shrinkability; short effSpace; /* effective space, set by wordWrap, * used for drawing etc. */ + short hyphenWidth; /* Additional width, when a word is part + * (except the last part) of a hyphenationed + * word. Has to be added to the width, when + * this is the last word of the line, and + * "hyphenWidth > 0" is also used to decide + * weather to draw a hyphen. */ core::Content content; + // accumulated values, relative to the beginning of the line + int totalWidth; /* The sum of all word widths; plus all + spaces, excluding the one of this + word; plus the hypthen width of this + word (but of course, no hyphen + widths of previous words. In other + words: the value compared to the + ideal width of the line, if the line + would be broken after this word. */ + int totalStretchability; // includes all *before* current word + int totalShrinkability; // includes all *before* current word + BadnessAndPenalty badnessAndPenalty; /* when line is broken after this + * word */ + core::style::Style *style; core::style::Style *spaceStyle; /* initially the same as of the word, later set by a_Dw_page_add_space */ + core::style::Style *hyphenStyle; }; struct Anchor @@ -236,13 +314,10 @@ protected: /* These values are set by set_... 
*/ int availWidth, availAscent, availDescent; - int lastLineWidth; - int lastLineParMin; /* width of the current non-breakable word sequence - * used by wordWrap () */ - int lastLineParMax; int wrapRef; /* [0 based] */ lout::misc::SimpleVector <Line> *lines; + int nonTemporaryLines; lout::misc::SimpleVector <Word> *words; lout::misc::SimpleVector <Anchor> *anchors; @@ -254,25 +329,28 @@ protected: void queueDrawRange (int index1, int index2); void getWordExtremes (Word *word, core::Extremes *extremes); - inline bool canBreakAfter (Word *word) - { - return word->content.breakType == core::Content::BREAK_OK; - } void markChange (int ref); - void justifyLine (Line *line, int availWidth); - Line *addLine (int wordInd, bool newPar); + void justifyLine (Line *line, int diff); + Line *addLine (int firstWord, int lastWord, bool temporary); void calcWidgetSize (core::Widget *widget, core::Requisition *size); void rewrap (); - void decorateText(core::View *view, core::style::Style *style, - core::style::Color::Shading shading, - int x, int yBase, int width); - void drawText(core::View *view, core::style::Style *style, - core::style::Color::Shading shading, int x, int y, - const char *text, int start, int len); - void drawWord(int wordIndex, core::View *view, core::Rectangle *area, - int xWidget, int yWidgetBase); - void drawSpace(int wordIndex, core::View *view, core::Rectangle *area, - int xWidget, int yWidgetBase); + void showMissingLines (); + void removeTemporaryLines (); + + void decorateText (core::View *view, core::style::Style *style, + core::style::Color::Shading shading, + int x, int yBase, int width); + void drawText (core::View *view, core::style::Style *style, + core::style::Color::Shading shading, int x, int y, + const char *text, int start, int len); + void drawWord (Line *line, int wordIndex1, int wordIndex2, core::View *view, + core::Rectangle *area, int xWidget, int yWidgetBase); + void drawWord0 (int wordIndex1, int wordIndex2, + const char *text, int 
totalWidth, + core::style::Style *style, core::View *view, + core::Rectangle *area, int xWidget, int yWidgetBase); + void drawSpace (int wordIndex, core::View *view, core::Rectangle *area, + int xWidget, int yWidgetBase); void drawLine (Line *line, core::View *view, core::Rectangle *area); int findLineIndex (int y); int findLineOfWord (int wordIndex); @@ -348,7 +426,14 @@ protected: bool sendSelectionEvent (core::SelectionState::EventType eventType, core::MousePositionEvent *event); - virtual void wordWrap(int wordIndex); + void accumulateWordExtremees (int firstWord, int lastWord, + int *maxOfMinWidth, int *sumOfMaxWidth); + virtual void wordWrap (int wordIndex, bool wrapAll); + void accumulateWordForLine (int lineIndex, int wordIndex); + void accumulateWordData(int wordIndex); + int calcAvailWidth (); + void initLine1Offset (int wordIndex); + void alignLine (Line *line); void sizeRequestImpl (core::Requisition *requisition); void getExtremesImpl (core::Extremes *extremes); @@ -370,6 +455,9 @@ protected: void removeChild (Widget *child); + void addText0 (const char *text, size_t len, core::style::Style *style, + core::Requisition *size); + public: static int CLASS_ID; @@ -388,14 +476,16 @@ public: void addWidget (core::Widget *widget, core::style::Style *style); bool addAnchor (const char *name, core::style::Style *style); void addSpace(core::style::Style *style); - inline void addBreakOption (core::style::Style *style) + inline void addBreakOption (core::style::Style *style) // TODO needed? 
{ int wordIndex = words->size () - 1; if (wordIndex >= 0 && style->whiteSpace != core::style::WHITE_SPACE_NOWRAP && style->whiteSpace != core::style::WHITE_SPACE_PRE) - words->getRef(wordIndex)->content.breakType = core::Content::BREAK_OK; + words->getRef(wordIndex)->badnessAndPenalty.setPenaltyForceBreak (); } + + void addHyphen(core::style::Style *style); void addParbreak (int space, core::style::Style *style); void addLinebreak (core::style::Style *style); diff --git a/dw/textblock_linebreaking.cc b/dw/textblock_linebreaking.cc new file mode 100644 index 00000000..7037330e --- /dev/null +++ b/dw/textblock_linebreaking.cc @@ -0,0 +1,688 @@ +#include "textblock.hh" +#include "../lout/msg.h" +#include "../lout/misc.hh" + +#include <stdio.h> +#include <math.h> + +using namespace lout; + +namespace dw { + +int Textblock::BadnessAndPenalty::badnessInfinities () +{ + switch (badnessState) { + case TOO_LOOSE: + case TOO_TIGHT: + return 1; + + case BADNESS_VALUE: + return 0; + } + + // compiler happiness + lout::misc::assertNotReached (); + return 0; +} + +int Textblock::BadnessAndPenalty::penaltyInfinities () +{ + switch (penaltyState) { + case FORCE_BREAK: + return -1; + + case PROHIBIT_BREAK: + return 1; + + case PENALTY_VALUE: + return 0; + } + + // compiler happiness + lout::misc::assertNotReached (); + return 0; +} + +int Textblock::BadnessAndPenalty::badnessValue () +{ + return badnessState == BADNESS_VALUE ? badness : 0; +} + +int Textblock::BadnessAndPenalty::penaltyValue () +{ + return penaltyState == PENALTY_VALUE ? 
penalty : 0; +} + +void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth, + int totalStretchability, + int totalShrinkability) +{ + this->totalWidth = totalWidth; + this->idealWidth = idealWidth; + this->totalStretchability = totalStretchability; + this->totalShrinkability = totalShrinkability; + + if (totalWidth == idealWidth) { + badnessState = BADNESS_VALUE; + badness = 0; + } else if (totalWidth < idealWidth) { + if (totalStretchability == 0) + badnessState = TOO_LOOSE; + else { + int ratio = 100 * (idealWidth - totalWidth) / totalStretchability; + if (ratio > 1024) + badnessState = TOO_LOOSE; + else { + badnessState = BADNESS_VALUE; + badness = ratio * ratio * ratio; + } + } + } else { // if (word->totalWidth > availWidth) + if (totalShrinkability == 0) + badnessState = TOO_TIGHT; + else { + // Important: ratio is positive here. + int ratio = 100 * (totalWidth - idealWidth) / totalShrinkability; + if (ratio >= 100) + badnessState = TOO_TIGHT; + else { + badnessState = BADNESS_VALUE; + badness = ratio * ratio * ratio; + } + } + } +} + +void Textblock::BadnessAndPenalty::setPenalty (int penalty) +{ + this->penalty = penalty; + penaltyState = PENALTY_VALUE; +} + +void Textblock::BadnessAndPenalty::setPenaltyProhibitBreak () +{ + penaltyState = PROHIBIT_BREAK; +} + +void Textblock::BadnessAndPenalty::setPenaltyForceBreak () +{ + penaltyState = FORCE_BREAK; +} + +bool Textblock::BadnessAndPenalty::lineTooTight () +{ + return badnessState == TOO_TIGHT; +} + +bool Textblock::BadnessAndPenalty::lineMustBeBroken () +{ + return penaltyState == FORCE_BREAK; +} + +bool Textblock::BadnessAndPenalty::lineCanBeBroken () +{ + return penaltyState != PROHIBIT_BREAK; +} + +int Textblock::BadnessAndPenalty::compareTo (BadnessAndPenalty *other) +{ + int thisNumInfinities = badnessInfinities () + penaltyInfinities (); + int otherNumInfinities = + other->badnessInfinities () + other->penaltyInfinities (); + int thisValue = badnessValue () + penaltyValue (); 
+ int otherValue = other->badnessValue () + other->penaltyValue (); + + if (thisNumInfinities == otherNumInfinities) + return thisValue - otherValue; + else + return thisNumInfinities - otherNumInfinities; +} + +void Textblock::BadnessAndPenalty::print () +{ + switch (badnessState) { + case TOO_LOOSE: + PRINTF ("loose"); + break; + + case TOO_TIGHT: + PRINTF ("tight"); + break; + + case BADNESS_VALUE: + PRINTF ("%d", badness); + break; + } + + PRINTF (" [%d + %d - %d vs. %d] + ", totalWidth, totalStretchability, + totalShrinkability, idealWidth); + + switch (penaltyState) { + case FORCE_BREAK: + PRINTF ("-inf"); + break; + + case PROHIBIT_BREAK: + PRINTF ("inf"); + break; + + case PENALTY_VALUE: + PRINTF ("%d", penalty); + break; + } +} + +/* + * ... + * + * diff ... + */ +void Textblock::justifyLine (Line *line, int diff) +{ + /* To avoid rounding errors, the calculation is based on accumulated + * values. */ + + if (diff > 0) { + int stretchabilitySum = 0; + for (int i = line->firstWord; i < line->lastWord; i++) + stretchabilitySum += words->getRef(i)->stretchability; + + if (stretchabilitySum > 0) { + int stretchabilityCum = 0; + int spaceDiffCum = 0; + for (int i = line->firstWord; i < line->lastWord; i++) { + Word *word = words->getRef (i); + stretchabilityCum += word->stretchability; + int spaceDiff = + stretchabilityCum * diff / stretchabilitySum - spaceDiffCum; + spaceDiffCum += spaceDiff; + + PRINTF (" %d (of %d): diff = %d\n", i, words->size (), + spaceDiff); + + word->effSpace = word->origSpace + spaceDiff; + } + } + } else if (diff < 0) { + int shrinkabilitySum = 0; + for (int i = line->firstWord; i < line->lastWord; i++) + shrinkabilitySum += words->getRef(i)->shrinkability; + + if (shrinkabilitySum > 0) { + int shrinkabilityCum = 0; + int spaceDiffCum = 0; + for (int i = line->firstWord; i < line->lastWord; i++) { + Word *word = words->getRef (i); + shrinkabilityCum += word->shrinkability; + int spaceDiff = + shrinkabilityCum * diff / shrinkabilitySum 
- spaceDiffCum; + spaceDiffCum += spaceDiff; + + word->effSpace = word->origSpace + spaceDiff; + } + } + } +} + + +Textblock::Line *Textblock::addLine (int firstWord, int lastWord, + bool temporary) +{ + PRINTF ("[%p] ADD_LINE (%d, %d)\n", this, firstWord, lastWord); + + Word *lastWordOfLine = words->getRef(lastWord); + // Word::totalWidth includes the hyphen (which is what we want here). + int lineWidth = lastWordOfLine->totalWidth; + int maxOfMinWidth, sumOfMaxWidth; + accumulateWordExtremees (firstWord, lastWord, &maxOfMinWidth, + &sumOfMaxWidth); + + PRINTF (" words[%d]->totalWidth = %d\n", lastWord, + lastWordOfLine->totalWidth); + + lines->increase (); + if(!temporary) { + // If the last line was temporary, this will be temporary, too, even + // if not requested. + if (lines->size () == 1 || nonTemporaryLines == lines->size () -1) + nonTemporaryLines = lines->size (); + } + + PRINTF ("nonTemporaryLines = %d\n", nonTemporaryLines); + + int lineIndex = lines->size () - 1; + Line *line = lines->getRef (lineIndex); + + line->firstWord = firstWord; + line->lastWord = lastWord; + line->boxAscent = line->contentAscent = 0; + line->boxDescent = line->contentDescent = 0; + line->marginDescent = 0; + line->breakSpace = 0; + line->leftOffset = 0; + + if (lines->size () == 1) { + line->top = 0; + + // TODO What to do with this one: lastLine->maxLineWidth = line1OffsetEff; + line->maxLineWidth = lineWidth; + line->maxParMin = maxOfMinWidth; + line->parMax = line->maxParMax = sumOfMaxWidth; + } else { + Line *prevLine = lines->getRef (lines->size () - 2); + + line->top = prevLine->top + prevLine->boxAscent + + prevLine->boxDescent + prevLine->breakSpace; + + line->maxLineWidth = misc::max (lineWidth, prevLine->maxLineWidth); + line->maxParMin = misc::max (maxOfMinWidth, prevLine->maxParMin); + + Word *lastWordOfPrevLine = words->getRef (prevLine->lastWord); + if (lastWordOfPrevLine->content.type == core::Content::BREAK) + // This line starts a new paragraph. 
+ line->parMax = sumOfMaxWidth; + else + // This line continues the paragraph from prevLine. + line->parMax = prevLine->parMax + sumOfMaxWidth; + + line->maxParMax = misc::max (line->parMax, prevLine->maxParMax); + + } + + for(int i = line->firstWord; i <= line->lastWord; i++) + accumulateWordForLine (lineIndex, i); + + PRINTF (" line[%d].top = %d\n", lines->size () - 1, line->top); + PRINTF (" line[%d].boxAscent = %d\n", lines->size () - 1, line->boxAscent); + PRINTF (" line[%d].boxDescent = %d\n", + lines->size () - 1, line->boxDescent); + PRINTF (" line[%d].contentAscent = %d\n", lines->size () - 1, + line->contentAscent); + PRINTF (" line[%d].contentDescent = %d\n", + lines->size () - 1, line->contentDescent); + + alignLine (line); + mustQueueResize = true; + + return line; +} + +void Textblock::accumulateWordExtremees (int firstWord, int lastWord, + int *maxOfMinWidth, int *sumOfMaxWidth) +{ + *maxOfMinWidth = *sumOfMaxWidth = 0; + + for (int i = firstWord; i <= lastWord; i++) { + Word *word = words->getRef (i); + core::Extremes extremes; + getWordExtremes (word, &extremes); + + *maxOfMinWidth = misc::min (*maxOfMinWidth, extremes.minWidth); + *sumOfMaxWidth += (extremes.maxWidth + word->origSpace); + // Regarding the sum: if this is the end of the paragraph, it + // does not matter, since word->space is 0 in this case. + } +} + +/* + * This method is called in two cases: (i) when a word is added + * (ii) when a page has to be (partially) rewrapped. It does word wrap, + * and adds new lines if necessary. + */ +void Textblock::wordWrap (int wordIndex, bool wrapAll) +{ + Word *word; + //core::Extremes wordExtremes; + + if (!wrapAll) + removeTemporaryLines (); + + initLine1Offset (wordIndex); + + word = words->getRef (wordIndex); + word->effSpace = word->origSpace; + + accumulateWordData (wordIndex); + + bool newLine; + do { + bool tempNewLine = false; + int firstIndex = lines->size() == 0 ? 
+ 0 : lines->getRef(lines->size() - 1)->lastWord + 1; + int searchUntil; + + if (wrapAll && wordIndex >= firstIndex && wordIndex == words->size() -1) { + newLine = true; + searchUntil = wordIndex; + tempNewLine = true; + PRINTF ("NEW LINE: last word\n"); + } else if (wordIndex >= firstIndex && + word->badnessAndPenalty.lineMustBeBroken ()) { + newLine = true; + searchUntil = wordIndex; + PRINTF ("NEW LINE: forced break\n"); + } else if (wordIndex > firstIndex && + word->badnessAndPenalty.lineTooTight () && + words->getRef(wordIndex- 1) + ->badnessAndPenalty.lineCanBeBroken ()) { + // TODO Comment the last condition (also below where the minimum is + // searched for) + newLine = true; + searchUntil = wordIndex - 1; + PRINTF ("NEW LINE: line too tight\n"); + } else + newLine = false; + + if(newLine) { + PRINTF (" searching from %d to %d\n", firstIndex, searchUntil); + + accumulateWordData (wordIndex); + + int breakPos = -1; + for (int i = firstIndex; i <= searchUntil; i++) { + Word *w = words->getRef(i); + + if(w->content.type & core::Content::REAL_CONTENT) { + PRINTF (" %d (of %d): ", i, words->size ()); + + switch(w->content.type) { + case core::Content::TEXT: + PRINTF ("\"%s\"", w->content.text); + break; + case core::Content::WIDGET: + PRINTF ("<widget: %p>\n", w->content.widget); + break; + case core::Content::BREAK: + PRINTF ("<break>\n"); + break; + default: + PRINTF ("<?>\n"); + break; + } + + PRINTF (" [%d / %d + %d - %d] => ", + w->size.width, w->origSpace, w->stretchability, + w->shrinkability); + w->badnessAndPenalty.print (); + PRINTF ("\n"); + } + + + // TODO: is this condition needed: + // if(w->badnessAndPenalty.lineCanBeBroken ()) ? + + if (breakPos == -1 || + w->badnessAndPenalty.compareTo + (&words->getRef(breakPos)->badnessAndPenalty) <= 0) + // "<=" instead of "<" in the next lines tends to result in more + // words per line -- theoretically. Practically, the case "==" + // will never occur. 
+ breakPos = i; + } + + if (wrapAll && searchUntil == words->size () - 1) { + // Since no break and no space is added, the last word + // will have a penalty of inf. Actually, it should be -inf, + // since it is the last word. However, since more words may + // follow, the penalty is not changed, but here, the search + // is corrected (maybe only temporarily). + Word *lastWord = words->getRef (searchUntil); + BadnessAndPenalty correctedBap = lastWord->badnessAndPenalty; + correctedBap.setPenaltyForceBreak (); + if (correctedBap.compareTo + (&words->getRef(breakPos)->badnessAndPenalty) <= 0) + breakPos = searchUntil; + } + + PRINTF (" new line from %d to %d\n", firstIndex, breakPos); + addLine (firstIndex, breakPos, tempNewLine); + PRINTF (" accumulating again from %d to %d\n", + breakPos + 1, wordIndex); + + for(int i = breakPos + 1; i <= wordIndex; i++) + accumulateWordData (i); + } + } while (newLine); +} + +void Textblock::accumulateWordForLine (int lineIndex, int wordIndex) +{ + Line *line = lines->getRef (lineIndex); + Word *word = words->getRef (wordIndex); + + PRINTF (" %d + %d / %d + %d\n", line->boxAscent, line->boxDescent, + word->size.ascent, word->size.descent); + + line->boxAscent = misc::max (line->boxAscent, word->size.ascent); + line->boxDescent = misc::max (line->boxDescent, word->size.descent); + + int len = word->style->font->ascent; + if (word->style->valign == core::style::VALIGN_SUPER) + len += len / 2; + line->contentAscent = misc::max (line->contentAscent, len); + + len = word->style->font->descent; + if (word->style->valign == core::style::VALIGN_SUB) + len += word->style->font->ascent / 3; + line->contentDescent = misc::max (line->contentDescent, len); + + if (word->content.type == core::Content::WIDGET) { + int collapseMarginTop = 0; + + line->marginDescent = + misc::max (line->marginDescent, + word->size.descent + + word->content.widget->getStyle()->margin.bottom); + + if (lines->size () == 1 && + word->content.widget->blockLevel () && 
getStyle ()->borderWidth.top == 0 && + getStyle ()->padding.top == 0) { + // collapse top margins of parent element and its first child + // see: http://www.w3.org/TR/CSS21/box.html#collapsing-margins + collapseMarginTop = getStyle ()->margin.top; + } + + line->boxAscent = + misc::max (line->boxAscent, + word->size.ascent, + word->size.ascent + + word->content.widget->getStyle()->margin.top + - collapseMarginTop); + + word->content.widget->parentRef = lineIndex; + } else { + line->marginDescent = + misc::max (line->marginDescent, line->boxDescent); + + if (word->content.type == core::Content::BREAK) + line->breakSpace = + misc::max (word->content.breakSpace, + line->marginDescent - line->boxDescent, + line->breakSpace); + } +} + +void Textblock::accumulateWordData (int wordIndex) +{ + PRINTF ("[%p] ACCUMULATE_WORD_DATA: %d\n", this, wordIndex); + + Word *word = words->getRef (wordIndex); + int availWidth = calcAvailWidth (); // todo: variable? parameter? + + if (wordIndex == 0 || + (lines->size () > 0 && + wordIndex == lines->getRef(lines->size () - 1)->lastWord + 1)) { + // first word of the (not necessarily yet existing) line + word->totalWidth = word->size.width + word->hyphenWidth; + word->totalStretchability = 0; + word->totalShrinkability = 0; + } else { + Word *prevWord = words->getRef (wordIndex - 1); + + word->totalWidth = prevWord->totalWidth + + prevWord->origSpace - prevWord->hyphenWidth + + word->size.width + word->hyphenWidth; + word->totalStretchability = + prevWord->totalStretchability + prevWord->stretchability; + word->totalShrinkability = + prevWord->totalShrinkability + prevWord->shrinkability; + } + + PRINTF(" line width: %d of %d\n", word->totalWidth, availWidth); + PRINTF(" spaces: + %d - %d\n", + word->totalStretchability, word->totalShrinkability); + + word->badnessAndPenalty.calcBadness (word->totalWidth, availWidth, + word->totalStretchability, + word->totalShrinkability); +} + +int Textblock::calcAvailWidth () +{ + int availWidth = + this->availWidth - 
getStyle()->boxDiffWidth() - innerPadding; + if (limitTextWidth && + layout->getUsesViewport () && + availWidth > layout->getWidthViewport () - 10) + availWidth = layout->getWidthViewport () - 10; + + //PRINTF("[%p] CALC_AVAIL_WIDTH => %d - %d - %d = %d\n", + // this, this->availWidth, getStyle()->boxDiffWidth(), innerPadding, + // availWidth); + + return availWidth; +} + +void Textblock::initLine1Offset (int wordIndex) +{ + Word *word = words->getRef (wordIndex); + + /* Test whether line1Offset can be used. */ + if (wordIndex == 0) { + if (ignoreLine1OffsetSometimes && + line1Offset + word->size.width > availWidth) { + line1OffsetEff = 0; + } else { + int indent = 0; + + if (word->content.type == core::Content::WIDGET && + word->content.widget->blockLevel() == true) { + /* don't use text-indent when nesting blocks */ + } else { + if (core::style::isPerLength(getStyle()->textIndent)) { + indent = misc::roundInt(this->availWidth * + core::style::perLengthVal (getStyle()->textIndent)); + } else { + indent = core::style::absLengthVal (getStyle()->textIndent); + } + } + line1OffsetEff = line1Offset + indent; + } + } +} + +/** + * Align the line. + * + * \todo Use block's style instead once paragraphs become proper blocks. + */ +void Textblock::alignLine (Line *line) +{ + int availWidth = calcAvailWidth (); + Word *firstWord = words->getRef (line->firstWord); + Word *lastWord = words->getRef (line->lastWord); + + for (int i = line->firstWord; i < line->lastWord; i++) + words->getRef(i)->origSpace = words->getRef(i)->effSpace; + + if (firstWord->content.type != core::Content::BREAK) { + switch (firstWord->style->textAlign) { + case core::style::TEXT_ALIGN_LEFT: + case core::style::TEXT_ALIGN_STRING: /* handled elsewhere (in the + * future)? 
*/ + line->leftOffset = 0; + break; + case core::style::TEXT_ALIGN_JUSTIFY: /* see some lines above */ + line->leftOffset = 0; + if(lastWord->content.type != core::Content::BREAK && + line->lastWord != words->size () - 1) { + PRINTF (" justifyLine => %d vs. %d\n", + lastWord->totalWidth, availWidth); + justifyLine (line, availWidth - lastWord->totalWidth); + } + break; + case core::style::TEXT_ALIGN_RIGHT: + line->leftOffset = availWidth - lastWord->totalWidth; + break; + case core::style::TEXT_ALIGN_CENTER: + line->leftOffset = (availWidth - lastWord->totalWidth) / 2; + break; + default: + /* compiler happiness */ + line->leftOffset = 0; + } + + /* For large lines (images etc), which do not fit into the viewport: */ + if (line->leftOffset < 0) + line->leftOffset = 0; + } +} + +/** + * Rewrap the page from the line from which this is necessary. + * There are basically two times we'll want to do this: + * either when the viewport is resized, or when the size changes on one + * of the child widgets. + */ +void Textblock::rewrap () +{ + PRINTF ("[%p] REWRAP: wrapRef = %d\n", this, wrapRef); + + if (wrapRef == -1) + /* page does not have to be rewrapped */ + return; + + /* All lines up from wrapRef will be rebuild from the word list, + * the line list up from this position is rebuild. */ + lines->setSize (wrapRef); + nonTemporaryLines = misc::min (nonTemporaryLines, wrapRef); + + int firstWord; + if (lines->size () > 0) + firstWord = lines->getLastRef()->lastWord + 1; + else + firstWord = 0; + + for (int i = firstWord; i < words->size (); i++) { + Word *word = words->getRef (i); + + if (word->content.type == core::Content::WIDGET) + calcWidgetSize (word->content.widget, &word->size); + + wordWrap (i, false); + + if (word->content.type == core::Content::WIDGET) { + word->content.widget->parentRef = lines->size () - 1; + } + } + + /* Next time, the page will not have to be rewrapped. 
*/ + wrapRef = -1; +} + +void Textblock::showMissingLines () +{ + int firstWordToWrap = lines->size () > 0 ? + lines->getRef(lines->size () - 1)->lastWord + 1 : 0; + for (int i = firstWordToWrap; i < words->size (); i++) + wordWrap (i, true); +} + + +void Textblock::removeTemporaryLines () +{ + lines->setSize (nonTemporaryLines); +} + +} // namespace dw diff --git a/dw/types.hh b/dw/types.hh index eac834ff..65983fad 100644 --- a/dw/types.hh +++ b/dw/types.hh @@ -194,16 +194,12 @@ struct Content REAL_CONTENT = 0xff ^ (START | END), SELECTION_CONTENT = TEXT | WIDGET | BREAK }; - enum BreakType { - BREAK_NO, - BREAK_OK - }; + /* Content is embedded in struct Word therefore we * try to be space efficient. */ short type; bool space; - unsigned char breakType; union { const char *text; Widget *widget; diff --git a/lout/container.hh b/lout/container.hh index 8a9e3e0f..d2484b13 100644 --- a/lout/container.hh +++ b/lout/container.hh @@ -3,6 +3,8 @@ #include "object.hh" +namespace lout { + /** * \brief This namespace contains a framework for container classes, which * members are instances of object::Object. @@ -15,8 +17,6 @@ * * \sa container::untyped, container::typed */ -namespace lout { - namespace container { /** diff --git a/lout/identity.hh b/lout/identity.hh index 6102933d..1f0b4bdf 100644 --- a/lout/identity.hh +++ b/lout/identity.hh @@ -5,12 +5,11 @@ #include "container.hh" #include "signal.hh" +namespace lout { + /** * \brief Some stuff to identify classes of objects at run-time. */ - -namespace lout { - namespace identity { /** diff --git a/lout/misc.hh b/lout/misc.hh index 2a8584eb..05b87602 100644 --- a/lout/misc.hh +++ b/lout/misc.hh @@ -7,13 +7,13 @@ #include <string.h> #include <assert.h> +namespace lout { + /** * \brief Miscellaneous stuff, which does not fit anywhere else. * * Actually, the other parts, beginning with ::object, depend on this. */ -namespace lout { - namespace misc { template <class T> inline T min (T a, T b) { return a < b ? 
a : b; } @@ -201,6 +201,38 @@ public: } /** + * \brief Return the reference of the first element (convenience method). + */ + inline T* getFirstRef () { + assert (this->num > 0); + return this->array; + } + + /** + * \brief Return the first element, explicitly. + */ + inline T getFirst () { + assert (this->num > 0); + return this->array[0]; + } + + /** + * \brief Return the reference of the last element (convenience method). + */ + inline T* getLastRef () { + assert (this->num > 0); + return this->array + this->num - 1; + } + + /** + * \brief Return the last element, explicitly. + */ + inline T getLast () { + assert (this->num > 0); + return this->array[this->num - 1]; + } + + /** * \brief Store an object in the vector. * * Unlike in container::untyped::Vector and container::typed::Vector, diff --git a/lout/object.hh b/lout/object.hh index 789542fe..9df69987 100644 --- a/lout/object.hh +++ b/lout/object.hh @@ -6,12 +6,12 @@ #include "misc.hh" +namespace lout { + /** * \brief Here, some common classes (or interfaces) are defined, to standardize * the access to other classes. */ -namespace lout { - namespace object { /** diff --git a/lout/signal.hh b/lout/signal.hh index 6b332203..117779d6 100644 --- a/lout/signal.hh +++ b/lout/signal.hh @@ -4,6 +4,8 @@ #include "object.hh" #include "container.hh" +namespace lout { + /** * \brief This namespace provides base classes to define signals. * @@ -174,8 +176,6 @@ * After this, &\em barReceiver can be connected to all instances of * BarEmitter, also multiple times. 
*/ -namespace lout { - namespace signal { class Receiver; diff --git a/test/KHM1.html b/test/KHM1.html new file mode 100644 index 00000000..b3924409 --- /dev/null +++ b/test/KHM1.html @@ -0,0 +1,56 @@ +<div style="text-align: justify"> +<p>In den al­ten Zei­ten, wo das Wün­schen noch +ge­hol­fen hat, leb­te ein Kö­nig, des­sen +Töch­ter wa­ren al­le schön, aber die jüng­ste war so +schön, daß die Son­ne sel­ber, die doch so vie­les +ge­se­hen hat, sich ver­wun­der­te so oft sie ihr +ins Ge­sicht schien. Na­he bei dem Schlos­se des +Kö­nigs lag ein gro­ßer dunk­ler Wald, und in dem +Wal­de un­ter ei­ner al­ten Lin­de war ein +Brun­nen: wenn nun der Tag recht heiß war, so ging das +Kö­nigs­kind hin­aus in den Wald und setz­te sich an +den Rand des küh­len Brun­nens: und wenn sie +Lan­ge­wei­le hat­te, so nahm sie eine +gol­de­ne Ku­gel, warf sie in die Hö­he und fieng sie +wie­der; und das war ihr liebs­tes Spiel­werk.</p> +<p>Nun trug es sich ein­mal zu, daß die gol­de­ne +Ku­gel der Kön­igs­toch­ter nicht in ihr Händ­chen +fiel, das sie in die Hö­he ge­hal­ten hat­te, +son­dern vor­bei auf die Er­de schlug und +ge­ra­de­zu ins Was­ser hin­ein roll­te. Die +Kö­nigs­toch­ter folg­te ihr mit den Aug­en nach, +aber die Ku­gel ver­schwand, und der Brun­nen war tief, so +tief daß man kei­nen Grund sah. Da fieng sie an zu wei­nen und +wein­te im­mer lau­ter und konn­te sich gar nicht +trös­ten. Und wie sie so klag­te, rief ihr je­mand zu „was +hast du vor, Kö­nigs­toch­ter, du schreist ja daß sich ein +Stein er­bar­men möchte.“ Sie sah sich um, wo­her die +Stim­me kä­me, da er­blick­te sie einen Frosch, der +sei­nen di­cken häß­li­chen Kopf aus dem Was­ser +streck­te. 
„Ach, du bists, al­ter +Was­ser­pat­scher,“ sag­te sie, „ich wei­ne über +mei­ne gol­de­ne Ku­gel, die mir in den Brun­nen +hin­ab ge­fal­len ist.“ „Sei still und wei­ne nicht,“ +ant­wor­te­te der Frosch, „ich kann wohl Rath +schaf­fen, aber was gibst du mir, wenn ich dein Spiel­werk +wie­der her­auf­ho­le?“ „Was du ha­ben willst, +lie­ber Frosch,“ sag­te sie, „mei­ne Klei­der, +mei­ne Per­len und Edel­stei­ne, auch noch die +gol­de­ne Kro­ne, die ich tra­ge.“ Der Frosch +ant­wor­te­te „dei­ne Klei­der, dei­ne +Per­len und Edel­stei­ne, und dei­ne gol­de­ne +Kro­ne, die mag ich nicht: aber wenn du mich lieb ha­ben +willst, und ich soll dein Ge­sel­le und +Spiel­ka­me­rad sein, an dei­nem Tisch­lein +ne­ben dir si­tzen, von dei­nem gol­de­nen +Tel­ler­lein es­sen, aus dei­nem Be­cher­lein +trin­ken, in dei­nem Bett­lein schla­fen: wenn du mir +das ver­sprichst, so will ich hin­un­ter stei­gen und +dir die gol­de­ne Ku­gel wie­der her­auf +ho­len.“ „Ach ja,“ sag­te sie, „ich ver­spre­che dir +alles, was du willst, wenn du mir nur die Ku­gel wie­der +bringst.“ Sie dach­te aber „was der ein­fäl­ti­ge +Frosch schwätzt, der sitzt im Was­ser bei sei­nes +Glei­chen und quackt, und kann kei­nes Men­schen +Ge­sel­le sein.“</p> +</div> diff --git a/test/KHM1b.html b/test/KHM1b.html new file mode 100644 index 00000000..adefc1ef --- /dev/null +++ b/test/KHM1b.html @@ -0,0 +1,14 @@ +<p style="text-align: justify">In den al­ten Zei­ten, wo das +Wün­schen noch ge­hol­fen hat, leb­te ein Kö­nig, +des­sen Töch­ter wa­ren al­le schön, aber die +jüng­ste war so schön, daß die Son­ne sel­ber, die doch so +vie­les ge­se­hen hat, sich ver­wun­der­te so +oft sie ihr ins Ge­sicht schien. 
Na­he bei dem Schlos­se +des Kö­nigs lag ein gro­ßer dunk­ler Wald, und in dem +Wal­de un­ter ei­ner al­ten Lin­de war ein +Brun­nen: wenn nun der Tag recht heiß war, so ging das +Kö­nigs­kind hin­aus in den Wald und setz­te sich an +den Rand des küh­len Brun­nens: und wenn sie +Lan­ge­wei­le hat­te, so nahm sie eine +gol­de­ne Ku­gel, warf sie in die Hö­he und fieng sie +wie­der; und das war ihr liebs­tes Spiel­werk.</p> diff --git a/test/KHM1c.html b/test/KHM1c.html new file mode 100644 index 00000000..3f3271fc --- /dev/null +++ b/test/KHM1c.html @@ -0,0 +1,10 @@ +<p style="text-align: justify">In den alten Zeiten, wo das Wünschen +noch geholfen hat, lebte ein König, dessen Töchter waren alle schön, +aber die jüngste war so schön, daß die Sonne selber, die doch so +vieles gesehen hat, sich verwunderte so oft sie ihr ins Gesicht +schien. Nahe bei dem Schlosse des Königs lag ein großer dunkler Wald, +und in dem Walde unter einer alten Linde war ein Brunnen: wenn nun der +Tag recht heiß war, so ging das Königskind hinaus in den Wald und +setzte sich an den Rand des kühlen Brunnens: und wenn sie Langeweile +hatte, so nahm sie eine goldene Kugel, warf sie in die Höhe und fieng +sie wieder; und das war ihr liebstes Spielwerk.</p> diff --git a/test/Makefile.am b/test/Makefile.am index a0a23c6a..daa2cef5 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -21,7 +21,8 @@ noinst_PROGRAMS = \ dw-ui-test \ fltk-browser \ shapes \ - cookies + cookies \ + liang dw_anchors_test_SOURCES = dw_anchors_test.cc dw_anchors_test_LDADD = \ @@ -159,3 +160,7 @@ cookies_SOURCES = cookies.c cookies_LDADD = \ $(top_builddir)/dpip/libDpip.a \ $(top_builddir)/dlib/libDlib.a + +liang_SOURCES = liang.cc + +liang_LDADD = $(top_builddir)/lout/liblout.a diff --git a/test/liang.cc b/test/liang.cc new file mode 100644 index 00000000..3c402e6f --- /dev/null +++ b/test/liang.cc @@ -0,0 +1,184 @@ +#include "../lout/misc.hh" +#include "../lout/object.hh" +#include "../lout/container.hh" +#include 
<stdio.h> +#include <string.h> + +#define LEN 1000 + +/* + * This is a direct translation of the Python implementation by Ned + * Batchelder. + */ + +class Hyphenator +{ +private: + lout::container::typed::HashTable <lout::object::Integer, + lout::container::typed::Collection + <lout::object::Integer> > *tree; + void insertPattern (char *s); + +public: + Hyphenator (const char *filename); + + lout::container::typed::Vector <lout::object::String> + *hyphenateWord(const char *word); +}; + +using namespace lout::object; +using namespace lout::container::typed; + +Hyphenator::Hyphenator (const char *filename) +{ + tree = new HashTable <Integer, Collection <Integer> > (true, true); + + FILE *file = fopen (filename, "r"); + while (file && !feof (file)) { + char buf[LEN + 1]; + char *s = fgets (buf, LEN, file); + if (s) { + int l = strlen (s); + if (l > 0 && s[l - 1] == '\n') + s[l - 1] = 0; + insertPattern (s); + } + } + if (file) fclose (file); else perror (filename); +} + +void Hyphenator::insertPattern (char *s) +{ + // Convert a pattern like 'a1bc3d4' into a string of chars 'abcd' + // and a list of points [ 0, 1, 0, 3, 4 ]. + int l = strlen (s); + char chars [l + 1]; + Vector <Integer> *points = new Vector <Integer> (1, true); + + // TODO numbers consisting of multiple digits? + // TODO Encoding: This implementation works exactly like the Python + // implementation, based on UTF-8. Does this always work? + int numChars = 0; + for (int i = 0; s[i]; i++) + if (s[i] >= '0' && s[i] <= '9') + points->put (new Integer (s[i] - '0'), numChars); + else + chars[numChars++] = s[i]; + chars[numChars] = 0; + + for (int i = 0; i < numChars + 1; i++) { + Integer *val = points->get (i); + if (val == NULL) + points->put (new Integer (0), i); + } + + // Insert the pattern into the tree. Each character finds a dict + // another level down in the tree, and leaf nodes have the list of + // points. 
+ + HashTable <Integer, Collection <Integer> > *t = tree; + for (int i = 0; chars[i]; i++) { + Integer c (chars[i]); + if (!t->contains(&c)) + t->put (new Integer (chars[i]), + new HashTable <Integer, Collection <Integer> > (true, true)); + t = (HashTable <Integer, Collection <Integer> >*) t->get (&c); + } + + t->put (new Integer (0), points); +} + +/** + * Given a word, returns a list of pieces, broken at the possible + * hyphenation points. + */ +Vector <String> *Hyphenator::hyphenateWord(const char *word) +{ + // Short words aren't hyphenated. + if (strlen (word) <= 4) // TODO UTF-8 + return NULL; // TODO + + // If the word is an exception, get the stored points. + // TODO + + char work[strlen (word) + 3]; + strcpy (work, "."); + strcat (work, word); // TODO tolower + strcat (work, "."); + + int l = strlen (work); + Vector <Integer> points (l + 1, true); + for (int i = 0; i < l + 1; i++) + points.put (new Integer (0), i); + + Integer null (0); + + for (int i = 0; i < l; i++) { + HashTable <Integer, Collection <Integer> > *t = tree; + for (int j = i; j < l; j++) { + Integer c (work[j]); + if (t->contains (&c)) { + t = (HashTable <Integer, Collection <Integer> >*) t->get (&c); + if (t->contains (&null)) { + Vector <Integer> *p = (Vector <Integer>*) t->get (&null); + + for (int k = 0; k < p->size (); k++) { + Integer *v1 = points.get (i + k); + Integer *v2 = p->get (k); + // TODO Not very efficient, especially here: too much + // calls of "new" + points.put(new Integer (lout::misc::max (v1->getValue (), + v2->getValue ())), + i + k); + } + } + } else + break; + } + } + + // No hyphens in the first two chars or the last two. + points.put (new Integer (0), 1); + points.put (new Integer (0), 2); + points.put (new Integer (0), points.size () - 2); + points.put (new Integer (0), points.size () - 3); + + // Examine the points to build the pieces list. 
+ Vector <String> *pieces = new Vector <String> (1, true); + char temp[strlen (word) + 1], *ptemp = temp; + + int n = lout::misc::min ((int)strlen (word), points.size () - 2); + for (int i = 0; i < n; i++) { + char c = word[i]; + int p = points.get(i + 2)->getValue (); + + *(ptemp++) = c; + if (p % 2) { + *ptemp = 0; + printf ("'%s'\n", temp); + ptemp = temp; + } + } + + *ptemp = 0; + printf ("'%s'\n", temp); + + return pieces; +} + +int main (int argc, char *argv[]) +{ + Hyphenator h ("test/hyph-de-1996.pat"); + h.hyphenateWord ("jahrhundertroman"); + puts ("---"); + h.hyphenateWord ("währenddessen"); + puts ("---"); + h.hyphenateWord ("ückerdorf"); + puts ("---"); + h.hyphenateWord ("über"); + puts ("---"); + h.hyphenateWord ("aber"); + puts ("---"); + + return 0; +} |