1 files changed, 180 insertions, 71 deletions
diff --git a/dw/textblock.cc b/dw/textblock.cc
index 045126d9..213594d3 100644
--- a/dw/textblock.cc
+++ b/dw/textblock.cc
@@ -40,7 +40,13 @@ namespace dw {
 
 int Textblock::CLASS_ID = -1;
 
-Textblock::Textblock (bool limitTextWidth)
+Textblock::DivChar Textblock::divChars[NUM_DIV_CHARS] = {
+   { "\xc2\xad", true, false, PENALTY_HYPHEN, -1 },
+   { "-", false, true, -1, PENALTY_HYPHEN },
+   { "\xe2\x80\x94", false, true, PENALTY_HYPHEN, PENALTY_HYPHEN }
+};
+
+Textblock::Textblock (bool limitTextWidth, int penaltyHyphen)
 {
    registerName ("dw::Textblock", &CLASS_ID);
    setFlags (BLOCK_LEVEL);
@@ -86,6 +92,8 @@ Textblock::Textblock (bool limitTextWidth)
    availDescent = 0;
 
    this->limitTextWidth = limitTextWidth;
+   penalties[PENALTY_HYPHEN][0] = penaltyHyphen;
+   penalties[PENALTY_HYPHEN][1] = INT_MAX; // TODO Configuration
 
    for (int layer = 0; layer < core::HIGHLIGHT_NUM_LAYERS; layer++) {
       /* hlStart[layer].index > hlEnd[layer].index means no highlighting */
@@ -289,7 +297,8 @@ void Textblock::getExtremesImpl (core::Extremes *extremes)
             }
             
             // Minimum: between two *possible* breaks (or at the end).
-            if (word->badnessAndPenalty.lineCanBeBroken () || atLastWord) {
+            // TODO: Explain why index 1 is used in lineCanBeBroken().
+            if (word->badnessAndPenalty.lineCanBeBroken (1) || atLastWord) {
                parMin += wordExtremes.minWidth + word->hyphenWidth;
                extremes->minWidth = misc::max (extremes->minWidth, parMin);
                parMin = 0;
@@ -299,7 +308,9 @@ void Textblock::getExtremesImpl (core::Extremes *extremes)
                parMin += wordExtremes.minWidth + word->origSpace;
             
             // Maximum: between two *necessary* breaks (or at the end).
-            if (word->badnessAndPenalty.lineMustBeBroken () || atLastWord) {
+            // TODO: lineMustBeBroken should be independent of the
+            // penalty index?
+            if (word->badnessAndPenalty.lineMustBeBroken (1) || atLastWord) {
                parMax += wordExtremes.maxWidth + word->hyphenWidth;
                extremes->maxWidth = misc::max (extremes->maxWidth, parMax);
                parMax = 0;
@@ -882,6 +893,7 @@ void Textblock::drawText(core::View *view, core::style::Style *style,
             break;
          }
       }
+     
       view->drawText(style->font, style->color, shading, x, y,
                      str ? str : text + start, str ? strlen(str) : len);
       if (str)
@@ -903,7 +915,7 @@ void Textblock::drawWord (Line *line, int wordIndex1, int wordIndex2,
 {
    core::style::Style *style = words->getRef(wordIndex1)->style;
    bool drawHyphen = wordIndex2 == line->lastWord
-      && words->getRef(wordIndex2)->hyphenWidth > 0;
+      && (words->getRef(wordIndex2)->flags & Word::DIV_CHAR_AT_EOL);
 
    if (style->hasBackground ()) {
       int w = 0;
@@ -937,6 +949,7 @@ void Textblock::drawWord (Line *line, int wordIndex1, int wordIndex2,
       }
 
       if(drawHyphen) {
+         // "\xc2\xad" is the UTF-8 encoding of a soft hyphen.
          text[p++] = 0xc2;
          text[p++] = 0xad;
          text[p++] = 0;
@@ -1121,7 +1134,8 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area)
                } else {
                   int wordIndex2 = wordIndex;
                   while (wordIndex2 < line->lastWord &&
-                         words->getRef(wordIndex2)->hyphenWidth > 0 &&
+                         (words->getRef(wordIndex2)->flags
+                          & Word::DRAW_AS_ONE_TEXT) &&
                          word->style == words->getRef(wordIndex2 + 1)->style)
                      wordIndex2++;
 
@@ -1135,6 +1149,7 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area)
                   word = words->getRef(wordIndex);
                }
             }
+
             if (word->effSpace > 0 && wordIndex < line->lastWord &&
                 words->getRef(wordIndex + 1)->content.type !=
                                                         core::Content::BREAK) {
@@ -1298,9 +1313,9 @@ void Textblock::fillWord (Word *word, int width, int ascent, int descent,
    word->origSpace = word->effSpace = word->stretchability =
       word->shrinkability = 0;
    word->hyphenWidth = 0;
-   word->badnessAndPenalty.setPenaltyProhibitBreak ();
+   word->badnessAndPenalty.setBothPenaltiesProhibitBreak ();
    word->content.space = false;
-   word->canBeHyphenated = canBeHyphenated;
+   word->flags = canBeHyphenated ? Word::CAN_BE_HYPHENATED : 0;
 
    word->style = style;
    word->spaceStyle = style;
@@ -1417,79 +1432,179 @@ void Textblock::calcTextSize (const char *text, size_t len,
 }
 
 /**
- * Add a word to the page structure. If it contains soft hyphens, it is
- * divided.
+ * Add a word to the page structure. If it contains dividing
+ * characters (hard or soft hyphens, em-dashes, etc.), it is divided.
  */
 void Textblock::addText (const char *text, size_t len,
                          core::style::Style *style)
 {
    PRINTF ("[%p] ADD_TEXT (%d characters)\n", this, (int)len);
 
-   // Count hyphens.
-   int numHyphens = 0;
-   for (int i = 0; i < (int)len - 1; i++)
-      // (0xc2, 0xad) is the UTF-8 representation of a soft hyphen (Unicode
-      // 0xc2).
-      if((unsigned char)text[i] == 0xc2 && (unsigned char)text[i + 1] == 0xad)
-         numHyphens++;
-
-   if (numHyphens == 0) {
-      // Simple (and common) case: no soft hyphens. May still be hyphenated
-      // automatically.
+   // Count dividing characters.
+   int numParts = 1;
+
+   for (int i = 0; i < (int)len;
+        i < (int)len && (i = layout->nextGlyph (text, i))) {
+      int foundDiv = -1;
+      for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) {
+         int lDiv = strlen (divChars[j].s);
+         if (i <= (int)len - lDiv) {
+            if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0)
+               foundDiv = j;
+         }
+      }
+
+      if (foundDiv != -1) {
+         if (divChars[foundDiv].penaltyIndexLeft != -1)
+            numParts ++;
+         if (divChars[foundDiv].penaltyIndexRight != -1)
+            numParts ++;
+      }
+   }
+
+   if (numParts == 1) {
+      // Simple (and common) case: no dividing characters. May still
+      // be hyphenated automatically.
       core::Requisition size;
       calcTextSize (text, len, style, &size);
       addText0 (text, len, true, style, &size);
    } else {
-      PRINTF("HYPHENATION: '");
+      PRINTF ("HYPHENATION: '");
       for (size_t i = 0; i < len; i++)
          PUTCHAR(text[i]);
-      PRINTF("', with %d hyphen(s)\n", numHyphens);
+      PRINTF ("', with %d parts\n", numParts);
 
       // Store hyphen positions.
-      int n = 0, hyphenPos[numHyphens], breakPos[numHyphens];
-      for (size_t i = 0; i < len - 1; i++)
-         if((unsigned char)text[i] == 0xc2 &&
-            (unsigned char)text[i + 1] == 0xad) {
-            hyphenPos[n] = i;
-            breakPos[n] = i - 2 * n;
-            n++;
+      int n = 0, totalLenSignRemoved = 0;
+      int partPenaltyIndex[numParts - 1];
+      int partStart[numParts], partEnd[numParts];
+      bool signRemoved[numParts - 1], canBeHyphenated[numParts + 1];
+      canBeHyphenated[0] = canBeHyphenated[numParts] = true;
+      partStart[0] = 0;
+      partEnd[numParts - 1] = len;
+
+      for (int i = 0; i < (int)len;
+           i < (int)len && (i = layout->nextGlyph (text, i))) {
+         int foundDiv = -1;
+         for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) {
+            int lDiv = strlen (divChars[j].s);
+            if (i <= (int)len - lDiv) {
+               if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0)
+                  foundDiv = j;
+            }
+         }
+         
+         if (foundDiv != -1) {
+            int lDiv = strlen (divChars[foundDiv].s);
+            
+            if (divChars[foundDiv].charRemoved) {
+               assert (divChars[foundDiv].penaltyIndexLeft != -1);
+               assert (divChars[foundDiv].penaltyIndexRight == -1);
+
+               partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexLeft;
+               signRemoved[n] = true;
+               canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
+               partEnd[n] = i;
+               partStart[n + 1] = i + lDiv;
+               n++;
+               totalLenSignRemoved += lDiv;
+            } else {
+               assert (divChars[foundDiv].penaltyIndexLeft != -1 ||
+                       divChars[foundDiv].penaltyIndexRight != -1);
+
+               if (divChars[foundDiv].penaltyIndexLeft != -1) {
+                  partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexLeft;
+                  signRemoved[n] = false;
+                  canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
+                  partEnd[n] = i;
+                  partStart[n + 1] = i;
+                  n++;
+               }
+
+               if (divChars[foundDiv].penaltyIndexRight != -1) {
+                  partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexRight;
+                  signRemoved[n] = false;
+                  canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated;
+                  partEnd[n] = i + lDiv;
+                  partStart[n + 1] = i + lDiv;
+                  n++;
+               }
+            }
          }
+      }
 
-      // Get text without hyphens. (There are numHyphens + 1 parts in the word,
-      // and 2 * numHyphens bytes less, 2 for each hyphen, are needed.)
-      char textWithoutHyphens[len - 2 * numHyphens];
-      int start = 0; // related to "text"
-      for (int i = 0; i < numHyphens + 1; i++) {
-         int end = (i == numHyphens) ? len : hyphenPos[i];
-         memmove (textWithoutHyphens + start - 2 * i, text + start,
-                  end - start);
-         start = end + 2;
+      // Get text without removed characters, e.g. hyphens.
+      const char *textWithoutHyphens;
+      char textWithoutHyphensBuf[len - totalLenSignRemoved];
+      int *breakPosWithoutHyphens, breakPosWithoutHyphensBuf[numParts - 1];
+
+      if (totalLenSignRemoved == 0) {
+         // No removed characters: take original arrays.
+         textWithoutHyphens = text;
+         // Ends are also break positions, except the last end, which
+         // is superfluous but does no harm (since arrays in C/C++
+         // do not have an implicit length).
+         breakPosWithoutHyphens = partEnd;
+      } else {
+         // Copy into special buffers.
+         textWithoutHyphens = textWithoutHyphensBuf;
+         breakPosWithoutHyphens = breakPosWithoutHyphensBuf;
+
+         int n = 0;
+         for (int i = 0; i < numParts; i++) {
+            memmove (textWithoutHyphensBuf + n, text + partStart[i],
+                     partEnd[i] - partStart[i]);
+            n += partEnd[i] - partStart[i];
+            if (i < numParts - 1)
+               breakPosWithoutHyphensBuf[i] = n;
+         }
       }
 
       PRINTF("H... without hyphens: '");
-      for (size_t i = 0; i < len - 2 * numHyphens; i++)
+      for (size_t i = 0; i < len - totalLenSignRemoved; i++)
          PUTCHAR(textWithoutHyphens[i]);
       PRINTF("'\n");
 
-      core::Requisition wordSize[numHyphens + 1];
-      calcTextSizes (textWithoutHyphens, len - 2 * numHyphens, style,
-                     numHyphens, breakPos, wordSize);
+      core::Requisition wordSize[numParts];
+      calcTextSizes (textWithoutHyphens, len - totalLenSignRemoved, style,
+                     numParts - 1, breakPosWithoutHyphens, wordSize);
 
       // Finished!
-      for (int i = 0; i < numHyphens + 1; i++) {
-         int start = (i == 0) ? 0 : hyphenPos[i - 1] + 2;
-         int end = (i == numHyphens) ? len : hyphenPos[i];
-         // Do not anymore hyphen automatically.
-         addText0 (text + start, end - start, false, style, &wordSize[i]);
+      for (int i = 0; i < numParts; i++) {
+         addText0 (text + partStart[i], partEnd[i] - partStart[i],
+                   // If this part adjoins at least one dividing
+                   // character for which canBeHyphenated is set to
+                   // false (this is the case for soft hyphens), do
+                   // not hyphenate.
+                   canBeHyphenated[i] && canBeHyphenated[i + 1],
+                   style, &wordSize[i]);
 
          PRINTF("H... [%d] '", i);
-         for (int j = start; j < end; j++)
+         for (int j = partStart[i]; j < partEnd[i]; j++)
             PUTCHAR(text[j]);
          PRINTF("' added\n");
 
-         if(i < numHyphens) {
-            addHyphen ();
-            PRINTF("H... yphen added\n");
+         if(i < numParts - 1) {
+            Word *word = words->getLastRef();
+
+            word->badnessAndPenalty
+               .setPenalty (0, penalties[partPenaltyIndex[i]][0]);
+            word->badnessAndPenalty
+               .setPenalty (1, penalties[partPenaltyIndex[i]][1]);
+
+            if (signRemoved[i]) {
+               // Currently, only soft hyphens (UTF-8: "\xc2\xad") can
+               // be used. See also drawWord, last section "if
+               // (drawHyphen)".
+               // The character defined in DivChar::s could be used,
+               // but it would then also have to be stored in the word.
+               word->hyphenWidth =
+                  layout->textWidth (word->style->font, "\xc2\xad", 2);
+               word->flags |= Word::DIV_CHAR_AT_EOL;
+            }
+
+            word->flags |= Word::DRAW_AS_ONE_TEXT;
+            accumulateWordData (words->size() - 1);
          }
       }
    }
@@ -1510,7 +1625,8 @@ void Textblock::calcTextSizes (const char *text, size_t textLen,
       PUTCHAR(text[i + lastStart]);
    PRINTF("' -> %d\n", wordSize[numBreaks].width);
 
-   // The rest is more complicated. TODO Documentation.
+   // The rest is more complicated. See dw-line-breaking, section
+   // "Hyphens".
    for (int i = numBreaks - 1; i >= 0; i--) {
       int start = (i == 0) ? 0 : breakPos[i - 1];
       calcTextSize (text + start, textLen - start, style, &wordSize[i]);
@@ -1533,6 +1649,11 @@ void Textblock::calcTextSizes (const char *text, size_t textLen,
 void Textblock::addText0 (const char *text, size_t len, bool canBeHyphenated,
                           core::style::Style *style, core::Requisition *size)
 {
+   //printf("[%p] addText0 ('", this);
+   //for (size_t i = 0; i < len; i++)
+   //   putchar(text[i]);
+   //printf("', %s, ...)\n", canBeHyphenated ? "true" : "false");
+
    Word *word = addWord (size->width, size->ascent, size->descent,
                          canBeHyphenated, style);
    word->content.type = core::Content::TEXT;
@@ -1683,36 +1804,24 @@ void Textblock::fillSpace (Word *word, core::style::Style *style)
  */
 void Textblock::setBreakOption (Word *word, core::style::Style *style)
 {
-   if (!word->badnessAndPenalty.lineMustBeBroken()) {
+   // TODO: lineMustBeBroken should be independent of the penalty
+   // index? Otherwise, examine the last line.
+   if (!word->badnessAndPenalty.lineMustBeBroken(0)) {
       switch (style->whiteSpace) {
       case core::style::WHITE_SPACE_NORMAL:
       case core::style::WHITE_SPACE_PRE_LINE:
       case core::style::WHITE_SPACE_PRE_WRAP:
-         word->badnessAndPenalty.setPenalty (0);
+         word->badnessAndPenalty.setBothPenalties (0);
          break;
 
       case core::style::WHITE_SPACE_PRE:
       case core::style::WHITE_SPACE_NOWRAP:
-         word->badnessAndPenalty.setPenaltyProhibitBreak ();
+         word->badnessAndPenalty.setBothPenaltiesProhibitBreak ();
          break;
       }
    }
 }
 
-void Textblock::addHyphen ()
-{
-   int wordIndex = words->size () - 1;
-
-   if (wordIndex >= 0) {
-      Word *word = words->getRef(wordIndex);
- 
-      word->badnessAndPenalty.setPenalty (HYPHEN_BREAK);
-      // TODO Optimize? Like spaces?
-      word->hyphenWidth = layout->textWidth (word->style->font, "\xc2\xad", 2);
-
-      accumulateWordData (wordIndex);
-   }
-}
 
 /**
  * Cause a paragraph break
@@ -1785,7 +1894,7 @@ void Textblock::addParbreak (int space, core::style::Style *style)
 
    word = addWord (0, 0, 0, false, style);
    word->content.type = core::Content::BREAK;
-   word->badnessAndPenalty.setPenaltyForceBreak ();
+   word->badnessAndPenalty.setBothPenaltiesForceBreak ();
    word->content.breakSpace = space;
    wordWrap (words->size () - 1, false);
 }
@@ -1808,7 +1917,7 @@ void Textblock::addLinebreak (core::style::Style *style)
       word = addWord (0, 0, 0, false, style);
 
    word->content.type = core::Content::BREAK;
-   word->badnessAndPenalty.setPenaltyForceBreak ();
+   word->badnessAndPenalty.setBothPenaltiesForceBreak ();
    word->content.breakSpace = 0;
    wordWrap (words->size () - 1, false);
 }