diff options
-rw-r--r-- | doc/dw-line-breaking.doc | 73 | ||||
-rw-r--r-- | dw/fltkviewbase.cc | 5 | ||||
-rw-r--r-- | dw/textblock.cc | 287 | ||||
-rw-r--r-- | dw/textblock.hh | 93 | ||||
-rw-r--r-- | dw/textblock_linebreaking.cc | 141 | ||||
-rw-r--r-- | src/dillo.cc | 4 | ||||
-rw-r--r-- | src/form.cc | 2 | ||||
-rw-r--r-- | src/html.cc | 2 | ||||
-rw-r--r-- | src/prefs.c | 6 | ||||
-rw-r--r-- | src/prefs.h | 2 | ||||
-rw-r--r-- | src/prefsparser.cc | 25 | ||||
-rw-r--r-- | test/hyphens-etc.html | 5 |
12 files changed, 478 insertions, 167 deletions
diff --git a/doc/dw-line-breaking.doc b/doc/dw-line-breaking.doc index 2967e98f..8d9a1df1 100644 --- a/doc/dw-line-breaking.doc +++ b/doc/dw-line-breaking.doc @@ -322,35 +322,11 @@ Bugs and Things Needing Improvement High Priority ------------- -**Bugs in hyphenation:** There seem to be problems when breaking -words containing hyphens already. Example: "Abtei-Stadt", which is -divided into "Abtei-" and "Stadt", resulting possibly in -"Abtei-<span></span>-[new line]Stadt". See also below under -"Medium Priority", on how to deal with hyphens and dashes. +None. Medium Priority --------------- -**Break hyphens and dashes:** The following rules seem to be relevant: - -- In English, an em-dash is used with no spaces around. Breaking - before and after the dash should be possible, perhaps with a - penalty > 0. (In German, an en-dash (Halbgeviert) with spaces around - is used instead.) -- After a hyphen, which is part of a compound word, a break should be - possible. As described above ("Abtei-Stadt"), this collides with - hyphenation. - -Where to implement? In the same dynamic, lazy way like hyphenation? As -part of hyphenation? - -Notice that Liang's algorithm may behave different regarding hyphens: -"Abtei-Stadt" is (using the patterns from CTAN) divided into "Abtei-" -and "Stadt", but "Nordrhein-Westfalen" is divided into "Nord", -"rhein-West", "fa", "len": the part containing the hyphen -("rhein-West") is untouched. (Sorry for the German words; if you have -got English examples, send them me.) - **Incorrect calculation of extremes:** The minimal width of a text block (as part of the width extremes, which are mainly used for tables) is defined by everything between two possible breaks. A @@ -410,8 +386,53 @@ lines will, when spaces are shrunken, get too long!) Analogous considerations must be made for right-aligned and centered text. (For centered texts, there are two adjustable spaces.) -**Hyphens in adjacent lines:** It should be simple to assign a larger +Solved (Must Be Documented) +--------------------------- + +These have been solved recently and should be documented above. + +*Bugs in hyphenation:* There seem to be problems when breaking words +containing hyphens already. Example: "Abtei-Stadt", which is divided +into "Abtei-" and "Stadt", resulting possibly in +"Abtei-<span></span>-[new line]Stadt". See also below under +"Medium Priority", on how to deal with hyphens and dashes. + +**Solution:** See next. + +*Break hyphens and dashes:* The following rules seem to be relevant: + +- In English, an em-dash is used with no spaces around. Breaking + before and after the dash should be possible, perhaps with a + penalty > 0. (In German, an en-dash (Halbgeviert) with spaces around + is used instead.) +- After a hyphen, which is part of a compound word, a break should be + possible. As described above ("Abtei-Stadt"), this collides with + hyphenation. + +Where to implement? In the same dynamic, lazy way like hyphenation? As +part of hyphenation? + +Notice that Liang's algorithm may behave different regarding hyphens: +"Abtei-Stadt" is (using the patterns from CTAN) divided into "Abtei-" +and "Stadt", but "Nordrhein-Westfalen" is divided into "Nord", +"rhein-West", "fa", "len": the part containing the hyphen +("rhein-West") is untouched. (Sorry for the German words; if you have +got English examples, send them me.)</div> + +**Solution for both:** This has been implemented in +dw::Textblock::addText, in a similar way to soft hyphens. Liang's +algorithm now only operates on the parts: "Abtei" and "Stadt"; +"Nordrhein" and "Westfalen". + +*Hyphens in adjacent lines:* It should be simple to assign a larger penalty for hyphens, when the line before is already hyphenated. This way, hyphens in adjacent lines are penalized further. +**Solved:** There are always two penalties. Must be documented in +detail. + +**Also:** + +- Configuration of penalties. + */ diff --git a/dw/fltkviewbase.cc b/dw/fltkviewbase.cc index d9782a4e..0977aac5 100644 --- a/dw/fltkviewbase.cc +++ b/dw/fltkviewbase.cc @@ -538,6 +538,11 @@ void FltkWidgetView::drawText (core::style::Font *font, core::style::Color::Shading shading, int X, int Y, const char *text, int len) { + //printf ("drawText (..., %d, %d, '", X, Y); + //for (int i = 0; i < len; i++) + // putchar (text[i]); + //printf ("'\n"); + FltkFont *ff = (FltkFont*)font; fl_font(ff->font, ff->size); fl_color(((FltkColor*)color)->colors[shading]); diff --git a/dw/textblock.cc b/dw/textblock.cc index b49362cd..5ba41068 100644 --- a/dw/textblock.cc +++ b/dw/textblock.cc @@ -40,6 +40,45 @@ namespace dw { int Textblock::CLASS_ID = -1; +Textblock::DivChar Textblock::divChars[NUM_DIV_CHARS] = { + { "\xc2\xad", true, false, true, PENALTY_HYPHEN, -1 }, + { "-", false, true, true, -1, PENALTY_HYPHEN }, + { "\xe2\x80\x94", false, true, false, + PENALTY_EM_DASH_LEFT, PENALTY_EM_DASH_RIGHT } +}; + +int Textblock::penalties[PENALTY_NUM][2] = { + { 100, 800 }, + { 800, 800 }, + { 100, 800 } +}; + +void Textblock::setPenaltyHyphen (int penaltyHyphen) +{ + penalties[PENALTY_HYPHEN][0] = penaltyHyphen; +} + +void Textblock::setPenaltyHyphen2 (int penaltyHyphen2) +{ + penalties[PENALTY_HYPHEN][1] = penaltyHyphen2; +} + +void Textblock::setPenaltyEmDashLeft (int penaltyLeftEmDash) +{ + penalties[PENALTY_EM_DASH_LEFT][0] = penaltyLeftEmDash; + penalties[PENALTY_EM_DASH_LEFT][1] = penaltyLeftEmDash; +} + +void Textblock::setPenaltyEmDashRight (int penaltyRightEmDash) +{ + penalties[PENALTY_EM_DASH_RIGHT][0] = penaltyRightEmDash; +} + +void Textblock::setPenaltyEmDashRight2 (int penaltyRightEmDash2) +{ + penalties[PENALTY_EM_DASH_RIGHT][1] = penaltyRightEmDash2; +} + Textblock::Textblock (bool limitTextWidth) { registerName ("dw::Textblock", &CLASS_ID); @@ -289,7 +328,8 @@ void Textblock::getExtremesImpl (core::Extremes *extremes) } // Minimum: between two *possible* breaks (or at the end). - if (word->badnessAndPenalty.lineCanBeBroken () || atLastWord) { + // TODO: Explain why index 1 is used in lineCanBeBroken(). + if (word->badnessAndPenalty.lineCanBeBroken (1) || atLastWord) { parMin += wordExtremes.minWidth + word->hyphenWidth; extremes->minWidth = misc::max (extremes->minWidth, parMin); parMin = 0; @@ -299,7 +339,9 @@ void Textblock::getExtremesImpl (core::Extremes *extremes) parMin += wordExtremes.minWidth + word->origSpace; // Maximum: between two *necessary* breaks (or at the end). - if (word->badnessAndPenalty.lineMustBeBroken () || atLastWord) { + // TODO: lineMustBeBroken should be independent of the + // penalty index? + if (word->badnessAndPenalty.lineMustBeBroken (1) || atLastWord) { parMax += wordExtremes.maxWidth + word->hyphenWidth; extremes->maxWidth = misc::max (extremes->maxWidth, parMax); parMax = 0; @@ -882,6 +924,7 @@ void Textblock::drawText(core::View *view, core::style::Style *style, break; } } + view->drawText(style->font, style->color, shading, x, y, str ? str : text + start, str ? strlen(str) : len); if (str) @@ -903,7 +946,7 @@ void Textblock::drawWord (Line *line, int wordIndex1, int wordIndex2, { core::style::Style *style = words->getRef(wordIndex1)->style; bool drawHyphen = wordIndex2 == line->lastWord - && words->getRef(wordIndex2)->hyphenWidth > 0; + && (words->getRef(wordIndex2)->flags & Word::DIV_CHAR_AT_EOL); if (style->hasBackground ()) { int w = 0; @@ -937,10 +980,16 @@ void Textblock::drawWord (Line *line, int wordIndex1, int wordIndex2, } if(drawHyphen) { +<<<<<<< local + // "\xc2\xad" is the UTF-8 code of a soft hyphen. + text[p++] = 0xc2; + text[p++] = 0xad; +======= // "\xe2\x80\x90" is an unconditional hyphen. text[p++] = 0xe2; text[p++] = 0x80; text[p++] = 0x90; +>>>>>>> other text[p++] = 0; } @@ -1123,7 +1172,8 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area) } else { int wordIndex2 = wordIndex; while (wordIndex2 < line->lastWord && - words->getRef(wordIndex2)->hyphenWidth > 0 && + (words->getRef(wordIndex2)->flags + & Word::DRAW_AS_ONE_TEXT) && word->style == words->getRef(wordIndex2 + 1)->style) wordIndex2++; @@ -1137,6 +1187,7 @@ void Textblock::drawLine (Line *line, core::View *view, core::Rectangle *area) word = words->getRef(wordIndex); } } + if (word->effSpace > 0 && wordIndex < line->lastWord && words->getRef(wordIndex + 1)->content.type != core::Content::BREAK) { @@ -1300,9 +1351,9 @@ void Textblock::fillWord (Word *word, int width, int ascent, int descent, word->origSpace = word->effSpace = word->stretchability = word->shrinkability = 0; word->hyphenWidth = 0; - word->badnessAndPenalty.setPenaltyProhibitBreak (); + word->badnessAndPenalty.setPenalty (PENALTY_PROHIBIT_BREAK); word->content.space = false; - word->canBeHyphenated = canBeHyphenated; + word->flags = canBeHyphenated ? Word::CAN_BE_HYPHENATED : 0; word->style = style; word->spaceStyle = style; @@ -1419,79 +1470,194 @@ void Textblock::calcTextSize (const char *text, size_t len, } /** - * Add a word to the page structure. If it contains soft hyphens, it is - * divided. + * Add a word to the page structure. If it contains dividing + * characters (hard or soft hyphens, em-dashes, etc.), it is divided. */ void Textblock::addText (const char *text, size_t len, core::style::Style *style) { PRINTF ("[%p] ADD_TEXT (%d characters)\n", this, (int)len); - // Count hyphens. - int numHyphens = 0; - for (int i = 0; i < (int)len - 1; i++) - // (0xc2, 0xad) is the UTF-8 representation of a soft hyphen (Unicode - // 0xc2). - if((unsigned char)text[i] == 0xc2 && (unsigned char)text[i + 1] == 0xad) - numHyphens++; - - if (numHyphens == 0) { - // Simple (and common) case: no soft hyphens. May still be hyphenated - // automatically. + // Count dividing characters. + int numParts = 1; + + for (int i = 0; i < (int)len; + i < (int)len && (i = layout->nextGlyph (text, i))) { + int foundDiv = -1; + for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) { + int lDiv = strlen (divChars[j].s); + if (i <= (int)len - lDiv) { + if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0) + foundDiv = j; + } + } + + if (foundDiv != -1) { + if (divChars[foundDiv].penaltyIndexLeft != -1) + numParts ++; + if (divChars[foundDiv].penaltyIndexRight != -1) + numParts ++; + } + } + + if (numParts == 1) { + // Simple (and common) case: no dividing characters. May still + // be hyphenated automatically. core::Requisition size; calcTextSize (text, len, style, &size); addText0 (text, len, true, style, &size); } else { - PRINTF("HYPHENATION: '"); + PRINTF ("HYPHENATION: '"); for (size_t i = 0; i < len; i++) PUTCHAR(text[i]); - PRINTF("', with %d hyphen(s)\n", numHyphens); + PRINTF ("', with %d parts\n", numParts); // Store hyphen positions. - int n = 0, hyphenPos[numHyphens], breakPos[numHyphens]; - for (size_t i = 0; i < len - 1; i++) - if((unsigned char)text[i] == 0xc2 && - (unsigned char)text[i + 1] == 0xad) { - hyphenPos[n] = i; - breakPos[n] = i - 2 * n; - n++; + int n = 0, totalLenCharRemoved = 0; + int partPenaltyIndex[numParts - 1]; + int partStart[numParts], partEnd[numParts]; + bool charRemoved[numParts - 1], canBeHyphenated[numParts + 1]; + bool permDivChar[numParts - 1], unbreakableForMinWidth[numParts - 1]; + canBeHyphenated[0] = canBeHyphenated[numParts] = true; + partStart[0] = 0; + partEnd[numParts - 1] = len; + + for (int i = 0; i < (int)len; + i < (int)len && (i = layout->nextGlyph (text, i))) { + int foundDiv = -1; + for (int j = 0; foundDiv == -1 && j < NUM_DIV_CHARS; j++) { + int lDiv = strlen (divChars[j].s); + if (i <= (int)len - lDiv) { + if (memcmp (text + i, divChars[j].s, lDiv * sizeof (char)) == 0) + foundDiv = j; + } } + + if (foundDiv != -1) { + int lDiv = strlen (divChars[foundDiv].s); + + if (divChars[foundDiv].charRemoved) { + assert (divChars[foundDiv].penaltyIndexLeft != -1); + assert (divChars[foundDiv].penaltyIndexRight == -1); + + partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexLeft; + charRemoved[n] = true; + permDivChar[n] = false; + unbreakableForMinWidth[n] = + divChars[foundDiv].unbreakableForMinWidth; + canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated; + partEnd[n] = i; + partStart[n + 1] = i + lDiv; + n++; + totalLenCharRemoved += lDiv; + } else { + assert (divChars[foundDiv].penaltyIndexLeft != -1 || + divChars[foundDiv].penaltyIndexRight != -1); + + if (divChars[foundDiv].penaltyIndexLeft != -1) { + partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexLeft; + charRemoved[n] = false; + permDivChar[n] = false; + unbreakableForMinWidth[n] = + divChars[foundDiv].unbreakableForMinWidth; + canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated; + partEnd[n] = i; + partStart[n + 1] = i; + n++; + } - // Get text without hyphens. (There are numHyphens + 1 parts in the word, - // and 2 * numHyphens bytes less, 2 for each hyphen, are needed.) - char textWithoutHyphens[len - 2 * numHyphens]; - int start = 0; // related to "text" - for (int i = 0; i < numHyphens + 1; i++) { - int end = (i == numHyphens) ? len : hyphenPos[i]; - memmove (textWithoutHyphens + start - 2 * i, text + start, - end - start); - start = end + 2; + if (divChars[foundDiv].penaltyIndexRight != -1) { + partPenaltyIndex[n] = divChars[foundDiv].penaltyIndexRight; + charRemoved[n] = false; + permDivChar[n] = true; + unbreakableForMinWidth[n] = + divChars[foundDiv].unbreakableForMinWidth; + canBeHyphenated[n + 1] = divChars[foundDiv].canBeHyphenated; + partEnd[n] = i + lDiv; + partStart[n + 1] = i + lDiv; + n++; + } + } + } + } + + // Get text without removed characters, e. g. hyphens. + const char *textWithoutHyphens; + char textWithoutHyphensBuf[len - totalLenCharRemoved]; + int *breakPosWithoutHyphens, breakPosWithoutHyphensBuf[numParts - 1]; + + if (totalLenCharRemoved == 0) { + // No removed characters: take original arrays. + textWithoutHyphens = text; + // Ends are also break positions, except the last end, which + // is superfluous, but does not harm (since arrays in C/C++ + // does not have an implicit length). + breakPosWithoutHyphens = partEnd; + } else { + // Copy into special buffers. + textWithoutHyphens = textWithoutHyphensBuf; + breakPosWithoutHyphens = breakPosWithoutHyphensBuf; + + int n = 0; + for (int i = 0; i < numParts; i++) { + memmove (textWithoutHyphensBuf + n, text + partStart[i], + partEnd[i] - partStart[i]); + n += partEnd[i] - partStart[i]; + if (i < numParts - 1) + breakPosWithoutHyphensBuf[i] = n; + } } PRINTF("H... without hyphens: '"); - for (size_t i = 0; i < len - 2 * numHyphens; i++) + for (size_t i = 0; i < len - totalLenCharRemoved; i++) PUTCHAR(textWithoutHyphens[i]); PRINTF("'\n"); - core::Requisition wordSize[numHyphens + 1]; - calcTextSizes (textWithoutHyphens, len - 2 * numHyphens, style, - numHyphens, breakPos, wordSize); + core::Requisition wordSize[numParts]; + calcTextSizes (textWithoutHyphens, len - totalLenCharRemoved, style, + numParts - 1, breakPosWithoutHyphens, wordSize); // Finished! - for (int i = 0; i < numHyphens + 1; i++) { - int start = (i == 0) ? 0 : hyphenPos[i - 1] + 2; - int end = (i == numHyphens) ? len : hyphenPos[i]; - // Do not anymore hyphen automatically. - addText0 (text + start, end - start, false, style, &wordSize[i]); + for (int i = 0; i < numParts; i++) { + addText0 (text + partStart[i], partEnd[i] - partStart[i], + // If this parts adjoins at least one division + // characters, for which canBeHyphenated is set to + // false (this is the case for soft hyphens), do + // not hyphenate. + canBeHyphenated[i] && canBeHyphenated[i + 1], + style, &wordSize[i]); PRINTF("H... [%d] '", i); - for (int j = start; j < end; j++) + for (int j = partStart[i]; j < partEnd[i]; j++) PUTCHAR(text[j]); PRINTF("' added\n"); - if(i < numHyphens) { - addHyphen (); - PRINTF("H... yphen added\n"); + if(i < numParts - 1) { + Word *word = words->getLastRef(); + + word->badnessAndPenalty + .setPenalties (penalties[partPenaltyIndex[i]][0], + penalties[partPenaltyIndex[i]][1]); + + if (charRemoved[i]) { + // Currently, only soft hyphens (UTF-8: "\xc2\xad") can + // be used. See also drawWord, last section "if + // (drawHyphen)". + // The character defined in DivChar::s could be used, + // but it must then also stored in the word. + word->hyphenWidth = + layout->textWidth (word->style->font, "\xc2\xad", 2); + word->flags |= Word::DIV_CHAR_AT_EOL; + } + + if (permDivChar[i]) + word->flags |= Word::PERM_DIV_CHAR; + if (unbreakableForMinWidth[i]) + word->flags |= Word::UNBREAKABLE_FOR_MIN_WIDTH; + + word->flags |= Word::DRAW_AS_ONE_TEXT; + + accumulateWordData (words->size() - 1); } } } @@ -1512,7 +1678,8 @@ void Textblock::calcTextSizes (const char *text, size_t textLen, PUTCHAR(text[i + lastStart]); PRINTF("' -> %d\n", wordSize[numBreaks].width); - // The rest is more complicated. TODO Documentation. + // The rest is more complicated. See dw-line-breaking, section + // "Hyphens". for (int i = numBreaks - 1; i >= 0; i--) { int start = (i == 0) ? 0 : breakPos[i - 1]; calcTextSize (text + start, textLen - start, style, &wordSize[i]); @@ -1535,6 +1702,11 @@ void Textblock::calcTextSizes (const char *text, size_t textLen, void Textblock::addText0 (const char *text, size_t len, bool canBeHyphenated, core::style::Style *style, core::Requisition *size) { + //printf("[%p] addText0 ('", this); + //for (size_t i = 0; i < len; i++) + // putchar(text[i]); + //printf("', %s, ...)\n", canBeHyphenated ? "true" : "false"); + Word *word = addWord (size->width, size->ascent, size->descent, canBeHyphenated, style); word->content.type = core::Content::TEXT; @@ -1685,7 +1857,9 @@ void Textblock::fillSpace (Word *word, core::style::Style *style) */ void Textblock::setBreakOption (Word *word, core::style::Style *style) { - if (!word->badnessAndPenalty.lineMustBeBroken()) { + // TODO: lineMustBeBroken should be independent of the penalty + // index? Otherwise, examine the last line. + if (!word->badnessAndPenalty.lineMustBeBroken(0)) { switch (style->whiteSpace) { case core::style::WHITE_SPACE_NORMAL: case core::style::WHITE_SPACE_PRE_LINE: @@ -1695,12 +1869,14 @@ void Textblock::setBreakOption (Word *word, core::style::Style *style) case core::style::WHITE_SPACE_PRE: case core::style::WHITE_SPACE_NOWRAP: - word->badnessAndPenalty.setPenaltyProhibitBreak (); + word->badnessAndPenalty.setPenalty (PENALTY_PROHIBIT_BREAK); break; } } } +<<<<<<< local +======= void Textblock::addHyphen () { int wordIndex = words->size () - 1; @@ -1717,6 +1893,7 @@ void Textblock::addHyphen () accumulateWordData (wordIndex); } } +>>>>>>> other /** * Cause a paragraph break @@ -1789,7 +1966,7 @@ void Textblock::addParbreak (int space, core::style::Style *style) word = addWord (0, 0, 0, false, style); word->content.type = core::Content::BREAK; - word->badnessAndPenalty.setPenaltyForceBreak (); + word->badnessAndPenalty.setPenalty (PENALTY_FORCE_BREAK); word->content.breakSpace = space; wordWrap (words->size () - 1, false); } @@ -1812,7 +1989,7 @@ void Textblock::addLinebreak (core::style::Style *style) word = addWord (0, 0, 0, false, style); word->content.type = core::Content::BREAK; - word->badnessAndPenalty.setPenaltyForceBreak (); + word->badnessAndPenalty.setPenalty (PENALTY_FORCE_BREAK); word->content.breakSpace = 0; wordWrap (words->size () - 1, false); } diff --git a/dw/textblock.hh b/dw/textblock.hh index 9bf0259f..5359a46d 100644 --- a/dw/textblock.hh +++ b/dw/textblock.hh @@ -152,14 +152,19 @@ private: * badness is not well defined, so fiddling with the penalties is a * bit difficult. */ + + enum { + PENALTY_FORCE_BREAK = INT_MIN, + PENALTY_PROHIBIT_BREAK = INT_MAX + }; + class BadnessAndPenalty { private: enum { NOT_STRETCHABLE, QUITE_LOOSE, BADNESS_VALUE, TOO_TIGHT } badnessState; - enum { FORCE_BREAK, PROHIBIT_BREAK, PENALTY_VALUE } penaltyState; int ratio; // ratio is only defined when badness is defined - int badness, penalty; + int badness, penalty[2]; // For debugging: define DEBUG for more informations in print(). #ifdef DEBUG @@ -191,36 +196,40 @@ private: // etc. works. }; + void setSinglePenalty (int index, int penalty); int badnessValue (int infLevel); - int penaltyValue (int infLevel); + int penaltyValue (int index, int infLevel); public: void calcBadness (int totalWidth, int idealWidth, int totalStretchability, int totalShrinkability); - void setPenalty (int penalty); - void setPenaltyProhibitBreak (); - void setPenaltyForceBreak (); + inline void setPenalty (int penalty) { setPenalties (penalty, penalty); } + void setPenalties (int penalty1, int penalty2); bool lineLoose (); bool lineTight (); bool lineTooTight (); - bool lineMustBeBroken (); - bool lineCanBeBroken (); - int compareTo (BadnessAndPenalty *other); + bool lineMustBeBroken (int penaltyIndex); + bool lineCanBeBroken (int penaltyIndex); + int compareTo (int penaltyIndex, BadnessAndPenalty *other); void print (); }; -protected: - enum { - /** - * The penalty for hyphens, multiplied by 100. So, 100 means - * 1.0. See dw::Textblock::BadnessAndPenalty::setPenalty for - * more details. - */ - HYPHEN_BREAK = 100 - }; + enum { PENALTY_HYPHEN, PENALTY_EM_DASH_LEFT, PENALTY_EM_DASH_RIGHT, + PENALTY_NUM }; + enum { NUM_DIV_CHARS = 3}; + + typedef struct + { + const char *s; + bool charRemoved, canBeHyphenated, unbreakableForMinWidth; + int penaltyIndexLeft, penaltyIndexRight; + } DivChar; + static DivChar divChars[NUM_DIV_CHARS]; + +protected: struct Line { int firstWord; /* first word's index in word vector */ @@ -268,6 +277,26 @@ protected: struct Word { + enum { + /** Can be hyphenated automatically. (Cleared after + * hyphenation.) */ + CAN_BE_HYPHENATED = 1 << 0, + /** Must be drawn with a hyphen, when at the end of the line. */ + DIV_CHAR_AT_EOL = 1 << 1, + /** Is or ends with a "division character", which is part of + * the word. */ + PERM_DIV_CHAR = 1 << 2, + /** This word must be drawn, together with the following + * word(s), by only one call of View::drawText(), to get + * kerning, ligatures etc. right. The last of the words drawn + * as one text does *not* have this flag set. */ + DRAW_AS_ONE_TEXT = 1 << 3, + /* When calculating the minimal width (as part of extremes), + * do not consider this word as breakable. This flag is + * ignored when the line is actually broken. */ + UNBREAKABLE_FOR_MIN_WIDTH = 1 << 4, + }; + /* TODO: perhaps add a xLeft? */ core::Requisition size; /* Space after the word, only if it's not a break: */ @@ -281,8 +310,9 @@ protected: * this is the last word of the line, and * "hyphenWidth > 0" is also used to decide * whether to draw a hyphen. */ + short flags; + short penaltyIndex; core::Content content; - bool canBeHyphenated; // accumulated values, relative to the beginning of the line int totalWidth; /* The sum of all word widths; plus all @@ -363,6 +393,14 @@ protected: bool mustQueueResize; + /** + * The penalties for hyphens and other, multiplied by 100. So, 100 + * means 1.0. INT_MAX and INT_MIN are also allowed. See + * dw::Textblock::BadnessAndPenalty::setPenalty for more + * details. Set from preferences. + */ + static int penalties[PENALTY_NUM][2]; + bool limitTextWidth; /* from preferences */ int redrawY; @@ -483,6 +521,16 @@ protected: { return lineYOffsetCanvas (lines->getRef (lineIndex)); } + + inline int calcPenaltyIndexForNewLine () + { + if (lines->size() == 0) + return 0; + else + return + (words->getRef(lines->getLastRef()->lastWord)->flags & + (Word::DIV_CHAR_AT_EOL | Word::PERM_DIV_CHAR)) ? 1 : 0; + } bool sendSelectionEvent (core::SelectionState::EventType eventType, core::MousePositionEvent *event); @@ -527,6 +575,12 @@ protected: public: static int CLASS_ID; + static void setPenaltyHyphen (int penaltyHyphen); + static void setPenaltyHyphen2 (int penaltyHyphen2); + static void setPenaltyEmDashLeft (int penaltyLeftEmDash); + static void setPenaltyEmDashRight (int penaltyRightEmDash); + static void setPenaltyEmDashRight2 (int penaltyRightEmDash2); + Textblock(bool limitTextWidth); ~Textblock(); @@ -554,7 +608,6 @@ public: setBreakOption (words->getRef(wordIndex), style); } - void addHyphen(); void addParbreak (int space, core::style::Style *style); void addLinebreak (core::style::Style *style); diff --git a/dw/textblock_linebreaking.cc b/dw/textblock_linebreaking.cc index a203f5a4..cbfd4fa7 100644 --- a/dw/textblock_linebreaking.cc +++ b/dw/textblock_linebreaking.cc @@ -31,22 +31,14 @@ int Textblock::BadnessAndPenalty::badnessValue (int infLevel) return 0; } -int Textblock::BadnessAndPenalty::penaltyValue (int infLevel) +int Textblock::BadnessAndPenalty::penaltyValue (int index, int infLevel) { - switch (penaltyState) { - case FORCE_BREAK: + if (penalty[index] == INT_MIN) return infLevel == INF_PENALTIES ? -1 : 0; - - case PROHIBIT_BREAK: + else if (penalty[index] == INT_MAX) return infLevel == INF_PENALTIES ? 1 : 0; - - case PENALTY_VALUE: - return infLevel == INF_VALUE ? penalty : 0; - } - - // compiler happiness - lout::misc::assertNotReached (); - return 0; + else + return infLevel == INF_VALUE ? penalty[index] : 0; } void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth, @@ -98,6 +90,9 @@ void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth, * to deal with fractional numbers, without having to use floating * point numbers. So, to set a penalty to 0.5, pass 50. * + * INT_MAX and INT_MIN (representing inf and -inf, respectively) are + * also allowed. + * * The definition of penalties depends on the definition of badness, * which adheres to the description in \ref dw-line-breaking, section * "Criteria for Line-Breaking". The exact calculation may vary, but @@ -105,27 +100,28 @@ void Textblock::BadnessAndPenalty::calcBadness (int totalWidth, int idealWidth, * fitting line has a badness of 0. (ii) A line, where all spaces * are extended by exactly the stretchability, as well as a line, where * all spaces are reduced by the shrinkability, have a badness of 1. + * + * (TODO plural: penalties, not penalty. Correct above comment) */ -void Textblock::BadnessAndPenalty::setPenalty (int penalty) -{ - // This factor consists of: (i) 100^3, since in calcBadness(), the - // ratio is multiplied with 100 (again, to use integer numbers for - // fractional numbers), and the badness (which has to be compared - // to the penalty!) is the third power or it; (ii) the denominator - // 100, of course, since 100 times the penalty is passed to this - // method. - this->penalty = penalty * (100 * 100 * 100 / 100); - penaltyState = PENALTY_VALUE; -} - -void Textblock::BadnessAndPenalty::setPenaltyProhibitBreak () +void Textblock::BadnessAndPenalty::setPenalties (int penalty1, int penalty2) { - penaltyState = PROHIBIT_BREAK; + // TODO Check here some cases, e.g. both or no penalty INT_MIN. + setSinglePenalty(0, penalty1); + setSinglePenalty(1, penalty2); } -void Textblock::BadnessAndPenalty::setPenaltyForceBreak () +void Textblock::BadnessAndPenalty::setSinglePenalty (int index, int penalty) { - penaltyState = FORCE_BREAK; + if (penalty == INT_MAX || penalty == INT_MIN) + this->penalty[index] = penalty; + else + // This factor consists of: (i) 100^3, since in calcBadness(), the + // ratio is multiplied with 100 (again, to use integer numbers for + // fractional numbers), and the badness (which has to be compared + // to the penalty!) is the third power or it; (ii) the denominator + // 100, of course, since 100 times the penalty is passed to this + // method. + this->penalty[index] = penalty * (100 * 100 * 100 / 100); } bool Textblock::BadnessAndPenalty::lineLoose () @@ -147,21 +143,23 @@ bool Textblock::BadnessAndPenalty::lineTooTight () } -bool Textblock::BadnessAndPenalty::lineMustBeBroken () +bool Textblock::BadnessAndPenalty::lineMustBeBroken (int penaltyIndex) { - return penaltyState == FORCE_BREAK; + return penalty[penaltyIndex] == PENALTY_FORCE_BREAK; } -bool Textblock::BadnessAndPenalty::lineCanBeBroken () +bool Textblock::BadnessAndPenalty::lineCanBeBroken (int penaltyIndex) { - return penaltyState != PROHIBIT_BREAK; + return penalty[penaltyIndex] != PENALTY_PROHIBIT_BREAK; } -int Textblock::BadnessAndPenalty::compareTo (BadnessAndPenalty *other) +int Textblock::BadnessAndPenalty::compareTo (int penaltyIndex, + BadnessAndPenalty *other) { for (int l = INF_MAX; l >= 0; l--) { - int thisValue = badnessValue (l) + penaltyValue (l); - int otherValue = other->badnessValue (l) + other->penaltyValue (l); + int thisValue = badnessValue (l) + penaltyValue (penaltyIndex, l); + int otherValue = + other->badnessValue (l) + other->penaltyValue (penaltyIndex, l); if (thisValue != otherValue) return thisValue - otherValue; @@ -197,19 +195,19 @@ void Textblock::BadnessAndPenalty::print () printf (" <no debug> + "); #endif - switch (penaltyState) { - case FORCE_BREAK: - printf ("-inf"); - break; - - case PROHIBIT_BREAK: - printf ("inf"); - break; + printf ("("); + for (int i = 0; i < 2; i++) { + if (penalty[i] == INT_MIN) + printf ("-inf"); + else if (penalty[i] == INT_MAX) + printf ("inf"); + else + printf ("%d", penalty[i]); - case PENALTY_VALUE: - printf ("%d", penalty); - break; + if (i == 0) + printf (", "); } + printf (")"); } void Textblock::printWord (Word *word) @@ -219,15 +217,17 @@ void Textblock::printWord (Word *word) printf ("\"%s\"", word->content.text); break; case core::Content::WIDGET: - printf ("<widget: %p>\n", word->content.widget); + printf ("<widget: %p>", word->content.widget); break; case core::Content::BREAK: - printf ("<break>\n"); + printf ("<break>"); break; default: - printf ("<?>\n"); + printf ("<?>"); break; } + + printf (" (flags = %d)", word->flags); printf (" [%d / %d + %d - %d => %d + %d - %d] => ", word->size.width, word->origSpace, word->stretchability, @@ -356,7 +356,9 @@ Textblock::Line *Textblock::addLine (int firstWord, int lastWord, line->maxParMin = misc::max (maxOfMinWidth, prevLine->maxParMin); Word *lastWordOfPrevLine = words->getRef (prevLine->lastWord); - if (lastWordOfPrevLine->badnessAndPenalty.lineMustBeBroken ()) + // TODO: lineMustBeBroken should be independent of the penalty + // index? Otherwise, examine the last line. + if (lastWordOfPrevLine->badnessAndPenalty.lineMustBeBroken (0)) // This line starts a new paragraph. line->parMax = sumOfMaxWidth; else @@ -369,7 +371,9 @@ Textblock::Line *Textblock::addLine (int firstWord, int lastWord, // "maxParMax" is only set, when this line is the last line of the // paragraph. Word *lastWordOfThisLine = words->getRef (line->lastWord); - if (lastWordOfThisLine->badnessAndPenalty.lineMustBeBroken ()) + // TODO: lineMustBeBroken should be independent of the penalty + // index? Otherwise, examine the last line. + if (lastWordOfThisLine->badnessAndPenalty.lineMustBeBroken (0)) // Paragraph ends here. line->maxParMax = misc::max (lastMaxParMax, @@ -420,7 +424,8 @@ void Textblock::accumulateWordExtremes (int firstWord, int lastWord, // Minimum: between two *possible* breaks (or at the end). // TODO This is redundant to getExtremesImpl(). - if (word->badnessAndPenalty.lineCanBeBroken () || atLastWord) { + // TODO: Again, index 1 is used for lineCanBeBroken(). See getExtremes(). + if (word->badnessAndPenalty.lineCanBeBroken (1) || atLastWord) { parMin += extremes.minWidth + word->hyphenWidth; *maxOfMinWidth = misc::max (*maxOfMinWidth, parMin); parMin = 0; @@ -465,6 +470,8 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) accumulateWordData (wordIndex); + int penaltyIndex = calcPenaltyIndexForNewLine (); + bool newLine; do { bool tempNewLine = false; @@ -478,14 +485,16 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) tempNewLine = true; PRINTF (" NEW LINE: last word\n"); } else if (wordIndex >= firstIndex && - word->badnessAndPenalty.lineMustBeBroken ()) { + // TODO: lineMustBeBroken should be independent of + // the penalty index? + word->badnessAndPenalty.lineMustBeBroken (penaltyIndex)) { newLine = true; searchUntil = wordIndex; PRINTF (" NEW LINE: forced break\n"); } else if (wordIndex > firstIndex && word->badnessAndPenalty.lineTooTight () && words->getRef(wordIndex- 1) - ->badnessAndPenalty.lineCanBeBroken ()) { + ->badnessAndPenalty.lineCanBeBroken (penaltyIndex)) { // TODO Comment the last condition (also below where the minimum is // searched for) newLine = true; @@ -524,7 +533,8 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) if (breakPos == -1 || w->badnessAndPenalty.compareTo - (&words->getRef(breakPos)->badnessAndPenalty) <= 0) + (penaltyIndex, + &words->getRef(breakPos)->badnessAndPenalty) <= 0) // "<=" instead of "<" in the next lines tends to result in // more words per line -- theoretically. Practically, the // case "==" will never occur. @@ -548,7 +558,8 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) BadnessAndPenalty correctedBap = lastWord->badnessAndPenalty; correctedBap.setPenalty (0); if (correctedBap.compareTo - (&words->getRef(breakPos)->badnessAndPenalty) <= 0) { + (penaltyIndex, + &words->getRef(breakPos)->badnessAndPenalty) <= 0) { breakPos = searchUntil; PRINTF (" corrected: breakPos = %d\n", breakPos); } @@ -561,7 +572,7 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) PRINTF ("\n"); if (word1->badnessAndPenalty.lineTight () && - word1->canBeHyphenated && + (word1->flags & Word::CAN_BE_HYPHENATED) && word1->style->x_lang[0] && word1->content.type == core::Content::TEXT && Hyphenator::isHyphenationCandidate (word1->content.text)) @@ -570,7 +581,7 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) if (word1->badnessAndPenalty.lineLoose () && breakPos + 1 < words->size ()) { Word *word2 = words->getRef(breakPos + 1); - if (word2->canBeHyphenated && + if ((word2->flags & Word::CAN_BE_HYPHENATED) && word2->style->x_lang[0] && word2->content.type == core::Content::TEXT && Hyphenator::isHyphenationCandidate (word2->content.text)) @@ -587,6 +598,7 @@ void Textblock::wordWrap (int wordIndex, bool wrapAll) tempNewLine ? "temporally" : "permanently", firstIndex, breakPos); lineAdded = true; + penaltyIndex = calcPenaltyIndexForNewLine (); } else { // TODO hyphenateWord() should return whether something has // changed at all. So that a second run, with @@ -690,10 +702,14 @@ int Textblock::hyphenateWord (int wordIndex) // Note: there are numBreaks + 1 word parts. if (i < numBreaks) { // TODO There should be a method fillHyphen. - w->badnessAndPenalty.setPenalty (HYPHEN_BREAK); + w->badnessAndPenalty.setPenalties (penalties[PENALTY_HYPHEN][0], + penalties[PENALTY_HYPHEN][1]); // "\xe2\x80\x90" is an unconditional hyphen. w->hyphenWidth = layout->textWidth (origWord.style->font, "\xe2\x80\x90", 3); + w->flags |= (Word::DRAW_AS_ONE_TEXT | Word::DIV_CHAR_AT_EOL | + Word::UNBREAKABLE_FOR_MIN_WIDTH); + PRINTF (" [%d] + hyphen\n", wordIndex + i); } else { if (origWord.content.space) { @@ -714,9 +730,8 @@ int Textblock::hyphenateWord (int wordIndex) origWord.spaceStyle->unref (); free (breakPos); - } else { - words->getRef(wordIndex)->canBeHyphenated = false; - } + } else + words->getRef(wordIndex)->flags &= ~Word::CAN_BE_HYPHENATED; return numBreaks; } diff --git a/src/dillo.cc b/src/dillo.cc index 173ac036..9bab589c 100644 --- a/src/dillo.cc +++ b/src/dillo.cc @@ -50,6 +50,7 @@ #include "auth.h" #include "dw/fltkcore.hh" +#include "dw/textblock.hh" /* * Command line options structure @@ -359,6 +360,9 @@ int main(int argc, char **argv) a_Cookies_init(); a_Auth_init(); + dw::Textblock::setPenaltyHyphen (prefs.penalty_hyphen); + dw::Textblock::setPenaltyHyphen2 (prefs.penalty_hyphen_2); + /* command line options override preferences */ if (options_got & DILLO_CLI_FULLWINDOW) prefs.fullwindow_start = TRUE; diff --git a/src/form.cc b/src/form.cc index d5bd71da..92e06ab1 100644 --- a/src/form.cc +++ b/src/form.cc @@ -870,7 +870,7 @@ void Html_tag_open_button(DilloHtml *html, const char *tag, int tagsize) html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING, attrbuf); } - /* We used to have Textblock (prefs.limit_text_width) here, + /* We used to have Textblock (prefs.limit_text_width, ...) here, * but it caused 100% CPU usage. */ page = new Textblock (false); diff --git a/src/html.cc b/src/html.cc index 0efe1bbf..44023e37 100644 --- a/src/html.cc +++ b/src/html.cc @@ -458,7 +458,7 @@ void DilloHtml::initDw() dReturn_if_fail (dw == NULL); /* Create the main widget */ - dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width); + dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width); bw->num_page_bugs = 0; dStr_truncate(bw->page_bugs, 0); diff --git a/src/prefs.c b/src/prefs.c index a7fa1bcf..88d10a8d 100644 --- a/src/prefs.c +++ b/src/prefs.c @@ -101,6 +101,12 @@ void a_Prefs_init(void) prefs.start_page = a_Url_new(PREFS_START_PAGE, NULL); prefs.theme = dStrdup(PREFS_THEME); prefs.w3c_plus_heuristics = TRUE; + + prefs.penalty_hyphen = 100; + prefs.penalty_hyphen_2 = 100; + prefs.penalty_em_dash_left = 800; + prefs.penalty_em_dash_right = 100; + prefs.penalty_em_dash_right_2 = 800; } /* diff --git a/src/prefs.h b/src/prefs.h index 7622aea3..0c392ae5 100644 --- a/src/prefs.h +++ b/src/prefs.h @@ -89,6 +89,8 @@ struct _DilloPrefs { bool_t show_msg; bool_t show_extra_warnings; bool_t middle_click_drags_page; + int penalty_hyphen, penalty_hyphen_2; + int penalty_em_dash_left, penalty_em_dash_right, penalty_em_dash_right_2; }; /* Global Data */ diff --git a/src/prefsparser.cc b/src/prefsparser.cc index aa810b1e..6eb8c11d 100644 --- a/src/prefsparser.cc +++ b/src/prefsparser.cc @@ -12,6 +12,8 @@ #include <sys/types.h> #include <stdlib.h> #include <locale.h> /* for setlocale */ +#include <math.h> /* for isinf */ +#include <limits.h> #include "prefs.h" #include "misc.h" @@ -28,6 +30,7 @@ typedef enum { PREFS_URL, PREFS_INT32, PREFS_DOUBLE, + PREFS_FRACTION_100, PREFS_GEOMETRY, PREFS_PANEL_SIZE } PrefType_t; @@ -107,7 +110,15 @@ int PrefsParser::parseOption(char *name, char *value) { "small_icons", &prefs.small_icons, PREFS_BOOL }, { "start_page", &prefs.start_page, PREFS_URL }, { "theme", &prefs.theme, PREFS_STRING }, - { "w3c_plus_heuristics", &prefs.w3c_plus_heuristics, PREFS_BOOL } + { "w3c_plus_heuristics", &prefs.w3c_plus_heuristics, PREFS_BOOL }, + { "penalty_hyphen", &prefs.penalty_hyphen, PREFS_FRACTION_100 }, + { "penalty_hyphen_2", &prefs.penalty_hyphen_2, PREFS_FRACTION_100 }, + { "penalty_em_dash_left", &prefs.penalty_em_dash_left, + PREFS_FRACTION_100 }, + { "penalty_em_dash_right", &prefs.penalty_em_dash_right, + PREFS_FRACTION_100 }, + { "penalty_em_dash_right_2", &prefs.penalty_em_dash_right_2, + PREFS_FRACTION_100 } }; node = NULL; @@ -160,6 +171,18 @@ int PrefsParser::parseOption(char *name, char *value) case PREFS_DOUBLE: *(double *)node->pref = strtod(value, NULL); break; + case PREFS_FRACTION_100: + { + double d = strtod (value, NULL); + if (isinf(d)) { + if (d > 0) + *(int*)node->pref = INT_MAX; + else + *(int*)node->pref = INT_MIN; + } else + *(int*)node->pref = 100 * d; + } + break; case PREFS_GEOMETRY: a_Misc_parse_geometry(value, &prefs.xpos, &prefs.ypos, &prefs.width, &prefs.height); diff --git a/test/hyphens-etc.html b/test/hyphens-etc.html new file mode 100644 index 00000000..a13e68d4 --- /dev/null +++ b/test/hyphens-etc.html @@ -0,0 +1,5 @@ +<p>Abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde­abcde</p> +<p>Abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde-abcde</p> +<p>Abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde—abcde</p> +<p lang="de">Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen—Nordrhein-Westfalen</p> +<p lang="de">Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen—Nord­rheinwestfalen</p> |