diff options
author | Sebastian Geerken <devnull@localhost> | 2012-11-09 21:00:10 +0100 |
---|---|---|
committer | Sebastian Geerken <devnull@localhost> | 2012-11-09 21:00:10 +0100 |
commit | ba12d00e4304138098beec462bd9c7ab5063b0bd (patch) | |
tree | bfe3c1fbefc8fbb3822882da1302c42c0a266b30 | |
parent | 5e000618f0774aa24605ab984c46411e745a1574 (diff) |
Generalization of soft hyphens, for "hard" hyphens, em-dashes and similar characters. Works now for soft hyphens.
-rw-r--r-- | dw/textblock.cc | 176 | ||||
-rw-r--r-- | dw/textblock.hh | 18 | ||||
-rw-r--r-- | dw/textblock_linebreaking.cc | 2 |
3 files changed, 141 insertions, 55 deletions
diff --git a/dw/textblock.cc b/dw/textblock.cc index 095ad219..367e154a 100644 --- a/dw/textblock.cc +++ b/dw/textblock.cc @@ -40,6 +40,10 @@ namespace dw { int Textblock::CLASS_ID = -1; +Textblock::DivSign Textblock::divSigns[NUM_DIV_SIGNS] = { + { "\xc2\xad", true, false, PENALTY_HYPHEN, -1 } +}; + Textblock::Textblock (bool limitTextWidth, int penaltyHyphen) { registerName ("dw::Textblock", &CLASS_ID); @@ -86,7 +90,7 @@ Textblock::Textblock (bool limitTextWidth, int penaltyHyphen) availDescent = 0; this->limitTextWidth = limitTextWidth; - this->penaltyHyphen = penaltyHyphen; + penalties[PENALTY_HYPHEN] = penaltyHyphen; for (int layer = 0; layer < core::HIGHLIGHT_NUM_LAYERS; layer++) { /* hlStart[layer].index > hlEnd[layer].index means no highlighting */ @@ -1418,77 +1422,146 @@ void Textblock::calcTextSize (const char *text, size_t len, } /** - * Add a word to the page structure. If it contains soft hyphens, it is - * divided. + * Add a word to the page structure. If it contains dividing + * characters (hard or soft hyphens, em-dashes, etc.), it is divided. */ void Textblock::addText (const char *text, size_t len, core::style::Style *style) { PRINTF ("[%p] ADD_TEXT (%d characters)\n", this, (int)len); - // Count hyphens. - int numHyphens = 0; - for (int i = 0; i < (int)len - 1; i++) - // (0xc2, 0xad) is the UTF-8 representation of a soft hyphen (Unicode - // 0xc2). - if((unsigned char)text[i] == 0xc2 && (unsigned char)text[i + 1] == 0xad) - numHyphens++; - - if (numHyphens == 0) { - // Simple (and common) case: no soft hyphens. May still be hyphenated - // automatically. + // Count dividing characters. + int numParts = 1; + + for (int i = 0; i < (int)len; + i < (int)len && (i = layout->nextGlyph (text, i))) { + int foundDiv = -1; + for (int j = 0; foundDiv == -1 && j < NUM_DIV_SIGNS; j++) { + int lDiv = strlen (divSigns[j].s); + if (i <= (int)len - lDiv) { + if (memcmp (text + i, divSigns[j].s, lDiv * sizeof (char)) == 0) + foundDiv = j; + } + } + + if (foundDiv != -1) { + if (divSigns[foundDiv].penaltyIndexLeft != -1) + numParts ++; + if (divSigns[foundDiv].penaltyIndexRight != -1) + numParts ++; + } + } + + if (numParts == 1) { + // Simple (and common) case: no dividing characters. May still + // be hyphenated automatically. core::Requisition size; calcTextSize (text, len, style, &size); addText0 (text, len, true, style, &size); } else { - PRINTF("HYPHENATION: '"); + PRINTF ("HYPHENATION: '"); for (size_t i = 0; i < len; i++) PUTCHAR(text[i]); - PRINTF("', with %d hyphen(s)\n", numHyphens); + PRINTF ("', with %d parts\n", numParts); // Store hyphen positions. - int n = 0, hyphenPos[numHyphens], breakPos[numHyphens]; - for (size_t i = 0; i < len - 1; i++) - if((unsigned char)text[i] == 0xc2 && - (unsigned char)text[i + 1] == 0xad) { - hyphenPos[n] = i; - breakPos[n] = i - 2 * n; - n++; + int n = 0, totalLenSignRemoved = 0; + int partPenalty[numParts], partStart[numParts], partEnd[numParts]; + partPenalty[numParts - 1] = INT_MAX; + partStart[0] = 0; + partEnd[numParts - 1] = len; + + for (int i = 0; i < (int)len; + i < (int)len && (i = layout->nextGlyph (text, i))) { + int foundDiv = -1; + for (int j = 0; foundDiv == -1 && j < NUM_DIV_SIGNS; j++) { + int lDiv = strlen (divSigns[j].s); + if (i <= (int)len - lDiv) { + if (memcmp (text + i, divSigns[j].s, lDiv * sizeof (char)) == 0) + foundDiv = j; + } + } + + if (foundDiv != -1) { + int lDiv = strlen (divSigns[foundDiv].s); + + if (divSigns[foundDiv].signRemoved) { + assert (divSigns[foundDiv].penaltyIndexLeft != -1); + assert (divSigns[foundDiv].penaltyIndexRight == -1); + + partPenalty[n] = penalties[divSigns[foundDiv].penaltyIndexLeft]; + partEnd[n] = i; + partStart[n + 1] = i + lDiv; + n++; + totalLenSignRemoved += lDiv; + } else { + assert (divSigns[foundDiv].penaltyIndexLeft != -1 || + divSigns[foundDiv].penaltyIndexRight != -1); + + if (divSigns[foundDiv].penaltyIndexLeft != -1) { + partPenalty[n] = + penalties[divSigns[foundDiv].penaltyIndexLeft]; + partEnd[n] = i; + partStart[n + 1] = i; + n++; + } + + if (divSigns[foundDiv].penaltyIndexRight != -1) { + partPenalty[n] = + penalties[divSigns[foundDiv].penaltyIndexRight]; + partEnd[n] = i + lDiv; + partStart[n + 1] = i + lDiv; + n++; + } + } } + } + + // Get text without removed characters, e. g. hyphens. + const char *textWithoutHyphens; + char textWithoutHyphensBuf[len - totalLenSignRemoved]; + int *partEndWithoutHyphens, partEndWithoutHyphensBuf[numParts]; - // Get text without hyphens. (There are numHyphens + 1 parts in the word, - // and 2 * numHyphens bytes less, 2 for each hyphen, are needed.) - char textWithoutHyphens[len - 2 * numHyphens]; - int start = 0; // related to "text" - for (int i = 0; i < numHyphens + 1; i++) { - int end = (i == numHyphens) ? len : hyphenPos[i]; - memmove (textWithoutHyphens + start - 2 * i, text + start, - end - start); - start = end + 2; + if (totalLenSignRemoved == 0) { + // No removed characters: take original arrays. + textWithoutHyphens = text; + partEndWithoutHyphens = partEnd; + } else { + // Copy into special buffers. + textWithoutHyphens = textWithoutHyphensBuf; + partEndWithoutHyphens = partEndWithoutHyphensBuf; + + int n = 0; + for (int i = 0; i < numParts; i++) { + memmove (textWithoutHyphensBuf + n, text + partStart[i], + partEnd[i] - partStart[i]); + n += partEnd[i] - partStart[i]; + partEndWithoutHyphensBuf[i] = n; + } } PRINTF("H... without hyphens: '"); - for (size_t i = 0; i < len - 2 * numHyphens; i++) + for (size_t i = 0; i < len - totalLenSignRemoved; i++) PUTCHAR(textWithoutHyphens[i]); PRINTF("'\n"); - core::Requisition wordSize[numHyphens + 1]; - calcTextSizes (textWithoutHyphens, len - 2 * numHyphens, style, - numHyphens, breakPos, wordSize); + core::Requisition wordSize[numParts]; + calcTextSizes (textWithoutHyphens, len - totalLenSignRemoved, style, + numParts, partEndWithoutHyphens, wordSize); // Finished! - for (int i = 0; i < numHyphens + 1; i++) { - int start = (i == 0) ? 0 : hyphenPos[i - 1] + 2; - int end = (i == numHyphens) ? len : hyphenPos[i]; - // Do not anymore hyphen automatically. - addText0 (text + start, end - start, false, style, &wordSize[i]); + for (int i = 0; i < numParts; i++) { + // Do not anymore hyphen automatically. TODO Sometimes do. + addText0 (text + partStart[i], partEnd[i] - partStart[i], + false, style, &wordSize[i]); PRINTF("H... [%d] '", i); - for (int j = start; j < end; j++) + for (int j = partStart[i]; j < partEnd[i]; j++) PUTCHAR(text[j]); PRINTF("' added\n"); - if(i < numHyphens) { + if(i < numParts - 1) { + // TODO Here again. Consider also penalties. addHyphen (); PRINTF("H... yphen added\n"); } @@ -1498,22 +1571,23 @@ void Textblock::addText (const char *text, size_t len, void Textblock::calcTextSizes (const char *text, size_t textLen, core::style::Style *style, - int numBreaks, int *breakPos, + int numParts, int *partEnd, core::Requisition *wordSize) { // The size of the last part is calculated in a simple way. - int lastStart = breakPos[numBreaks - 1]; + int lastStart = partEnd[numParts - 2]; calcTextSize (text + lastStart, textLen - lastStart, style, - &wordSize[numBreaks]); + &wordSize[numParts - 1]); PRINTF("H... [%d] '", numBreaks); for (size_t i = 0; i < textLen - lastStart; i++) PUTCHAR(text[i + lastStart]); PRINTF("' -> %d\n", wordSize[numBreaks].width); - // The rest is more complicated. TODO Documentation. - for (int i = numBreaks - 1; i >= 0; i--) { - int start = (i == 0) ? 0 : breakPos[i - 1]; + // The rest is more complicated. See dw-line-breaking, section + // "Hyphens". + for (int i = numParts - 2; i >= 0; i--) { + int start = (i == 0) ? 0 : partEnd[i - 1]; calcTextSize (text + start, textLen - start, style, &wordSize[i]); PRINTF("H... [%d] '", i); @@ -1521,7 +1595,7 @@ void Textblock::calcTextSizes (const char *text, size_t textLen, PUTCHAR(text[j + start]); PRINTF("' -> %d\n", wordSize[i].width); - for (int j = i + 1; j < numBreaks + 1; j++) { + for (int j = i + 1; j < numParts; j++) { wordSize[i].width -= wordSize[j].width; PRINTF("H... - %d = %d\n", wordSize[j].width, wordSize[i].width); } @@ -1707,7 +1781,7 @@ void Textblock::addHyphen () if (wordIndex >= 0) { Word *word = words->getRef(wordIndex); - word->badnessAndPenalty.setPenalty (penaltyHyphen); + word->badnessAndPenalty.setPenalty (penalties[PENALTY_HYPHEN]); // TODO Optimize? Like spaces? word->hyphenWidth = layout->textWidth (word->style->font, "\xc2\xad", 2); diff --git a/dw/textblock.hh b/dw/textblock.hh index cc11eb9c..2a0eda0f 100644 --- a/dw/textblock.hh +++ b/dw/textblock.hh @@ -211,6 +211,18 @@ private: void print (); }; + enum { PENALTY_HYPHEN, PENALTY_NUM }; + enum { NUM_DIV_SIGNS = 1 }; + + typedef struct + { + const char *s; + bool signRemoved, canBeHyphenated; + int penaltyIndexLeft, penaltyIndexRight; + } DivSign; + + static DivSign divSigns[NUM_DIV_SIGNS]; + protected: struct Line { @@ -355,12 +367,12 @@ protected: bool mustQueueResize; /** - * The penalty for hyphens, multiplied by 100. So, 100 means - * 1.0. INT_MAX and INT_MIN are also allowed. See + * The penalties for hyphens and other, multiplied by 100. So, 100 + * means 1.0. INT_MAX and INT_MIN are also allowed. See * dw::Textblock::BadnessAndPenalty::setPenalty for more * details. Set from preferences. */ - int penaltyHyphen; + int penalties[PENALTY_NUM]; bool limitTextWidth; /* from preferences */ diff --git a/dw/textblock_linebreaking.cc b/dw/textblock_linebreaking.cc index ebe3d534..d111a5b5 100644 --- a/dw/textblock_linebreaking.cc +++ b/dw/textblock_linebreaking.cc @@ -699,7 +699,7 @@ int Textblock::hyphenateWord (int wordIndex) // Note: there are numBreaks + 1 word parts. if (i < numBreaks) { // TODO There should be a method fillHyphen. - w->badnessAndPenalty.setPenalty (penaltyHyphen); + w->badnessAndPenalty.setPenalty (penalties[PENALTY_HYPHEN]); w->hyphenWidth = layout->textWidth (origWord.style->font, "\xc2\xad", 2); PRINTF (" [%d] + hyphen\n", wordIndex + i); |