diff options
| | | |
|---|---|---|
| author | corvid <corvid@lavabit.com> | 2010-02-16 03:32:17 +0000 |
| committer | corvid <corvid@lavabit.com> | 2010-02-16 03:32:17 +0000 |
| commit | ceb4a77cb81853fb7e7ca6f3c10f8e625f585496 (patch) | |
| tree | 2c72a5286e665598fc10e8eef5bb2dae922c717c /src | |
| parent | 3dc8ad3068f5e740e78930c79e8fdadc0619d801 (diff) | |
handle zero-width space.
The discussion thread is at
http://lists.auriga.wearlab.de/pipermail/dillo-dev/2009-September/006894.html
Jorge expressed no opinion. Johannes approved, but wasn't sure that adding
zero-width spaces to the page was necessary. I wasn't sure that it was
necessary, either, so this version doesn't add them to the page.
Diffstat (limited to 'src')
-rw-r--r-- | src/html.cc | 3 |
-rw-r--r-- | src/utf8.hh | 3 |
2 files changed, 6 insertions, 0 deletions
diff --git a/src/html.cc b/src/html.cc
index cf3e54d8..21cfe2c4 100644
--- a/src/html.cc
+++ b/src/html.cc
@@ -1178,6 +1178,8 @@ static void Html_process_word(DilloHtml *html, const char *word, int size)
          if (isspace(word2[i])) {
             while (word2[++i] && isspace(word2[i])) ;
             Html_process_space(html, word2 + start, i - start);
+         } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
+            i += 3;
          } else if (a_Utf8_ideographic(word2+i, word2_end, &len)) {
             i += len;
             HT2TB(html)->addText(word2 + start, i - start,
@@ -1186,6 +1188,7 @@ static void Html_process_word(DilloHtml *html, const char *word, int size)
             do {
                i += len;
             } while (word2[i] && !isspace(word2[i]) &&
+                     strncmp(word2+i, utf8_zero_width_space, 3) &&
                      (!a_Utf8_ideographic(word2+i, word2_end, &len)));
             HT2TB(html)->addText(word2 + start, i - start,
                                  html->styleEngine->wordStyle ());
diff --git a/src/utf8.hh b/src/utf8.hh
index fd1fb87e..4ded50b8 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -15,6 +15,9 @@ extern "C" {
  */
 static const char utf8_replacement_char[] = "\xEF\xBF\xBD";
 
+/* Unicode zero width space U+200B */
+static const char utf8_zero_width_space[] = "\xE2\x80\x8B";
+
 uint_t a_Utf8_end_of_char(const char *str, uint_t i);
 uint_t a_Utf8_decode(const char*, const char* end, int* len);
 int a_Utf8_encode(unsigned int ucs, char *buf);