aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: corvid <corvid@lavabit.com> 2010-02-16 03:32:17 +0000
committer: corvid <corvid@lavabit.com> 2010-02-16 03:32:17 +0000
commit: ceb4a77cb81853fb7e7ca6f3c10f8e625f585496 (patch)
tree: 2c72a5286e665598fc10e8eef5bb2dae922c717c
parent: 3dc8ad3068f5e740e78930c79e8fdadc0619d801 (diff)
handle zero-width space.
http://lists.auriga.wearlab.de/pipermail/dillo-dev/2009-September/006894.html has the thread. Jorge expressed no opinion. Johannes approved, but wasn't sure that adding zero-width spaces to the page was necessary. I wasn't sure that it was necessary, either, so this version doesn't do that.
-rw-r--r-- ChangeLog 1
-rw-r--r-- src/html.cc 3
-rw-r--r-- src/utf8.hh 3
3 files changed, 7 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 13848071..06ce7443 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -7,6 +7,7 @@ dillo-2.2.1 [not released yet]
Patch: Alexander Voigt, corvid
+- Include Accept header in HTTP queries.
- Don't use obsolete png_check_sig().
+ - Handle zero-width space.
Patches: corvid
-----------------------------------------------------------------------------
diff --git a/src/html.cc b/src/html.cc
index cf3e54d8..21cfe2c4 100644
--- a/src/html.cc
+++ b/src/html.cc
@@ -1178,6 +1178,8 @@ static void Html_process_word(DilloHtml *html, const char *word, int size)
if (isspace(word2[i])) {
while (word2[++i] && isspace(word2[i])) ;
Html_process_space(html, word2 + start, i - start);
+ } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
+ i += 3;
} else if (a_Utf8_ideographic(word2+i, word2_end, &len)) {
i += len;
HT2TB(html)->addText(word2 + start, i - start,
@@ -1186,6 +1188,7 @@ static void Html_process_word(DilloHtml *html, const char *word, int size)
do {
i += len;
} while (word2[i] && !isspace(word2[i]) &&
+ strncmp(word2+i, utf8_zero_width_space, 3) &&
(!a_Utf8_ideographic(word2+i, word2_end, &len)));
HT2TB(html)->addText(word2 + start, i - start,
html->styleEngine->wordStyle ());
diff --git a/src/utf8.hh b/src/utf8.hh
index fd1fb87e..4ded50b8 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -15,6 +15,9 @@ extern "C" {
*/
static const char utf8_replacement_char[] = "\xEF\xBF\xBD";
+/* Unicode zero width space U+200B */
+static const char utf8_zero_width_space[] = "\xE2\x80\x8B";
+
uint_t a_Utf8_end_of_char(const char *str, uint_t i);
uint_t a_Utf8_decode(const char*, const char* end, int* len);
int a_Utf8_encode(unsigned int ucs, char *buf);