From 59775fee0bd17390ced7af4c71e7500b9c9b080a Mon Sep 17 00:00:00 2001 From: corvid Date: Sat, 21 Feb 2015 19:04:40 +0000 Subject: update some urls in comments --- src/html.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index 8d214841..53be82c3 100644 --- a/src/html.cc +++ b/src/html.cc @@ -1575,7 +1575,7 @@ static int * rendering modes, so it may be better to chose another behaviour. --Jcid * * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html - * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html + * http://lists.dillo.org/pipermail/dillo-dev/2004-October/002300.html * * This is not a full DOCTYPE parser, just enough for what Dillo uses. */ -- cgit v1.2.3 From b5399229a859f0f8009890eb9837b1d5ee6635d3 Mon Sep 17 00:00:00 2001 From: corvid Date: Wed, 1 Apr 2015 23:40:37 +0000 Subject: limit size when copying strings to find character references https://github.com/torvalds/linux/pull/17 has a five-megabyte title attribute, which is just a bit excessive. Since it has tons of < and >, dillo couldn't cope with it. Over five minutes to parse as much of it as it got before the connection broke. With this change, it's about fifty seconds (on this old computer) to get/show the full 24 megs, which is an improvement, at least. --- src/html.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index 53be82c3..1344c69c 100644 --- a/src/html.cc +++ b/src/html.cc @@ -995,6 +995,14 @@ static const char *Html_parse_entity(DilloHtml *html, const char *token, const char *ret = NULL; char *tok; + if (toksize > 50) { + /* In pathological cases, attributes can be megabytes long and filled + * with character references. As of HTML5, the longest defined character + * reference is about 32 bytes long. 
+ */ + toksize = 50; + } + token++; tok = dStrndup(token, (uint_t)toksize); -- cgit v1.2.3 From 5e00c2e9f8ac73c29fc44e348e9e2016639832f0 Mon Sep 17 00:00:00 2001 From: corvid Date: Sun, 12 Apr 2015 17:35:43 +0000 Subject: more html5 doctype strings Followed a link to instructables.com and found that they use one of these. I'm a little surprised to see one of these strings around. A minute of research shows: Apparently it generally has something to do with xslt restrictions. --- src/html.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index 1344c69c..d8000328 100644 --- a/src/html.cc +++ b/src/html.cc @@ -1655,7 +1655,11 @@ static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize) html->DocTypeVersion = 2.0f; } } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") || - !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >")) { + !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") || + !dStrAsciiCasecmp(ntag, + "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") || + !dStrAsciiCasecmp(ntag, + "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) { html->DocType = DT_HTML; html->DocTypeVersion = 5.0f; } -- cgit v1.2.3 From f105d0398e5be19451d51e2ab72d8c9f1248f146 Mon Sep 17 00:00:00 2001 From: corvid Date: Sun, 26 Apr 2015 14:07:55 +0000 Subject: in html5, ADDRESS may contain certain elements that we classify as block. Not heading/sectioning ones, but P is legal, for example.
--- src/html.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index d8000328..61c216fa 100644 --- a/src/html.cc +++ b/src/html.cc @@ -3506,7 +3506,7 @@ const TagInfo Tags[] = { {"a", B8(011101),'R',2, Html_tag_open_a, NULL, Html_tag_close_a}, {"abbr", B8(010101),'R',2, Html_tag_open_abbr, NULL, NULL}, /* acronym 010101 -- obsolete in HTML5 */ - {"address", B8(010110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par}, + {"address", B8(011110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par}, {"area", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_area, NULL}, {"article", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL}, -- cgit v1.2.3 From c7298e589b9065919761676b1606d17d372d18e3 Mon Sep 17 00:00:00 2001 From: corvid Date: Mon, 27 Apr 2015 15:32:54 +0000 Subject: html5 permits relative BASE url --- src/html.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index 61c216fa..e2bc6bf8 100644 --- a/src/html.cc +++ b/src/html.cc @@ -3396,8 +3396,13 @@ static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize) if (html->InFlags & IN_HEAD) { if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) { - BaseUrl = a_Html_url_new(html, attrbuf, "", 1); - if (URL_SCHEME_(BaseUrl)) { + bool_t html5 = html->DocType == DT_HTML && + html->DocTypeVersion >= 5.0f; + + BaseUrl = html5 ? 
a_Html_url_new(html, attrbuf, NULL, 0) : + a_Html_url_new(html, attrbuf, "", 1); + + if (html5 || URL_SCHEME_(BaseUrl)) { /* Pass the URL_SpamSafe flag to the new base url */ a_Url_set_flags( BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe); -- cgit v1.2.3 From 10605ce2b45e4859a5cb0b2c95c84d7fa377baca Mon Sep 17 00:00:00 2001 From: corvid Date: Tue, 28 Apr 2015 21:42:28 +0000 Subject: BUG_MSG --- src/html.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index e2bc6bf8..f5631886 100644 --- a/src/html.cc +++ b/src/html.cc @@ -3789,7 +3789,8 @@ static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag) int tag_idx; if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE) - BUG_MSG("The required DOCTYPE declaration is missing."); + BUG_MSG("The required DOCTYPE declaration is missing. " + "Handling as HTML4."); if (!(html->InFlags & IN_HTML)) { tag = "<html>"; -- cgit v1.2.3 From af1c5e39e54e1ba2d26a472dfac0547afee3269e Mon Sep 17 00:00:00 2001 From: corvid Date: Tue, 5 May 2015 06:19:12 +0000 Subject: html5 coords don't permit percentages --- src/html.cc | 1 - 1 file changed, 1 deletion(-) (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc index f5631886..d6b64a19 100644 --- a/src/html.cc +++ b/src/html.cc @@ -2481,7 +2481,6 @@ static void type = UNKNOWN; } if (type == RECTANGLE || type == CIRCLE || type == POLYGON) { - /* TODO: add support for coords in % */ if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) { coords = Html_read_coords(html, attrbuf); -- cgit v1.2.3