diff options
author | Jorge Arellano Cid <jcid@dillo.org> | 2016-05-11 10:02:49 -0300 |
---|---|---|
committer | Jorge Arellano Cid <jcid@dillo.org> | 2016-05-11 10:02:49 -0300 |
commit | 28aa78ac9465788c745b1abb9b59a2723282c4fa (patch) | |
tree | c5d8dd7fecbcbcd58c7277c7df459564bd36081c | |
parent | 9afbae353b722209d63786366a175ae7386e7575 (diff) |
Fixed handling of BODY and HTML tags. Also improved their html-bug messages.
BODY and HTML have optional open and close tags, making them tricky to handle.
Even more so when considering Tag soup pages with multiple BODY or HTML
sections, and other corner cases.
This patch tackles the problems by leaving the first HTML and BODY
stack elements open, until EOF.
There's also better html-bug detection and messages, and more accurate
comments in the code.
Beware: it may look simple, but it's not!
-rw-r--r-- | src/html.cc | 32 | ||||
-rw-r--r-- | src/html_common.hh | 4 |
2 files changed, 32 insertions, 4 deletions
diff --git a/src/html.cc b/src/html.cc index 9c2d9493..a3004dba 100644 --- a/src/html.cc +++ b/src/html.cc @@ -463,6 +463,8 @@ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, ReqTagClose = false; TagSoup = true; loadCssFromStash = false; + PrevWasBodyClose = false; + PrevWasHtmlClose = false; Num_HTML = Num_HEAD = Num_BODY = Num_TITLE = 0; @@ -1189,6 +1191,10 @@ static void Html_process_word(DilloHtml *html, const char *word, int size) if (S_TOP(html)->display_none) return; + if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) { + BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body"); + html->PrevWasHtmlClose = html->PrevWasBodyClose = false; + } if (parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) { @@ -1351,13 +1357,19 @@ static void Html_real_pop_tag(DilloHtml *html) */ static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx) { + static int i_BODY = a_Html_tag_index("body"); int s_sz; while ((s_sz = html->stack->size()) > idx) { int toptag_idx = S_TOP(html)->tag_idx; TagInfo toptag = Tags[toptag_idx]; if (s_sz > idx + 1 && toptag.EndTag != 'O') BUG_MSG(" - forcing close of open tag: <%s>.", toptag.name); - _MSG("Close: %*s%s\n", size," ", toptag.name); + _MSG("Close: %s sz=%d idx=%d\n", toptag.name, s_sz, idx); + if (toptag_idx == i_BODY && + !((html->InFlags & IN_EOF) || html->ReqTagClose)) { + (idx == 1 ? html->PrevWasHtmlClose : html->PrevWasBodyClose) = true; + break; // only pop {BODY,HTML} upon EOF or redundancy + } if (toptag.close) toptag.close(html); Html_real_pop_tag(html); @@ -1706,6 +1718,10 @@ static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize) static void Html_tag_close_html(DilloHtml *html) { _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML); + + /* As some Tag soup pages use multiple HTML tags, this function + * gets called only on EOF and upon and extra HTML open. 
+ * Also, we defer clearing the IN_HTML flag until IN_EOF */ } /* @@ -1962,8 +1978,11 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize) */ static void Html_tag_close_body(DilloHtml *html) { - /* Some tag soup pages use multiple BODY tags... - * Defer clearing the IN_BODY flag until IN_EOF */ + _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY); + + /* As some Tag soup pages use multiple BODY tags, this function + * gets called only on EOF and upon and extra BODY open. + * Also, we defer clearing the IN_BODY flag until IN_EOF */ } /* @@ -3973,6 +3992,7 @@ static void Html_display_listitem(DilloHtml *html) */ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) { + static int i_HTML = a_Html_tag_index("html"); int ci, ni; /* current and new tag indexes */ char *start = tag + 1; /* discard the '<' */ int IsCloseTag = (*start == '/'); @@ -3989,10 +4009,16 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) /* Ignore unknown tags */ return; } + _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name); if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) Html_check_html5_obsolete(html, ni); + int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0; + if (i == 1 || (i == 2 && ni != i_HTML)) + BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body"); + html->PrevWasHtmlClose = html->PrevWasBodyClose = false; + /* Handle HTML, HEAD and BODY. 
Elements with optional open and close */ if (!(html->InFlags & IN_BODY) /* && parsing HTML */) Html_test_section(html, ni, IsCloseTag); diff --git a/src/html_common.hh b/src/html_common.hh index 68ed0d08..6d0d8c62 100644 --- a/src/html_common.hh +++ b/src/html_common.hh @@ -177,9 +177,11 @@ public: //BUG: for now everything is public bool PrevWasCR; /* Flag to help parsing of "\r\n" in PRE tags */ bool PrevWasOpenTag; /* Flag to help deferred parsing of white space */ bool InVisitedLink; /* used to 'contrast_visited_colors' */ - bool ReqTagClose; /* Flag to help handling bad-formed HTML */ + bool ReqTagClose; /* Flag to close the stack's top tag */ bool TagSoup; /* Flag to enable the parser's cleanup functions */ bool loadCssFromStash; /* current stash content should be loaded as CSS */ + bool PrevWasBodyClose; /* set when </body> is found */ + bool PrevWasHtmlClose; /* set when </html> is found */ /* element counters: used for validation purposes. * ATM they're used as three state flags {0,1,>1} */ |