/* * File: html.cc * * Copyright (C) 2005-2007 Jorge Arellano Cid * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. */ /* * Dillo HTML parsing routines */ /*----------------------------------------------------------------------------- * Includes *---------------------------------------------------------------------------*/ #include /* for isspace */ #include /* for memcpy and memmove */ #include #include /* for sprintf */ #include #include "bw.h" /* for BrowserWindow */ #include "msg.h" #include "binaryconst.h" #include "colors.h" #include "html_charrefs.h" #include "utf8.hh" #include "misc.h" #include "uicmd.hh" #include "history.h" #include "menu.hh" #include "prefs.h" #include "capi.h" #include "html.hh" #include "html_common.hh" #include "form.hh" #include "table.hh" #include "dw/textblock.hh" #include "dw/bullet.hh" #include "dw/listitem.hh" #include "dw/image.hh" #include "dw/ruler.hh" /*----------------------------------------------------------------------------- * Defines *---------------------------------------------------------------------------*/ /* Define to 1 to ignore white space immediately after an open tag, * and immediately before a close tag. 
*/ #define SGML_SPCDEL 0 #define TAB_SIZE 8 /*----------------------------------------------------------------------------- * Name spaces *---------------------------------------------------------------------------*/ using namespace lout; using namespace dw; using namespace dw::core; using namespace dw::core::ui; using namespace dw::core::style; /*----------------------------------------------------------------------------- * Typedefs *---------------------------------------------------------------------------*/ class DilloHtml; typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize); typedef void (*TagCloseFunct) (DilloHtml *html); typedef enum { SEEK_ATTR_START, MATCH_ATTR_NAME, SEEK_TOKEN_START, SEEK_VALUE_START, SKIP_VALUE, GET_VALUE, FINISHED } DilloHtmlTagParsingState; typedef enum { HTML_LeftTrim = 1 << 0, HTML_RightTrim = 1 << 1, HTML_ParseEntities = 1 << 2 } DilloHtmlTagParsingFlags; /* * Exported function with C linkage. */ extern "C" { void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data); } /*----------------------------------------------------------------------------- * Forward declarations *---------------------------------------------------------------------------*/ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof); static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, const DilloUrl *requester, DilloImage *image); static void Html_callback(int Op, CacheClient_t *Client); static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx); /*----------------------------------------------------------------------------- * Local Data *---------------------------------------------------------------------------*/ /* Parsing table structure */ typedef struct { const char *name; /* element name */ unsigned char Flags; /* flags (explained near the table data) */ char EndTag; /* Is it Required, Optional or Forbidden */ uchar_t TagLevel; /* Used to heuristically parse bad HTML */ 
TagOpenFunct open; /* Open function */ TagOpenFunct content; /* Content function */ TagCloseFunct close; /* Close function */ } TagInfo; extern const TagInfo Tags[]; /*----------------------------------------------------------------------------- *----------------------------------------------------------------------------- * Main Code *----------------------------------------------------------------------------- *---------------------------------------------------------------------------*/ /* * Collect HTML error strings. */ void DilloHtml::bugMessage(const char *format, ... ) { va_list argp; if (bw->num_page_bugs) dStr_append_c(bw->page_bugs, '\n'); dStr_sprintfa(bw->page_bugs, "HTML warning: line %d, ", getCurrLineNumber()); va_start(argp, format); dStr_vsprintfa(bw->page_bugs, format, argp); va_end(argp); a_UIcmd_set_bug_prog(bw, ++bw->num_page_bugs); } /* * Wrapper for a_Url_new that adds an error detection message. * If use_base_url is TRUE, it uses base_url. Otherwise it uses html->base_url. */ DilloUrl *a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int use_base_url) { DilloUrl *url; int n_ic, n_ic_spc; url = a_Url_new(url_str, (use_base_url) ? base_url : URL_STR_(html->base_url)); if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) { const char *suffix = (n_ic) > 1 ? "s" : ""; n_ic_spc = URL_ILLEGAL_CHARS_SPC(url); if (n_ic == n_ic_spc) { BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str); } else if (n_ic_spc == 0) { BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').", n_ic, suffix, url_str); } else { BUG_MSG("URL has %d illegal byte%s: " "%d space%s and %d in {00-1F, 7F-FF} range ('%s').", n_ic, suffix, n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str); } } return url; } /* * Set callback function and callback data for the "html/text" MIME type. 
*/ void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data) { DilloWeb *web = (DilloWeb*)P; DilloHtml *html = new DilloHtml(web->bw, web->url, Type); *Data = (void*)html; *Call = (CA_Callback_t)Html_callback; return (void*)html->dw; } static void Html_free(void *data) { delete ((DilloHtml*)data); } /* * Used by the "Load images" page menuitem. */ void a_Html_load_images(void *v_html, DilloUrl *pattern) { DilloHtml *html = (DilloHtml*)v_html; html->loadImages(pattern); } /* * Search for form */ static bool Html_contains_form(DilloHtml *html, void *v_form) { for (int i = 0; i < html->forms->size(); i++) { if (html->forms->get(i) == v_form) { return true; } } return false; } /* * Used by the "Submit form" form menuitem. */ void a_Html_form_submit(void *v_html, void *v_form) { DilloHtml *html = (DilloHtml*)v_html; if (Html_contains_form(html, v_form)) { /* it's still valid */ a_Html_form_submit2(v_form); } } /* * Used by the "Reset form" form menuitem. */ void a_Html_form_reset(void *v_html, void *v_form) { DilloHtml *html = (DilloHtml*)v_html; if (Html_contains_form(html, v_form)) { /* it's still valid */ a_Html_form_reset2(v_form); } } /* * Used by the "Show/Hide hiddens" form menuitem. */ void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display) { DilloHtml *html = (DilloHtml*)v_html; if (Html_contains_form(html, v_form)) { /* it's still valid */ a_Html_form_display_hiddens2(v_form, (display != 0)); } } /* * Set the URL data for image maps. */ static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y) { char data[64]; if (x != -1) { snprintf(data, 64, "?%d,%d", x, y); a_Url_set_ismap_coords(html->links->get(link), data); } } /* * Create a new link, set it as the url's parent * and return the index. */ static int Html_set_new_link(DilloHtml *html, DilloUrl **url) { int nl = html->links->size(); html->links->increase(); html->links->set(nl, (*url) ? 
*url : NULL); return nl; } /* * Evaluates the ALIGN attribute (left|center|right|justify) and * sets the style at the top of the stack. */ void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize) { const char *align; if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) { TextAlignType textAlignType = TEXT_ALIGN_LEFT; if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) BUG_MSG("The align attribute is obsolete in HTML5."); if (dStrAsciiCasecmp (align, "left") == 0) textAlignType = TEXT_ALIGN_LEFT; else if (dStrAsciiCasecmp (align, "right") == 0) textAlignType = TEXT_ALIGN_RIGHT; else if (dStrAsciiCasecmp (align, "center") == 0) textAlignType = TEXT_ALIGN_CENTER; else if (dStrAsciiCasecmp (align, "justify") == 0) textAlignType = TEXT_ALIGN_JUSTIFY; #if 0 else if (dStrAsciiCasecmp (align, "char") == 0) { /* TODO: Actually not supported for

etc. */ v.textAlign = TEXT_ALIGN_STRING; if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) { if (charattr[0] == 0) /* TODO: ALIGN=" ", and even ALIGN="&32;" will reult in * an empty string (don't know whether the latter is * correct, has to be clarified with the specs), so * that for empty strings, " " is assumed. */ style_attrs.textAlignChar = ' '; else style_attrs.textAlignChar = charattr[0]; } else /* TODO: Examine LANG attr of . */ style_attrs.textAlignChar = '.'; } #endif html->styleEngine->setNonCssHint(CSS_PROPERTY_TEXT_ALIGN, CSS_TYPE_ENUM, textAlignType); } } /* * Evaluates the VALIGN attribute (top|bottom|middle|baseline) and * sets the style in style_attrs. Returns true when set. */ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize) { const char *attr; VAlignType valign; if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) { if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) BUG_MSG("The valign attribute is obsolete in HTML5."); if (dStrAsciiCasecmp (attr, "top") == 0) valign = VALIGN_TOP; else if (dStrAsciiCasecmp (attr, "bottom") == 0) valign = VALIGN_BOTTOM; else if (dStrAsciiCasecmp (attr, "baseline") == 0) valign = VALIGN_BASELINE; else valign = VALIGN_MIDDLE; html->styleEngine->setNonCssHint (CSS_PROPERTY_VERTICAL_ALIGN, CSS_TYPE_ENUM, valign); return true; } else return false; } /* * Create and add a new Textblock to the current Textblock. Typically * only one of addBreaks and addBreakOpt is true. */ static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, bool addBreakOpt) { Textblock *textblock = new Textblock (prefs.limit_text_width); Style *style; if (addBreaks) { StyleAttrs attrs = *(html->style ()); attrs.display = DISPLAY_BLOCK; style = Style::create (&attrs); } else { style = html->style (); style->ref (); } if (addBreaks) HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); HT2TB(html)->addWidget (textblock, style); /* Works also for floats etc. 
*/ if (addBreakOpt) HT2TB(html)->addBreakOption (html->style (), false); if (addBreaks) HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); S_TOP(html)->textblock = html->dw = textblock; if (addBreaks) S_TOP(html)->hand_over_break = true; style->unref (); } static bool Html_must_add_breaks(DilloHtml *html) { return HT2TB(html)->mustAddBreaks (html->style ()); } /* * Create and initialize a new DilloHtml class */ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, const char *content_type) { /* Init main variables */ bw = p_bw; page_url = a_Url_dup(url); base_url = a_Url_dup(url); dw = NULL; /* Init event receiver */ linkReceiver.html = this; HT2LT(this)->connectLink (&linkReceiver); a_Bw_add_doc(p_bw, this); /* Init for-parsing variables */ Start_Buf = NULL; Start_Ofs = 0; _MSG("DilloHtml(): content type: %s\n", content_type); this->content_type = dStrdup(content_type); /* get charset */ a_Misc_parse_content_type(content_type, NULL, NULL, &charset); stop_parser = false; CurrOfs = OldOfs = 0; OldLine = 1; DocType = DT_NONE; /* assume Tag Soup 0.0! 
:-) */ DocTypeVersion = 0.0f; styleEngine = new StyleEngine (HT2LT (this), page_url, base_url); cssUrls = new misc::SimpleVector (1); stack = new misc::SimpleVector (16); stack->increase(); stack->getRef(0)->parse_mode = DILLO_HTML_PARSE_MODE_INIT; stack->getRef(0)->table_mode = DILLO_HTML_TABLE_MODE_NONE; stack->getRef(0)->table_border_mode = DILLO_HTML_TABLE_BORDER_SEPARATE; stack->getRef(0)->cell_text_align_set = false; stack->getRef(0)->display_none = false; stack->getRef(0)->list_type = HTML_LIST_NONE; stack->getRef(0)->list_number = 0; stack->getRef(0)->tag_idx = -1; /* MUST not be used */ stack->getRef(0)->textblock = NULL; stack->getRef(0)->table = NULL; stack->getRef(0)->ref_list_item = NULL; stack->getRef(0)->hand_over_break = false; InFlags = IN_NONE; Stash = dStr_new(""); StashSpace = false; pre_column = 0; PreFirstChar = false; PrevWasCR = false; InVisitedLink = false; ReqTagClose = false; TagSoup = true; loadCssFromStash = false; PrevWasBodyClose = false; PrevWasHtmlClose = false; Num_HTML = Num_HEAD = Num_BODY = Num_TITLE = 0; attr_data = dStr_sized_new(1024); non_css_link_color = -1; non_css_visited_color = -1; visited_color = -1; /* Init page-handling variables */ forms = new misc::SimpleVector (1); inputs_outside_form = new misc::SimpleVector (1); links = new misc::SimpleVector (64); images = new misc::SimpleVector (16); /* Initialize the main widget */ initDw(); /* Hook destructor to the dw delete call */ dw->setDeleteCallback(Html_free, this); } /* * Miscellaneous initializations for Dw */ void DilloHtml::initDw() { dReturn_if_fail (dw == NULL); /* Create the main widget */ dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width); bw->num_page_bugs = 0; dStr_truncate(bw->page_bugs, 0); } /* * Free memory used by the DilloHtml class. 
*/ DilloHtml::~DilloHtml() { _MSG("::~DilloHtml(this=%p)\n", this); freeParseData(); a_Bw_remove_doc(bw, this); a_Url_free(page_url); a_Url_free(base_url); for (int i = 0; i < cssUrls->size(); i++) a_Url_free(cssUrls->get(i)); delete (cssUrls); for (int i = 0; i < forms->size(); i++) a_Html_form_delete (forms->get(i)); delete(forms); for (int i = 0; i < inputs_outside_form->size(); i++) a_Html_input_delete(inputs_outside_form->get(i)); delete(inputs_outside_form); for (int i = 0; i < links->size(); i++) a_Url_free(links->get(i)); delete (links); for (int i = 0; i < images->size(); i++) { DilloHtmlImage *img = images->get(i); a_Url_free(img->url); a_Image_unref(img->image); dFree(img); } delete (images); delete styleEngine; } /* * Process the newly arrived html and put it into the page structure. * (This function is called by Html_callback whenever there's new data) */ void DilloHtml::write(char *Buf, int BufSize, int Eof) { int token_start; char *buf = Buf + Start_Ofs; int bufsize = BufSize - Start_Ofs; _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs); #if 0 char *aux = dStrndup(Buf, BufSize); MSG(" {%s}\n", aux); dFree(aux); #endif /* Update Start_Buf. It may be used after the parser is stopped */ Start_Buf = Buf; dReturn_if (dw == NULL); dReturn_if (stop_parser == true); token_start = Html_write_raw(this, buf, bufsize, Eof); Start_Ofs += token_start; } /* * Return the line number of the tag/word being processed by the parser. * Also update the offsets. */ int DilloHtml::getCurrLineNumber() { int i, ofs, line; const char *p = Start_Buf; dReturn_val_if_fail(p != NULL, -1); /* Disable line counting for META hack. Buffers differ. */ dReturn_val_if((InFlags & IN_META_HACK), -1); ofs = CurrOfs; line = OldLine; for (i = OldOfs; i < ofs; ++i) if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n')) ++line; OldOfs = CurrOfs; OldLine = line; return line; } /* * Free parsing data. 
*/ void DilloHtml::freeParseData() { delete(stack); dStr_free(Stash, TRUE); dStr_free(attr_data, TRUE); dFree(content_type); dFree(charset); } /* * Finish parsing a HTML page. Close the parser and close the client. * The class is not deleted here, it remains until the widget is destroyed. */ void DilloHtml::finishParsing(int ClientKey) { int si; dReturn_if (stop_parser == true); /* flag we've already parsed up to the last byte */ InFlags |= IN_EOF; /* force the close of elements left open (TODO: not for XHTML) */ while ((si = stack->size() - 1)) { if (stack->getRef(si)->tag_idx != -1) { Html_tag_cleanup_at_close(this, stack->getRef(si)->tag_idx); } } /* Nothing left to do with the parser. Clear all flags, except EOF. */ InFlags = IN_EOF; /* Remove this client from our active list */ a_Bw_close_client(bw, ClientKey); } /* * Allocate and insert form information. */ int DilloHtml::formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset) { // avoid data loss on repush after CSS stylesheets have been loaded bool enabled = bw->NumPendingStyleSheets == 0; DilloHtmlForm *form = a_Html_form_new (this, method, action, enc, charset, enabled); int nf = forms->size (); forms->increase (); forms->set (nf, form); _MSG("Html formNew: action=%s nform=%d\n", action, nf); return forms->size(); } /* * Get the current form. */ DilloHtmlForm *DilloHtml::getCurrentForm () { return forms->get (forms->size() - 1); } bool_t DilloHtml::unloadedImages() { for (int i = 0; i < images->size(); i++) { if (images->get(i)->image != NULL) { return TRUE; } } return FALSE; } /* * Load images if they were disabled. */ void DilloHtml::loadImages (const DilloUrl *pattern) { dReturn_if (a_Bw_expecting(bw)); /* If the user asked for a specific image, the user (NULL) is the requester, * and the domain mechanism will always permit the request. 
But if the user * just asked for all images (clicking "Load images"), use the page URL as * the requester so that the domain mechanism can act as a filter. * If the possible patterns become more complex, it might be good to have * the caller supply the requester instead. */ const DilloUrl *requester = pattern ? NULL : this->page_url; for (int i = 0; i < images->size(); i++) { DilloHtmlImage *hi = images->get(i); if (hi->image) { assert(hi->url); if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) { if (Html_load_image(bw, hi->url, requester, hi->image)) { a_Image_unref (hi->image); hi->image = NULL; // web owns it now } } } } } /* * Save URL in a vector (may be loaded later). */ void DilloHtml::addCssUrl(const DilloUrl *url) { int nu = cssUrls->size(); cssUrls->increase(); cssUrls->set(nu, a_Url_dup(url)); } bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img, int x, int y) { BrowserWindow *bw = html->bw; _MSG(" ** "); if (link == -1) { _MSG(" Link LEAVE notify...\n"); a_UIcmd_set_msg(bw, ""); } else { _MSG(" Link ENTER notify...\n"); Html_set_link_coordinates(html, link, x, y); a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link))); } return true; } /* * Handle the "press" signal. */ bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img, int x, int y, EventButton *event) { BrowserWindow *bw = html->bw; int ret = false; DilloUrl *linkurl = NULL; _MSG("pressed button %d\n", event->button); if (event->button == 3) { // popup menus if (img != -1) { // image menu if (link != -1) linkurl = html->links->get(link); const bool_t loaded_img = (html->images->get(img)->image == NULL); a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img, html->page_url, linkurl); ret = true; } else { if (link == -1) { a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls); ret = true; } else { a_UIcmd_link_popup(bw, html->links->get(link)); ret = true; } } } return ret; } /* * Handle the "click" signal. 
*/ bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img, int x, int y, EventButton *event) { BrowserWindow *bw = html->bw; if ((img != -1) && (html->images->get(img)->image)) { // clicked an image that has not already been loaded if (event->button == 1){ // load all instances of this image DilloUrl *pattern = html->images->get(img)->url; html->loadImages(pattern); return true; } } if (link != -1) { DilloUrl *url = html->links->get(link); _MSG("clicked on URL %d: %s\n", link, a_Url_str (url)); Html_set_link_coordinates(html, link, x, y); if (event->button == 1) { a_UIcmd_open_url(bw, url); } else if (event->button == 2) { if (prefs.middle_click_opens_new_tab) { int focus = prefs.focus_new_tab ? 1 : 0; if (event->state == SHIFT_MASK) focus = !focus; a_UIcmd_open_url_nt(bw, url, focus); } else a_UIcmd_open_url_nw(bw, url); } else { return false; } /* Change the link color to "visited" as visual feedback */ for (Widget *w = widget; w; w = w->getParent()) { _MSG(" ->%s\n", w->getClassName()); if (w->instanceOf(dw::Textblock::CLASS_ID)) { ((Textblock*)w)->changeLinkColor (link, html->visited_color); break; } } } return true; } /* * Initialize the stash buffer */ void a_Html_stash_init(DilloHtml *html) { S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH; html->StashSpace = false; dStr_truncate(html->Stash, 0); } /* * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them! * * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED. * TODO: Probably I should remove this hack. --Jcid */ static int Html_ms_stupid_quotes_2ucs(int codepoint) { int ret; switch (codepoint) { case 145: case 146: ret = '\''; break; case 147: case 148: ret = '"'; break; case 149: ret = 176; break; case 150: case 151: ret = '-'; break; default: ret = codepoint; break; } return ret; } /* * Parse a numeric character reference (e.g., "/" or "/"). * The "&#" has already been consumed. 
*/ static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize) { static char buf[5]; char *s = tok; int n, codepoint = -1; errno = 0; if (*s == 'x' || *s == 'X') { if (isxdigit(*++s)) { /* strtol with base 16 accepts leading "0x" - we don't */ if (*s == '0' && s[1] == 'x') { s++; codepoint = 0; } else { codepoint = strtol(s, &s, 16); } } } else if (isdigit(*s)) { codepoint = strtol(s, &s, 10); } if (errno) codepoint = -1; if (*s == ';') s++; else { if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) { char c = *s; *s = '\0'; BUG_MSG("Character reference '&#%s' lacks ';'.", tok); *s = c; } /* Don't require ';' for old HTML, except that our current heuristic * is to require it in attributes to avoid cases like "©=1" found * in URLs. */ if (is_attr || html->DocType == DT_XHTML || (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { return NULL; } } if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' && codepoint != '\f') || (codepoint >= 0x7f && codepoint <= 0x9f) || (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff || ((codepoint & 0xfffe) == 0xfffe) || (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) && codepoint > 0xffff)) { /* this catches null bytes, errors, codes out of range, disallowed * control chars, permanently undefined chars, and surrogates. */ char c = *s; *s = '\0'; BUG_MSG("Numeric character reference '&#%s' is not valid.", tok); *s = c; codepoint = (codepoint >= 145 && codepoint <= 151) ? 
Html_ms_stupid_quotes_2ucs(codepoint) : -1; } if (codepoint != -1) { if (codepoint >= 128) { n = a_Utf8_encode(codepoint, buf); } else { n = 1; buf[0] = (char) codepoint; } assert(n < 5); buf[n] = '\0'; *entsize = s-tok+2; return buf; } else { return NULL; } } /* * Comparison function for binary search */ static int Html_charref_comp(const void *a, const void *b) { return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref); } /* * Binary search of 'key' in charref list */ static Charref_t *Html_charref_search(char *key) { Charref_t RefKey; RefKey.ref = key; return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef, sizeof(Charref_t), Html_charref_comp); } /* * Parse a named character reference (e.g., "&" or "…"). * The "&" has already been consumed. */ static const char *Html_parse_named_charref(DilloHtml *html, char *tok, bool_t is_attr, int *entsize) { Charref_t *p; char c; char *s = tok; const char *ret = NULL; while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ; c = *s; *s = '\0'; if (c != ';') { if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) BUG_MSG("Character reference '&%s' lacks ';'.", tok); /* Don't require ';' for old HTML, except that our current heuristic * is to require it in attributes to avoid cases like "©=1" found * in URLs. */ if (is_attr || html->DocType == DT_XHTML || (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { return ret; } } if ((p = Html_charref_search(tok))) { ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ? p->html5_str : p->html4_str; } if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos")) ret = "'"; *s = c; if (c == ';') s++; if (!ret) { c = *s; *s = '\0'; BUG_MSG("Undefined character reference '&%s'.", tok); *s = c; } *entsize = s-tok+1; return ret; } /* * Given an entity, return the corresponding string. * Returns NULL if not a valid entity. 
* * The first character *token is assumed to be == '&' * * For valid entities, *entsize is set to the length of the parsed entity. */ static const char *Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize, bool_t is_attr) { const char *ret = NULL; char *tok; if (toksize > 50) { /* In pathological cases, attributes can be megabytes long and filled * with character references. As of HTML5, the longest defined character * reference is about 32 bytes long. */ toksize = 50; } token++; tok = dStrndup(token, (uint_t)toksize); if (*tok == '#') { ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize); } else if (isalpha(*tok)) { ret = Html_parse_named_charref(html, tok, is_attr, entsize); } else if (prefs.show_extra_warnings && (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) { // HTML5 doesn't mind literal '&'s. BUG_MSG("Literal '&'."); } dFree(tok); return ret; } /* * Parse all the entities in a token. Takes the token and its length, and * returns a newly allocated string. */ char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize) { const char *esc_set = "&"; int i, s, entsize; char *str; s = strcspn(token, esc_set); if (s >= toksize) { /* no ampersands */ str = dStrndup(token, toksize); } else { Dstr *ds = dStr_sized_new(toksize); dStr_append_l(ds, token, s); for (i = s; i < toksize; i++) { const char *entstr; const bool_t is_attr = FALSE; if (token[i] == '&' && (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize, is_attr))) { dStr_append(ds, entstr); i += entsize-1; } else { dStr_append_c(ds, token[i]); } } str = ds->str; dStr_free(ds, 0); } return str; } /* * For white-space: pre-line, we must break the line if encountering a newline. * Otherwise, collapse whitespace as usual. 
*/ static void Html_process_space_pre_line(DilloHtml *html, const char *space, int spacesize) { int i, breakCnt = 0; for (i = 0; i < spacesize; i++) { /* Support for "\r", "\n" and "\r\n" line breaks */ if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) { breakCnt++; html->PrevWasCR = (space[i] == '\r'); HT2TB(html)->addLinebreak (html->wordStyle ()); } } if (breakCnt == 0) { HT2TB(html)->addSpace(html->wordStyle ()); } } /* * Parse spaces */ static void Html_process_space(DilloHtml *html, const char *space, int spacesize) { char *spc; int i, offset; DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode; if (S_TOP(html)->display_none) { /* do nothing */ } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) { html->StashSpace = (html->Stash->len > 0); } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) { dStr_append_l(html->Stash, space, spacesize); } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) { int spaceCnt = 0; /* re-scan the string for characters that cause line breaks */ for (i = 0; i < spacesize; i++) { /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */ if (!html->PreFirstChar && (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) { if (spaceCnt) { spc = dStrnfill(spaceCnt, ' '); HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ()); dFree(spc); spaceCnt = 0; } HT2TB(html)->addLinebreak (html->wordStyle ()); html->pre_column = 0; } html->PreFirstChar = false; /* cr and lf should not be rendered -- they appear as a break */ switch (space[i]) { case '\r': case '\n': break; case '\t': if (prefs.show_extra_warnings) BUG_MSG("TAB character inside

.");
            offset = TAB_SIZE - html->pre_column % TAB_SIZE;
            spaceCnt += offset;
            html->pre_column += offset;
            break;
         default:
            spaceCnt++;
            html->pre_column++;
            break;
         }

         html->PrevWasCR = (space[i] == '\r');
      }

      if (spaceCnt) {
         // add break possibility for the white-space:pre-wrap case
         HT2TB(html)->addBreakOption (html->wordStyle (), false);
         spc = dStrnfill(spaceCnt, ' ');
         HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
         dFree(spc);
      }

   } else {
      if (SGML_SPCDEL) {
         /* SGML_SPCDEL ignores white space immediately after an open tag */
      } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
         Html_process_space_pre_line(html, space, spacesize);
      } else {
         HT2TB(html)->addSpace(html->wordStyle ());
      }

      if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
         html->StashSpace = (html->Stash->len > 0);
   }
}

/*
 * Handles putting the word into its proper place
 *  > STASH and STASH_AND_BODY --> html->Stash (entities parsed)
 *  > VERBATIM                 --> html->Stash (untouched)
 *  > everything but STASH and VERBATIM also goes through addText()
 *
 * Entities are parsed (or not) according to parse_mode.
 * 'word' is a '\0'-terminated string; 'size' is its length in bytes.
 * Nothing is done while the top of the stack has display_none set.
 */
static void Html_process_word(DilloHtml *html, const char *word, int size)
{
   int i, j, start;
   char *Pword;
   DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;

   if (S_TOP(html)->display_none)
      return;
   /* Warn (once) about content that follows </html> or </body>. */
   if ((i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0)) {
      BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
      html->PrevWasHtmlClose = html->PrevWasBodyClose = false;
   }

   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
       parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
      if (html->StashSpace) {
         /* emit the space left pending by Html_process_space() */
         dStr_append_c(html->Stash, ' ');
         html->StashSpace = false;
      }
      Pword = a_Html_parse_entities(html, word, size);
      dStr_append(html->Stash, Pword);
      dFree(Pword);

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* word goes in untouched, it is not processed here. */
      dStr_append_l(html->Stash, word, size);
   }

   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
       parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* skip until the closing instructions */

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
      /* all this overhead is to catch white-space entities */
      Pword = a_Html_parse_entities(html, word, size);
      /* Split the parsed word into alternating white-space / text runs.
       * The bytes are cast to unsigned char because passing a negative
       * char (any UTF-8 lead/continuation byte) to isspace() is undefined. */
      for (start = i = 0; Pword[i]; start = i)
         if (isspace((unsigned char)Pword[i])) {
            while (Pword[++i] && isspace((unsigned char)Pword[i])) ;
            Html_process_space(html, Pword + start, i - start);
         } else {
            while (Pword[++i] && !isspace((unsigned char)Pword[i])) ;
            HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
            html->pre_column += i - start;
            html->PreFirstChar = false;
         }
      dFree(Pword);

   } else {
      const char *word2, *beyond_word2;

      Pword = NULL;
      if (!memchr(word,'&', size)) {
         /* No entities */
         word2 = word;
         beyond_word2 = word + size;
      } else {
         /* Collapse white-space entities inside the word (except &nbsp;) */
         Pword = a_Html_parse_entities(html, word, size);
         /* Collapse adjacent " \t\f\n\r" characters into a single space.
          * In-place copy: j reads, i writes; the loop ends after the
          * terminating '\0' has been copied. */
         for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
            if (strchr(" \t\f\n\r", Pword[i])) {
               if (i == 0 || (i > 0 && Pword[i-1] != ' '))
                  Pword[i] = ' ';
               else
                  /* previous output is already a space: drop this one and
                   * skip the rest of the white-space run */
                  for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
                     ;
            }
         }
         word2 = Pword;
         beyond_word2 = word2 + strlen(word2);
      }
      for (start = i = 0; word2[i]; start = i) {
         int len;

         if (isspace((unsigned char)word2[i])) {
            while (word2[++i] && isspace((unsigned char)word2[i])) ;
            Html_process_space(html, word2 + start, i - start);
         } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
            /* not rendered; only marks a place where the line may break */
            i += 3;
            HT2TB(html)->addBreakOption(html->wordStyle (), false);
         } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
            /* each ideographic character is its own breakable unit */
            i += len;
            HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
            HT2TB(html)->addBreakOption(html->wordStyle (), false);
         } else {
            /* NOTE(review): 'len' is used here as the byte length of the
             * character at word2+i, as stored by the a_Utf8_ideographic()
             * call that just returned false -- confirm it always sets it. */
            do {
               i += len;
            } while (word2[i] && !isspace((unsigned char)word2[i]) &&
                     strncmp(word2+i, utf8_zero_width_space, 3) &&
                     (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
            HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
         }
      }
      /* Only free when word2 is our own parsed copy (not the caller's word) */
      if (Pword == word2)
         dFree(Pword);
   }
}

/*
 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
 * structure, with the initial < skipped over (e.g. "P align=center>")?
 *
 * The comparison is ASCII case-insensitive. A match requires that the
 * name in 'tag' ends right after the compared characters, i.e. that the
 * next byte (still inside tagsize) is white space, '>' or '/'.
 * Returns true on match, false otherwise.
 */
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
{
   int i;

   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
      if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
         return false;
   }
   /* The test for '/' is for xml compatibility: "empty/>" will be matched.
    * The byte is cast to unsigned char because calling isspace() with a
    * negative char value (e.g. a UTF-8 byte) is undefined behavior. */
   if (i < tagsize && (isspace((unsigned char)tag[i]) ||
                       tag[i] == '>' || tag[i] == '/'))
      return true;
   return false;
}

/*
 * Called after popping the stack, to handle nested Textblock widgets.
 *
 * When the current widget (html->dw) is no longer the textblock of the new
 * stack top, the current one is flushed (optionally handing over its break
 * first) and html->dw is switched to the stack top's textblock.
 */
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
{
   /* Nothing to do while we are still inside the same textblock. */
   if (html->dw == S_TOP(html)->textblock)
      return;

   if (hand_over_break)
      HT2TB(html)->handOverBreak (html->style ());
   HT2TB(html)->flush ();
   html->dw = S_TOP(html)->textblock;
}

/*
 * Push a new stack entry for tag_idx.
 *
 * The new entry starts as a copy of the previous top, so every attribute is
 * carried over; only tag_idx is overwritten afterwards.
 */
static void Html_push_tag(DilloHtml *html, int tag_idx)
{
   const int new_top = html->stack->size ();

   html->stack->increase ();
   /* Clone the former top wholesale instead of copying field by field. */
   *html->stack->getRef(new_top) = *html->stack->getRef(new_top - 1);
   html->stack->getRef(new_top)->tag_idx = tag_idx;

   html->dw = S_TOP(html)->textblock;
}

/*
 * Push the tag (used to force an element with optional open into the stack).
 * Unlike a plain Html_push_tag(), this first calls html->startElement()
 * for tag_idx and only then pushes the tag.
 */
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
{
   html->startElement (tag_idx);
   Html_push_tag(html, tag_idx);
}

/*
 * Pop the top tag in the stack.
 *
 * Ends the element in the style engine, shrinks the stack by one entry and
 * then lets Html_eventually_pop_dw() switch widgets if needed. The popped
 * entry's hand_over_break value is saved before the entry is discarded.
 */
static void Html_real_pop_tag(DilloHtml *html)
{
   html->styleEngine->endElement (S_TOP(html)->tag_idx);

   const bool popped_hand_over_break = S_TOP(html)->hand_over_break;

   html->stack->setSize (html->stack->size() - 1);
   Html_eventually_pop_dw(html, popped_hand_over_break);
}

/*
 * Cleanup the stack down to a given index: entries are closed and popped
 * from the top until the stack size is no longer greater than idx.
 *
 * A BODY entry is only popped when IN_EOF is set or a tag close was
 * requested (ReqTagClose); otherwise the loop stops there and records the
 * attempt in PrevWasHtmlClose (idx == 1) or PrevWasBodyClose.
 */
static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx)
{
   static int i_BODY = a_Html_tag_index("body");

   for (;;) {
      const int depth = html->stack->size();
      if (depth <= idx)
         break;

      const int top_idx = S_TOP(html)->tag_idx;
      TagInfo top = Tags[top_idx];

      /* Anything above the target entry that needs an end tag is a bug. */
      if (depth > idx + 1 && top.EndTag != 'O')
         BUG_MSG("  - forcing close of open tag: <%s>.", top.name);
      _MSG("Close: %s sz=%d idx=%d\n", top.name, depth, idx);

      const bool may_pop_body =
         (html->InFlags & IN_EOF) || html->ReqTagClose;
      if (top_idx == i_BODY && !may_pop_body) {
         if (idx == 1)
            html->PrevWasHtmlClose = true;
         else
            html->PrevWasBodyClose = true;
         break; // only pop {BODY,HTML} upon EOF or redundancy
      }

      if (top.close)
         top.close(html);
      Html_real_pop_tag(html);
   }
}

/*
 * Default close function for tags.
 * (conditional cleanup of the stack)
 *
 * The stack is scanned from the top down to index 1 looking for the element
 * that the closing tag 'new_idx' refers to:
 *   - an entry with the same tag index is a match: the scan stops there.
 *   - entries whose end tag is optional ('O') are skipped.
 *   - when closing BUTTON, SELECT or TEXTAREA while the corresponding IN_*
 *     flag is set, every entry is skipped, so these elements can close
 *     the tags opened inside them.
 *   - otherwise, in w3c mode (prefs.w3c_plus_heuristics unset) the entry is
 *     the one that should have been closed; with heuristics enabled this is
 *     only the case when its TagLevel is >= the closing tag's TagLevel,
 *     and lower-level entries are skipped.
 *
 * On a match the stack is cleaned up to the matching entry; otherwise a
 * diagnostic naming the unexpected (and, if known, the expected) closing
 * tag is issued and the stack is left untouched.
 */
static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
{
   static int i_BUTTON = a_Html_tag_index("button"),
              i_SELECT = a_Html_tag_index("select"),
              i_TEXTAREA = a_Html_tag_index("textarea");
   int w3c_mode = !prefs.w3c_plus_heuristics;
   int stack_idx, tag_idx, matched = 0, expected = 0;
   TagInfo new_tag = Tags[new_idx];

   /* Look for the candidate tag to close */
   stack_idx = html->stack->size();
   while (--stack_idx) {
      tag_idx = html->stack->getRef(stack_idx)->tag_idx;
      if (tag_idx == new_idx) {
         /* matching tag found */
         matched = 1;
         break;
      } else if (Tags[tag_idx].EndTag == 'O') {
         /* skip an optional tag */
         continue;
      } else if ((new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
                 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
                 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
         /* let these elements close tags inside them */
         continue;
      } else if (w3c_mode || Tags[tag_idx].TagLevel >= new_tag.TagLevel) {
         /* this is the tag that should have been closed */
         expected = 1;
         break;
      }
   }

   if (matched) {
      Html_tag_cleanup_to_idx(html, stack_idx);
   } else if (expected) {
      /* tag_idx still holds the entry that stopped the scan */
      BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
              new_tag.name, Tags[tag_idx].name);
   } else {
      BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
   }
}

/*
 * Avoid nesting and inter-nesting of BUTTON, SELECT and TEXTAREA,
 * by closing them before opening another.
 * This is not an HTML SPEC restriction, but it avoids lots of trouble
 * inside dillo (concurrent inputs), and makes almost no sense to have.
 *
 * Does nothing unless one of IN_BUTTON, IN_SELECT or IN_TEXTAREA is set and
 * new_idx is one of those three elements. Otherwise the still-open element
 * is looked up in the stack (index 0 is never inspected); when found, the
 * stack is cleaned up to it. In either case the three flags are cleared
 * before returning.
 */
static void Html_tag_cleanup_nested_inputs(DilloHtml *html, int new_idx)
{
   static int i_BUTTON = a_Html_tag_index("button"),
              i_SELECT = a_Html_tag_index("select"),
              i_TEXTAREA = a_Html_tag_index("textarea");
   int stack_idx, u_idx, matched = 0;

   dReturn_if_fail(html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA));
   dReturn_if_fail(new_idx == i_BUTTON || new_idx == i_SELECT ||
                   new_idx == i_TEXTAREA);

   /* Get the unclosed tag index */
   u_idx = (html->InFlags & IN_BUTTON) ? i_BUTTON :
                 (html->InFlags & IN_SELECT) ? i_SELECT : i_TEXTAREA;

   /* Look for it inside the stack */
   stack_idx = html->stack->size();
   while (--stack_idx) {
      if (html->stack->getRef(stack_idx)->tag_idx == u_idx) {
         /* matching tag found */
         matched = 1;
         break;
      }
   }

   if (matched) {
      BUG_MSG("Attempt to nest <%s> element inside <%s> -- closing <%s>.",
              Tags[new_idx].name, Tags[u_idx].name, Tags[u_idx].name);
      Html_tag_cleanup_to_idx(html, stack_idx);
   } else {
      /* Adjacent literals are concatenated: keep the separating space. */
      MSG_WARN("Inconsistent parser state, flag is SET but no '%s' element "
               "was found in the stack\n", Tags[u_idx].name);
   }

   html->InFlags &= ~(IN_BUTTON | IN_SELECT | IN_TEXTAREA);
}


/*
 * Some parsing routines.
 */

/*
 * Used by a_Html_parse_length
 *
 * Reads a leading number from attr with strtod() and classifies it by the
 * character that follows:
 *   '%'   -> percentage (the number is divided by 100)
 *   '*'   -> relative length
 *   other -> pixels
 * The "px" suffix is deliberately not consumed, as it seems not allowed by
 * the HTML4.01 SPEC.
 *
 * If endptr is not NULL, it receives the position just past the number and
 * its '%' or '*' suffix, if any.
 */
static CssLength Html_parse_length_or_multi_length (const char *attr,
                                                    char **endptr)
{
   char *rest;
   CssLength length;
   const double value = strtod (attr, &rest);

   if (*rest == '%') {
      ++rest;
      length = CSS_CREATE_LENGTH (value / 100, CSS_LENGTH_TYPE_PERCENTAGE);
   } else if (*rest == '*') {
      ++rest;
      length = CSS_CREATE_LENGTH (value, CSS_LENGTH_TYPE_RELATIVE);
   } else {
      length = CSS_CREATE_LENGTH (value, CSS_LENGTH_TYPE_PX);
   }

   if (endptr)
      *endptr = rest;
   return length;
}


/*
 * Returns a length or a percentage.
 *
 * An AUTO length (CSS_LENGTH_TYPE_AUTO) is returned instead when:
 *   - attr is NULL,
 *   - the value is a relative length ("n*"), which is not allowed as
 *     &Length;, or
 *   - the number (and its optional '%') is followed by anything other
 *     than whitespace; a bug message is issued in this case.
 */
CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
{
   CssLength l;
   char *end;

   /* strtod() must not be given a NULL string */
   if (!attr)
      return CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);

   l = Html_parse_length_or_multi_length (attr, &end);
   if (CSS_LENGTH_TYPE (l) == CSS_LENGTH_TYPE_RELATIVE)
      /* not allowed as &Length; */
      l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
   else {
      /* allow only whitespaces; the cast keeps isspace() well defined
       * for bytes >= 0x80 when char is signed */
      if (*end && !isspace ((unsigned char)*end)) {
         BUG_MSG("Garbage after length: '%s'.", attr);
         l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
      }
   }

   _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
   return l;
}

/*
 * Parse a color attribute through a_Color_parse().
 * A bug message is issued when the parser flags an error.
 * Return value: whatever a_Color_parse() returned for str/default_color.
 */
int32_t a_Html_color_parse(DilloHtml *html, const char *str,
                           int32_t default_color)
{
   int parse_error = 1;
   const int32_t parsed = a_Color_parse(str, default_color, &parse_error);

   if (parse_error) {
      BUG_MSG("Color '%s' is not in \"#RRGGBB\" format.", str);
   }
   return parsed;
}

/*
 * Validate an ID/NAME-like attribute value.
 *
 * HTML 5 documents: 'val' must be non-empty and must not contain a space.
 *
 * Any other document: 'val' must be composed of ASCII characters inside
 * [A-Za-z0-9:_.-]. A value that does not start with an ASCII letter also
 * gets a bug message, but it does not change the return value.
 * Note: ID can't have entities, but this check is enough (no '&').
 *
 * Return value: 1 if OK, 0 otherwise.
 */
static int
 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
{
   if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
      if (*val && !strchr(val, ' '))
         return 1;

      BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
              "spaces.", attrname, val);
      return 0;
   }

   /* Advance over the longest prefix made of allowed characters. */
   const char *p = val;
   while (*p && isascii(*p) && (isalnum(*p) || strchr(":_.-", *p)))
      ++p;

   if (*p || !(isascii(val[0]) && isalpha(val[0])))
      BUG_MSG("%s attribute value \"%s\" is not of the form "
              "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);

   return !*p;
}

/*
 * Handle DOCTYPE declaration
 *
 * Follows the convention that HTML 4.01
 * doctypes which include a full w3c DTD url are treated as
 * standards-compliant, but 4.01 without the url and HTML 4.0 and
 * earlier are not. XHTML doctypes are always standards-compliant
 * whether or not an url is present.
 *
 * Note: I'm not sure about this convention. The W3C validator
 * recognizes the "HTML Level" with or without the URL. The convention
 * comes from mozilla (see URLs below), but Dillo doesn't have the same
 * rendering modes, so it may be better to choose another behaviour. --Jcid
 *
 * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
 * http://lists.dillo.org/pipermail/dillo-dev/2004-October/002300.html
 *
 * This is not a full DOCTYPE parser, just enough for what Dillo uses.
 *
 * The result is stored in html->DocType and html->DocTypeVersion; a
 * declaration that matches none of the known forms sets DT_UNRECOGNIZED.
 *
 * NOTE(review): the first statement of the body looks truncated in this
 * copy (a declaration runs straight into later code) -- verify the text
 * against the upstream file before relying on it.
 */
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
{
   static const char HTML_SGML_sig [] = "DocType != DT_NONE)
      BUG_MSG("Multiple DOCTYPE declarations.");

   /* The default DT_NONE type is TagSoup */
   if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
       !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
      p = ntag + strlen(HTML_SGML_sig) + 1;
      if (!strncmp(p, HTML401, strlen(HTML401)) &&
          dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 4.01f;
      } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
                 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
         html->DocType = DT_XHTML;
         html->DocTypeVersion = 1.0f;
      } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
                 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
         html->DocType = DT_XHTML;
         html->DocTypeVersion = 1.1f;
      } else if (!strncmp(p, HTML40, strlen(HTML40))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 4.0f;
      } else if (!strncmp(p, HTML32, strlen(HTML32))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 3.2f;
      } else if (!strncmp(p, HTML20, strlen(HTML20))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 2.0f;
      }
   } else if (!dStrAsciiCasecmp(ntag, "") ||
              !dStrAsciiCasecmp(ntag, "") ||
              !dStrAsciiCasecmp(ntag,
                           "") ||
              !dStrAsciiCasecmp(ntag,
                             "")) {
      html->DocType = DT_HTML;
      html->DocTypeVersion = 5.0f;
   }
   if (html->DocType == DT_NONE) {
      html->DocType = DT_UNRECOGNIZED;
      BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
   }
   dFree(ntag);
}

/*
 * Handle open HTML element
 *
 * Sets IN_HTML and counts the tag in Num_HTML (saturating at UCHAR_MAX).
 * A second or later <html> is reported and a tag close is requested
 * through ReqTagClose.
 */
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
{
   /* The IN_HTML flag will be kept set until at IN_EOF condition.
    * This allows to handle pages with multiple or uneven HTML tags */
   html->InFlags |= IN_HTML;

   if (html->Num_HTML < UCHAR_MAX)
      ++html->Num_HTML;

   if (html->Num_HTML > 1) {
      BUG_MSG("<html> was already open.");
      html->ReqTagClose = true;
   }
}

/*
 * Handle close HTML element
 * Nothing is done here besides the (normally disabled) debug trace.
 */
static void Html_tag_close_html(DilloHtml *html)
{
   _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);

  /* As some Tag soup pages use multiple HTML tags, this function
   * gets called only on EOF and upon an extra HTML open.
   * Also, we defer clearing the IN_HTML flag until IN_EOF */
}

/*
 * Handle open HEAD element
 *
 * A <head> found after BODY has started is reported and rejected without
 * being counted. Otherwise the tag is counted in Num_HEAD (saturating at
 * UCHAR_MAX) and IN_HEAD is set, unless HEAD is already open or was
 * already finished. Every rejected case requests a tag close through
 * ReqTagClose.
 */
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
{
   if (html->InFlags & IN_BODY) {
      BUG_MSG("<head> must go before the BODY section.");
      html->ReqTagClose = true;
      return;
   }

   if (html->Num_HEAD < UCHAR_MAX)
      ++html->Num_HEAD;
   if (html->InFlags & IN_HEAD) {
      BUG_MSG("<head> was already open.");
      html->ReqTagClose = true;
   } else if (html->Num_HEAD > 1) {
      BUG_MSG("<head> already finished -- ignoring.");
      html->ReqTagClose = true;
   } else {
      html->InFlags |= IN_HEAD;
   }
}

/*
 * Handle close HEAD element
 * Note: HEAD is parsed once completely got.
 *
 * When this closes the first (well formed) HEAD, a missing TITLE is
 * reported, IN_HEAD is cleared and the collected remote stylesheets
 * (html->cssUrls) are loaded. When it closes a redundant HEAD, only the
 * Num_HEAD counter is decremented.
 */
static void Html_tag_close_head(DilloHtml *html)
{
   if (html->InFlags & IN_HEAD) {
      if (html->Num_HEAD == 1) {
         /* match for the well formed start of HEAD section */
         if (html->Num_TITLE == 0)
            BUG_MSG("<head> lacks <title>.");

         html->InFlags &= ~IN_HEAD;

         /* charset is already set, load remote stylesheets now */
         for (int i = 0; i < html->cssUrls->size(); i++) {
            a_Html_load_stylesheet(html, html->cssUrls->get(i));
         }
      } else if (html->Num_HEAD > 1) {
         --html->Num_HEAD;
      }
   } else {
      /* not reached, see Html_tag_cleanup_at_close() */
   }
}

/*
 * Handle open TITLE
 *
 * The stash is always initialized, so the TITLE content is collected there
 * and can be ignored when the element is not valid, redundant or outside
 * the HEAD section. Inside HEAD the tag is counted in Num_TITLE
 * (saturating at UCHAR_MAX) and a redundant TITLE is reported.
 */
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_stash_init(html);

   if (!(html->InFlags & IN_HEAD)) {
      BUG_MSG("<title> must be inside <head> -- ignoring.");
      return;
   }

   if (html->Num_TITLE < UCHAR_MAX)
      ++html->Num_TITLE;
   if (html->Num_TITLE > 1)
      BUG_MSG("Redundant <title>.");
}

/*
 * Handle close TITLE
 * Sets the stashed text as page title in the browser window and in the
 * history, but only for the first TITLE and only while inside HEAD.
 */
static void Html_tag_close_title(DilloHtml *html)
{
   /* title is only valid inside HEAD, and only the first one counts */
   if (!(html->InFlags & IN_HEAD) || html->Num_TITLE != 1)
      return;

   a_UIcmd_set_page_title(html->bw, html->Stash->str);
   a_History_set_title_by_url(html->page_url, html->Stash->str);
}

/*
 * Handle open SCRIPT
 * Initializes the stash, where the embedded code will be stored, and
 * switches the parse mode of the stack top to verbatim.
 * MODE_VERBATIM is used because MODE_STASH catches entities.
 */
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_stash_init(html);
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}

/*
 * Handle close SCRIPT
 * Currently a no-op: the stashed script text is not used.
 */
static void Html_tag_close_script(DilloHtml *html)
{
   /* eventually the stash will be sent to an interpreter for parsing */
}

/*
 * Handle open STYLE
 * Store contents in the stash where the style sheet interpreter can get it.
 *
 * html->loadCssFromStash records whether the stashed text should later be
 * used: it is cleared when 'type' is present and is not "text/css", or
 * when 'media' is present, is not "all" and does not contain "screen".
 * A missing 'type' is reported unless the document is HTML newer
 * than 4.01.
 */
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;

   html->loadCssFromStash = true;

   attrbuf = a_Html_get_attr(html, tag, tagsize, "type");
   if (attrbuf == NULL) {
      if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
         BUG_MSG("<style> requires type attribute.");
   } else if (dStrAsciiCasecmp(attrbuf, "text/css") != 0) {
      html->loadCssFromStash = false;
   }

   /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
    * but sec. 14.2.3 says that the attribute is case-insensitive.
    * TODO can be a comma-separated list.
    * TODO handheld.
    */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "media");
   if (attrbuf != NULL &&
       dStrAsciiCasecmp(attrbuf, "all") != 0 &&
       dStriAsciiStr(attrbuf, "screen") == NULL) {
      html->loadCssFromStash = false;
   }

   a_Html_stash_init(html);
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}

/*
 * Handle close STYLE
 * Feeds the stashed text to the style engine as an author style sheet,
 * provided embedded CSS parsing is enabled in the preferences and the
 * open handler left loadCssFromStash set.
 */
static void Html_tag_close_style(DilloHtml *html)
{
   if (!prefs.parse_embedded_css || !html->loadCssFromStash)
      return;

   html->styleEngine->parse(html, html->base_url, html->Stash->str,
                            html->Stash->len, CSS_ORIGIN_AUTHOR);
}

/*
 * <BODY>
 *
 * Sets IN_BODY and counts the tag in Num_BODY (saturating at UCHAR_MAX);
 * a redundant <body> is reported, requests a tag close and is otherwise
 * ignored. For the first one, the presentational attributes (bgcolor,
 * text, link, vlink) are turned into style hints or stored in html, the
 * page background is applied, the color used for visited links is
 * computed, and the parse mode is switched to BODY.
 */
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   int32_t color;
   int tag_index_a = a_Html_tag_index ("a");

   _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
   html->InFlags |= IN_BODY;
   if (html->Num_BODY < UCHAR_MAX)
      ++html->Num_BODY;

   if (html->Num_BODY > 1) {
      BUG_MSG("<body> was already open.");
      html->ReqTagClose = true;
      return;
   }

   if (html->InFlags & IN_HEAD) {
      /* if we're here, it's bad XHTML, no need to recover */
      BUG_MSG("Unclosed <head>.");
   }

   /* The presentational attributes below are obsolete in HTML 5. */
   const bool is_html5 =
      html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f;

   attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor");
   if (attrbuf) {
      color = a_Html_color_parse(html, attrbuf, -1);

      if (is_html5)
         BUG_MSG("<body> bgcolor attribute is obsolete.");

      if (color != -1)
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BACKGROUND_COLOR,
                                           CSS_TYPE_COLOR, color);
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "text");
   if (attrbuf) {
      color = a_Html_color_parse(html, attrbuf, -1);

      if (is_html5)
         BUG_MSG("<body> text attribute is obsolete.");

      if (color != -1)
         html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR,
                                           CSS_TYPE_COLOR, color);
   }

   html->restyle ();

   attrbuf = a_Html_get_attr(html, tag, tagsize, "link");
   if (attrbuf) {
      html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
      if (is_html5)
         BUG_MSG("<body> link attribute is obsolete.");
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink");
   if (attrbuf) {
      html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
      if (is_html5)
         BUG_MSG("<body> vlink attribute is obsolete.");
   }

   html->dw->setStyle (html->style ());

   /* Apply the page background (color first, then image). */
   style::Color *bgColor = html->styleEngine->backgroundColor ();
   if (bgColor)
      HT2LT(html)->setBgColor(bgColor);

   style::BackgroundRepeat bgRepeat;
   style::BackgroundAttachment bgAttachment;
   style::Length bgPositionX, bgPositionY;
   style::StyleImage *bgImage =
      html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
                                          &bgPositionX, &bgPositionY);
   if (bgImage)
      HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
                              bgPositionY);

   /* Determine a color for visited links.
    * This color is computed once per page and used for immediate feedback
    * when clicking a link.
    * On reload style including color for visited links is computed properly
    * according to CSS.
    */
   html->startElement (tag_index_a);
   html->styleEngine->setPseudoVisited ();
   if (html->non_css_visited_color != -1) {
      html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR, CSS_TYPE_COLOR,
                                        html->non_css_visited_color);
   }
   html->visited_color = html->style ()->color->getColor ();
   html->styleEngine->endElement (tag_index_a);

   if (prefs.contrast_visited_color) {
      /* get a color that has a "safe distance" from text, link and bg */
      html->visited_color =
         a_Color_vc(html->visited_color,
            html->style ()->color->getColor(),
            html->non_css_link_color,
            html->backgroundStyle()->backgroundColor->getColor());
   }

   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
}

/*
 * BODY
 * Nothing is done here besides the (normally disabled) debug trace.
 */
static void Html_tag_close_body(DilloHtml *html)
{
   _MSG("Html_tag_close_body: Num_BODY=%d\n", html->Num_BODY);

  /* As some Tag soup pages use multiple BODY tags, this function
   * gets called only on EOF and upon an extra BODY open.
   * Also, we defer clearing the IN_BODY flag until IN_EOF */
}

/*
 * <P>
 * Only the alignment attribute is handled here.
 */
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_tag_set_align_attr (html, tag, tagsize);
}

/*
 * <FRAME>, <IFRAME>
 * TODO: This is just a temporary fix while real frame support
 *       isn't finished. Imitates lynx/w3m's frames.
 *
 * Turns the frame into a link to its 'src': the element gets the visited
 * or unvisited pseudo-class depending on whether the URL is cached, and an
 * x-link hint pointing to the newly registered link. Nothing is done when
 * 'src' is missing or no URL can be built from it.
 */
static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   DilloUrl *url;

   if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
      return;

   if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
      return;

   if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
      /* visited frame */
      html->styleEngine->setPseudoVisited ();
   } else {
      /* unvisited frame */
      html->styleEngine->setPseudoLink ();
   }

   html->styleEngine->setNonCssHint (PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                     Html_set_new_link(html,&url));
}

/*
 * Content for <FRAME> and <IFRAME>: a paragraph made of a bullet followed
 * by a label. Nothing is added when 'src' is missing.
 */
static void
 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
{
   Textblock *textblock = HT2TB(html);
   const char *attrbuf = a_Html_get_attr(html, tag, tagsize, "src");

   if (!attrbuf)
      return;

   /* Keep a private copy of 'src': another attribute is fetched below. */
   char *src = dStrdup(attrbuf);

   textblock->addParbreak (5, html->wordStyle ());

   Widget *bullet = new Bullet();
   textblock->addWidget(bullet, html->wordStyle ());
   textblock->addSpace(html->wordStyle ());

   if (D_ASCII_TOLOWER(tag[1]) == 'i') {
      /* IFRAME usually comes with very long advertising/spying URLS,
       * to not break rendering we will force name="IFRAME" */
      textblock->addText ("IFRAME", html->wordStyle ());
   } else {
      /* FRAME:
       * If 'name' tag is present use it, if not use 'src' value */
      attrbuf = a_Html_get_attr(html, tag, tagsize, "name");
      const char *label = attrbuf ? attrbuf : src;
      textblock->addText (label, html->wordStyle ());
   }

   textblock->addParbreak (5, html->wordStyle ());

   dFree(src);
}

/*
 * <FRAMESET>
 * TODO: This is just a temporary fix while real frame support
 *       isn't finished. Imitates lynx/w3m's frames.
 *
 * Adds a paragraph break and the literal text --FRAME-- to the current
 * textblock, then adds a new textblock for what follows.
 */
static void Html_tag_content_frameset (DilloHtml *html,
                                    const char *tag, int tagsize)
{
   HT2TB(html)->addParbreak (9, html->wordStyle ());
   HT2TB(html)->addText("--FRAME--", html->wordStyle ());
   Html_add_textblock(html, true, 5, false);
}

/*
 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
 * Applies the alignment attribute, initializes the stash and sets the
 * parse mode of the stack top to STASH_AND_BODY.
 */
static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_tag_set_align_attr (html, tag, tagsize);

   a_Html_stash_init(html);
   S_TOP(html)->parse_mode =
      DILLO_HTML_PARSE_MODE_STASH_AND_BODY;
}

/*
 * <BR>
 * Adds a line break to the current textblock, using the current word style.
 */
static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
{
   HT2TB(html)->addLinebreak (html->wordStyle ());
}

/*
 * <FONT>
 * Turns the 'color' and 'face' attributes into non-CSS style hints.
 * Inside a visited link, and with prefs.contrast_visited_color set, the
 * page's visited color replaces the color requested by the tag.
 */
static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;

   attrbuf = a_Html_get_attr(html, tag, tagsize, "color");
   if (attrbuf) {
      /* use the tag-specified color unless the visited color takes over */
      const int32_t color =
         (prefs.contrast_visited_color && html->InVisitedLink)
            ? html->visited_color
            : a_Html_color_parse(html, attrbuf, -1);

      if (color != -1)
         html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR,
                                           CSS_TYPE_COLOR, color);
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "face");
   if (attrbuf) {
      char *fontFamily = dStrdup(attrbuf);

      html->styleEngine->setNonCssHint (CSS_PROPERTY_FONT_FAMILY,
                                        CSS_TYPE_SYMBOL, fontFamily);
      dFree(fontFamily);
   }
}

/*
 * <ABBR>
 * Inherits the background color and, when tooltips are enabled in the
 * preferences, uses the 'title' attribute as tooltip text.
 */
static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
{
   html->styleEngine->inheritBackgroundColor ();

   if (!prefs.show_tooltip)
      return;

   const char *attrbuf = a_Html_get_attr(html, tag, tagsize, "title");
   if (attrbuf)
      html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
                                        attrbuf);
}

/*
 * Read image-associated tag attributes (title, width, height) and turn
 * them into style hints. The x-img hint is also set here, to the index the
 * next image will get in html->images.
 */
void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
   CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
   int w = 0, h = 0;

   if (prefs.show_tooltip &&
       (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
      html->styleEngine->setNonCssHint(PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
                                       attrbuf);
   }

   char *width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
   char *height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);

   // Only pixel lengths take part in the size check below.
   // TODO: the same for percentage and relative lengths.
   if (width_ptr) {
      l_w = a_Html_parse_length (html, width_ptr);
      w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
                 CSS_LENGTH_VALUE(l_w) : 0);
   }
   if (height_ptr) {
      l_h = a_Html_parse_length (html, height_ptr);
      h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
                 CSS_LENGTH_VALUE(l_h) : 0);
   }

   /* Check for suspicious image size request that would cause
    * an excessive amount of memory to be allocated for the
    * image buffer.
    * Be careful to avoid integer overflows during the checks.
    * There is an additional check in dw/image.cc to catch cases
    * where only one dimension is given and the image is scaled
    * preserving its original aspect ratio.
    * Size requests passed via CSS are also checked there.
    */
   const bool suspicious =
      w < 0 || h < 0 ||
      w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
      (h > 0 && w > IMAGE_MAX_AREA / h);

   if (suspicious) {
      MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
          w, h);
   } else {
      if (CSS_LENGTH_TYPE(l_w) != CSS_LENGTH_TYPE_AUTO)
         html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH,
                                           CSS_TYPE_LENGTH_PERCENTAGE, l_w);
      if (CSS_LENGTH_TYPE(l_h) != CSS_LENGTH_TYPE_AUTO)
         html->styleEngine->setNonCssHint (CSS_PROPERTY_HEIGHT,
                                           CSS_TYPE_LENGTH_PERCENTAGE, l_h);
   }

   /* TODO: we should scale the image respecting its ratio.
    *       As the image size is not known at this time, maybe a flag
    *       can be set to scale it later.
    */

   /* x_img is an index to a list of {url,image} pairs.
    * We know a_Html_image_new() will use size() as its next index */
   html->styleEngine->setNonCssHint (PROPERTY_X_IMG, CSS_TYPE_INTEGER,
                                     html->images->size());

   dFree(width_ptr);
   dFree(height_ptr);
}

/*
 * Create the image widget and DilloImage for an element with a 'src', and
 * register the {url, image} pair in html->images.
 *
 * The image is requested right away when image loading is enabled, the URL
 * uses the "data" scheme, or the URL is already cached. If that request
 * succeeds, the entry in html->images holds no image pointer; in every
 * other case it keeps a reference to the image.
 *
 * Return value: the new image, or NULL when 'src' is missing or no URL
 * can be built from it.
 */
DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf = a_Html_get_attr(html, tag, tagsize, "src");
   if (!attrbuf)
      return NULL;

   DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
   if (!url)
      return NULL;

   /* A missing or empty 'alt' is replaced by a generic placeholder. */
   char *alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
   if (!alt_ptr || !*alt_ptr) {
      dFree(alt_ptr);
      alt_ptr = dStrdup("[IMG]");
   }

   dw::Image *img_widget = new dw::Image(alt_ptr);
   DilloImage *image =
      a_Image_new(html->dw->getLayout(),
                  (void*)(dw::core::ImgRenderer*)img_widget, 0);

   a_Image_ref(image);

   if (HT2TB(html)->getBgColor())
      image->bg_color = HT2TB(html)->getBgColor()->getColor();

   DilloHtmlImage *hi = dNew(DilloHtmlImage, 1);
   hi->url = url;
   html->images->increase();
   html->images->set(html->images->size() - 1, hi);

   const bool load_now =
      prefs.load_images ||
      !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
      (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached);
   const bool requested =
      load_now && Html_load_image(html->bw, url, html->page_url, image);

   if (requested) {
      // hi->image is NULL if dillo tries to load the image immediately
      hi->image = NULL;
      a_Image_unref(image);
   } else {
      // otherwise a reference is kept in html->images
      hi->image = image;
   }

   dFree(alt_ptr);
   return image;
}

/*
 * Tell cache to retrieve image
 *
 * A Web structure is filled for the query and flagged as an image request;
 * the image gets an extra reference for it. When the request is accepted,
 * the client key and the URL are registered with the browser window.
 *
 * Return value: true if the cache accepted the request (non-zero key).
 */
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
                            const DilloUrl *requester, DilloImage *Image)
{
   /* Fill a Web structure for the cache query */
   DilloWeb *Web = a_Web_new(bw, url, requester);
   Web->Image = Image;
   a_Image_ref(Image);
   Web->flags |= WEB_Image;

   /* Request image data from the cache */
   const int ClientKey = a_Capi_open_url(Web, NULL, NULL);
   if (ClientKey == 0)
      return false;

   a_Bw_add_client(bw, ClientKey, 0);
   a_Bw_add_url(bw, url);
   return true;
}

/*
 * <IMG>
 * Besides the common image attributes, turn the presentational ones into
 * style hints: 'hspace' (left/right margins), 'vspace' (top/bottom
 * margins) and 'border' (solid border of the given width on all sides).
 * hspace and vspace are only applied when positive; border is also
 * applied when zero.
 */
static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;

   a_Html_common_image_attrs(html, tag, tagsize);

   /* Spacing to the left and right */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace");
   if (attrbuf) {
      int hspace = strtol(attrbuf, NULL, 10);
      if (hspace > 0) {
         int margin = CSS_CREATE_LENGTH(hspace, CSS_LENGTH_TYPE_PX);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_LEFT,
                                           CSS_TYPE_LENGTH_PERCENTAGE, margin);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_RIGHT,
                                           CSS_TYPE_LENGTH_PERCENTAGE, margin);
      }
   }

   /* Spacing at the top and bottom */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace");
   if (attrbuf) {
      int vspace = strtol(attrbuf, NULL, 10);
      if (vspace > 0) {
         int margin = CSS_CREATE_LENGTH(vspace, CSS_LENGTH_TYPE_PX);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_TOP,
                                           CSS_TYPE_LENGTH_PERCENTAGE, margin);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_BOTTOM,
                                           CSS_TYPE_LENGTH_PERCENTAGE, margin);
      }
   }

   /* Border */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "border");
   if (attrbuf) {
      int border = strtol(attrbuf, NULL, 10);
      if (border >= 0) {
         int width = CSS_CREATE_LENGTH(border, CSS_LENGTH_TYPE_PX);

         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_WIDTH,
                                           CSS_TYPE_LENGTH_PERCENTAGE, width);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_WIDTH,
                                           CSS_TYPE_LENGTH_PERCENTAGE, width);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_WIDTH,
                                           CSS_TYPE_LENGTH_PERCENTAGE, width);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_WIDTH,
                                           CSS_TYPE_LENGTH_PERCENTAGE, width);

         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE,
                                           CSS_TYPE_ENUM, BORDER_SOLID);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE,
                                           CSS_TYPE_ENUM, BORDER_SOLID);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_STYLE,
                                           CSS_TYPE_ENUM, BORDER_SOLID);
         html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_STYLE,
                                           CSS_TYPE_ENUM, BORDER_SOLID);
      }
   }
}

/*
 * Create a new Image struct and request the image-url to the cache
 * (If it either hits or misses, is not relevant here; that's up to the
 *  cache functions)
 *
 * The image widget is added to the current textblock, followed by a break
 * option. 'ismap' marks the widget as a server-side map and 'usemap'
 * attaches a client-side map to it. Nothing is done when the base URL
 * carries the URL_SpamSafe flag or no image can be created.
 */
static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
{
   /* This avoids loading images. Useful for viewing suspicious HTML email. */
   if (URL_FLAGS(html->base_url) & URL_SpamSafe)
      return;

   DilloImage *Image = a_Html_image_new(html, tag, tagsize);
   if (!Image)
      return;

   DilloUrl *usemap_url = NULL;
   const char *attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap");
   if (attrbuf) {
      /* TODO: usemap URLs outside of the document are not used. */
      usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
   }

   // At this point, we know that Image->ir represents an image
   // widget. Notice that the order of the casts matters, because of
   // multiple inheritance.
   dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
   HT2TB(html)->addWidget(dwi, html->style());
   HT2TB(html)->addBreakOption (html->style (), false);

   /* Image maps */
   if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
      dwi->setIsMap();
      _MSG("  Html_tag_open_img: server-side map (ISMAP)\n");
   } else if (html->style ()->x_link != -1 &&
              usemap_url == NULL) {
      /* For simple links, we have to suppress the "image_pressed" signal.
       * This is overridden for USEMAP images.
       * (Nothing is done here at present.) */
   }

   if (usemap_url) {
      dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
      a_Url_free (usemap_url);
   }
}

/*
 * <MAP>
 * Starts a new image map named after the "name" attribute and sets
 * IN_MAP. Nested maps and maps without a name are reported and ignored.
 */
static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;

   if (html->InFlags & IN_MAP) {
      BUG_MSG("Nested <map>.");
      return;
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "name");
   if (attrbuf == NULL) {
      BUG_MSG("<map> requires name attribute.");
      return;
   }

   html->InFlags |= IN_MAP;

   /* The map is registered under the URL string built from "#<name>". */
   char *fragment = dStrconcat("#", attrbuf, NULL);
   DilloUrl *map_url = a_Html_url_new(html, fragment, NULL, 0);

   html->maps.startNewMap(new ::object::String(URL_STR(map_url)));
   a_Url_free (map_url);
   dFree(fragment);
}

/*
 * Handle close <MAP>
 *
 * This is a hack for the perhaps frivolous feature of drawing image map
 * shapes when there is no image to display. If this map is defined after
 * an image that has not been loaded (img != NULL), tell the image to
 * redraw. (It will only do so if it uses a map.)
 * Finally, IN_MAP is cleared.
 */
static void Html_tag_close_map(DilloHtml *html)
{
   for (int idx = 0; idx < html->images->size(); ++idx) {
      DilloImage *image = html->images->get(idx)->image;

      if (image == NULL) {
         continue;
      }

      // image->img_rndr is taken to be an image widget here. (Really?
      // Is this assumption safe?) The cast goes through
      // dw::core::ImgRenderer first: with multiple inheritance the
      // order of the casts matters.
      dw::Image *widget =
         (dw::Image*)(dw::core::ImgRenderer*)image->img_rndr;
      widget->forceMapRedraw();
   }

   html->InFlags &= ~IN_MAP;
}

/*
 * Read coords in a string, returning a vector of ints.
 *
 * The expected syntax is a list of decimal integers separated by commas,
 * with optional white space around them. Any other separator is reported
 * with BUG_MSG but parsing goes on leniently. Parsing stops at the end of
 * the string or at the first place where no number can be read.
 *
 * The returned vector is allocated with new; the caller must delete it.
 */
static
misc::SimpleVector<int> *Html_read_coords(DilloHtml *html, const char *str)
{
   const char *tail = str;
   misc::SimpleVector<int> *coords = new misc::SimpleVector<int> (4);

   while (1) {
      char *newtail = NULL;
      int coord = strtol(tail, &newtail, 10);

      /* strtol() consumed nothing: there is no number here */
      if (newtail == tail)
         break;
      coords->increase();
      coords->set(coords->size() - 1, coord);

      /* <ctype.h> functions need an unsigned char value: a plain char may
       * be negative for bytes >= 0x80 (e.g. UTF-8), which is undefined. */
      while (isspace((unsigned char)*newtail))
         newtail++;
      if (!*newtail)
         break;

      if (*newtail == ',') {
         tail = newtail + 1;
      } else {
         unsigned char next = (unsigned char)*newtail;

         BUG_MSG("<area> coords must be integers separated by commas.");
         /* Skip the offending character, unless it can already start the
          * next number (values separated only by white space); skipping
          * it then would drop the number's first digit or its sign. */
         if (isdigit(next) || next == '-' || next == '+')
            tail = newtail;
         else
            tail = newtail + 1;
      }
   }

   return coords;
}

/*
 * <AREA>
 * Adds a shape (rectangle, circle or polygon) with its link to the current
 * image map, or sets the map's default link for shape="default".
 * Only valid while IN_MAP is set. A missing "href" gives the link -1.
 */
static void
Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
{
   enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
   types type;
   misc::SimpleVector<int> *coords = NULL;
   DilloUrl* url;
   const char *attrbuf;
   int link = -1;
   Shape *shape = NULL;

   if (!(html->InFlags & IN_MAP)) {
      BUG_MSG("<area> not inside <map>.");
      return;
   }
   attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");

   if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
      /* the default shape is a rectangle */
      type = RECTANGLE;
   } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
      /* "default" is the background */
      type = BACKGROUND;
   } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
      type = CIRCLE;
   } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
      /* only the first four characters are compared */
      type = POLYGON;
   } else {
      BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
      type = UNKNOWN;
   }
   if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
         coords = Html_read_coords(html, attrbuf);

         /* A wrong number of values is reported; the shape is still built
          * whenever enough values are available. */
         if (type == RECTANGLE) {
            if (coords->size() != 4)
               BUG_MSG("<area> rectangle must have four coordinate values.");
            if (coords->size() >= 4)
               /* coords hold two corners; Rectangle is given the first
                * corner plus the differences to the second one */
               shape = new Rectangle(coords->get(0),
                                     coords->get(1),
                                     coords->get(2) - coords->get(0),
                                     coords->get(3) - coords->get(1));
         } else if (type == CIRCLE) {
            if (coords->size() != 3)
               BUG_MSG("<area> circle must have three coordinate values.");
            if (coords->size() >= 3)
               shape = new Circle(coords->get(0), coords->get(1),
                                  coords->get(2));
         } else if (type == POLYGON) {
            Polygon *poly;
            int i;
            if (coords->size() % 2)
               BUG_MSG("<area> polygon with odd number of coordinates.");
            shape = poly = new Polygon();
            /* a trailing unpaired value is ignored */
            for (i = 0; i < (coords->size() / 2); i++)
               poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
         }
         delete(coords);
      }
   }
   if (shape != NULL || type == BACKGROUND) {
      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
         url = a_Html_url_new(html, attrbuf, NULL, 0);
         if (url == NULL) {
            /* The shape has not been handed over to the map yet, so it
             * has to be released here (NULL for BACKGROUND is fine). */
            delete shape;
            return;
         }

         link = Html_set_new_link(html, &url);
      }
      if (type == BACKGROUND)
         html->maps.setCurrentMapDefaultLink(link);
      else
         html->maps.addShapeToCurrentMap(shape, link);
   }
}

/*
 * <OBJECT>
 * Simply provide a link if the object is something downloadable.
 *
 * The "data" URL is resolved against "codebase" when that attribute is
 * present. The element gets the visited/link pseudo-class depending on
 * whether the URL is flagged as cached, and an x-link hint.
 */
static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
{
   DilloUrl *url, *base_url = NULL;
   const char *attrbuf;

   if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
      base_url = a_Html_url_new(html, attrbuf, NULL, 0);
   }

   if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
      url = a_Html_url_new(html, attrbuf,
                           URL_STR(base_url), (base_url != NULL));

      /* Without a URL there is nothing to link to. Fall through instead
       * of returning early, so that base_url is always released below. */
      if (url != NULL) {
         if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
            html->styleEngine->setPseudoVisited ();
         } else {
            html->styleEngine->setPseudoLink ();
         }

         html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                          Html_set_new_link(html, &url));
      }
   }
   a_Url_free(base_url);
}

/*
 * <OBJECT> content: show an "[OBJECT]" placeholder text when the element
 * has a "data" attribute.
 */
static void Html_tag_content_object(DilloHtml *html, const char *tag,
                                    int tagsize)
{
   const bool has_data = a_Html_get_attr(html, tag, tagsize, "data") != NULL;

   if (has_data) {
      HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
   }
}

/*
 * <VIDEO>
 * Provide a link to the video: when "src" is present, a "[VIDEO]" text
 * linked to it is added. IN_MEDIA is set afterwards (not reached if the
 * URL could not be created); a <video> found while IN_MEDIA is already
 * set is ignored.
 */
static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
{
   const char *src;

   if (html->InFlags & IN_MEDIA) {
      MSG("<video> not handled when already inside a media element.\n");
      return;
   }
   /* TODO: poster attr */

   src = a_Html_get_attr(html, tag, tagsize, "src");
   if (src != NULL) {
      DilloUrl *url = a_Html_url_new(html, src, NULL, 0);
      dReturn_if_fail ( url != NULL );

      const bool visited =
         (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) != 0;

      if (visited) {
         html->styleEngine->setPseudoVisited ();
      } else {
         html->styleEngine->setPseudoLink ();
      }

      html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                       Html_set_new_link(html, &url));

      HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
   }

   html->InFlags |= IN_MEDIA;
}

/*
 * <AUDIO>
 * Provide a link to the audio: when "src" is present, an "[AUDIO]" text
 * linked to it is added. IN_MEDIA is set afterwards (not reached if the
 * URL could not be created); an <audio> found while IN_MEDIA is already
 * set is ignored.
 */
static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
{
   const char *src;

   if (html->InFlags & IN_MEDIA) {
      MSG("<audio> not handled when already inside a media element.\n");
      return;
   }

   src = a_Html_get_attr(html, tag, tagsize, "src");
   if (src != NULL) {
      DilloUrl *url = a_Html_url_new(html, src, NULL, 0);
      dReturn_if_fail ( url != NULL );

      const bool visited =
         (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) != 0;

      if (visited) {
         html->styleEngine->setPseudoVisited ();
      } else {
         html->styleEngine->setPseudoLink ();
      }

      html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                       Html_set_new_link(html, &url));

      HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
   }

   html->InFlags |= IN_MEDIA;
}

/*
 * <SOURCE>
 * Media resource; provide a link to its address.
 * The element is only accepted while IN_MEDIA is set, and it must carry
 * a "src" attribute.
 */
static void Html_tag_open_source(DilloHtml *html, const char *tag,
                                    int tagsize)
{
   const char *src;

   if (!(html->InFlags & IN_MEDIA)) {
      BUG_MSG("<source> not inside a media element.");
      return;
   }

   src = a_Html_get_attr(html, tag, tagsize, "src");
   if (src == NULL) {
      BUG_MSG("<source> requires src attribute.");
      return;
   }

   DilloUrl *url = a_Html_url_new(html, src, NULL, 0);

   dReturn_if_fail ( url != NULL );

   const bool visited =
      (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) != 0;

   if (visited) {
      html->styleEngine->setPseudoVisited ();
   } else {
      html->styleEngine->setPseudoLink ();
   }
   html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                    Html_set_new_link(html, &url));
}

/*
 * <SOURCE> content: show a "[MEDIA SOURCE]" placeholder text when the
 * element is inside a media element and has a "src" attribute.
 */
static void Html_tag_content_source(DilloHtml *html, const char *tag,
                                    int tagsize)
{
   /* the attribute is only looked up while inside a media element */
   if (!(html->InFlags & IN_MEDIA)) {
      return;
   }

   if (a_Html_get_attr(html, tag, tagsize,"src")) {
      HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
   }
}

/*
 * Media (AUDIO/VIDEO) close function: clears the IN_MEDIA flag set by
 * the open handlers.
 */
static void Html_tag_close_media(DilloHtml *html)
{
   html->InFlags = html->InFlags & ~IN_MEDIA;
}

/*
 * <EMBED>
 * Provide a link to embedded content: when "src" is present, the element
 * gets the visited/link pseudo-class and an x-link hint for that URL.
 */
static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
{
   const char *src = a_Html_get_attr(html, tag, tagsize, "src");

   if (src == NULL) {
      return;
   }

   DilloUrl *url = a_Html_url_new(html, src, NULL, 0);

   dReturn_if_fail ( url != NULL );

   const bool visited =
      (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) != 0;

   if (visited) {
      html->styleEngine->setPseudoVisited ();
   } else {
      html->styleEngine->setPseudoLink ();
   }

   html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                    Html_set_new_link(html, &url));
}

/*
 * <EMBED> content: show an "[EMBED]" placeholder text when the element
 * has a "src" attribute.
 */
static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
{
   const bool has_src = a_Html_get_attr(html, tag, tagsize, "src") != NULL;

   if (has_src) {
      HT2TB(html)->addText("[EMBED]", html->wordStyle ());
   }
}

/*
 * Test and extract the link from a javascript instruction.
 *
 * Works in place on html->attr_data: when the buffer starts with
 * "javascript" (ASCII case-insensitive) and contains a quoted string,
 * the buffer is reduced to the text between the first quote character
 * and the next occurrence of that same character. Otherwise the buffer
 * is left untouched. Returns the buffer's string either way.
 */
static const char* Html_get_javascript_link(DilloHtml *html)
{
   Dstr *Buf = html->attr_data;

   if (dStrnAsciiCasecmp("javascript", Buf->str, 10) != 0) {
      return Buf->str;
   }

   /* position of the first quote of either kind (or of the final NUL) */
   size_t open_pos = strcspn(Buf->str, "'\"");
   char quote = Buf->str[open_pos];

   if (quote == '"' || quote == '\'') {
      char *close_ptr = strchr(Buf->str + open_pos + 1 , quote);

      if (close_ptr != NULL) {
         BUG_MSG("Link depends on javascript().");
         /* drop the closing quote and what follows, then everything up
          * to and including the opening quote */
         dStr_truncate(Buf, close_ptr - Buf->str);
         dStr_erase(Buf, 0, (Buf->str + open_pos) - Buf->str + 1);
      }
   }
   return Buf->str;
}

/*
 * Register an anchor for this page.
 * A failure of addAnchor() is reported as a duplicate anchor name.
 *
 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
 * differ only in case may not appear in the same document", but
 * "comparisons between fragment identifiers and anchor names must be
 * done by exact (case-sensitive) match." We ignore the case issue and
 * always test for exact matches. Moreover, what does uppercase mean
 * for Unicode characters outside the ASCII range?
 */
static void Html_add_anchor(DilloHtml *html, const char *name)
{
   _MSG("Registering ANCHOR: %s\n", name);

   if (!HT2TB(html)->addAnchor (name, html->style ())) {
      BUG_MSG("Anchor names must be unique within the document (\"%s\").",
              name);
   }
}

/*
 * <A>
 * Handles, in this order: the "href" link (with visited/unvisited
 * styling), the "title" tooltip (if enabled in prefs), and the "name"
 * anchor. Inside a <map>, the tag is first handed to the <area> code.
 */
static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;

   /* TODO: add support for MAP with A HREF */
   if (html->InFlags & IN_MAP) {
      Html_tag_content_area(html, tag, tagsize);
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "href");
   if (attrbuf != NULL) {
      DilloUrl *url;

      /* if it's a javascript link, extract the reference. This must
       * happen right after the "href" lookup, because the extraction
       * works on html->attr_data. */
      if (D_ASCII_TOLOWER(attrbuf[0]) == 'j') {
         attrbuf = Html_get_javascript_link(html);
      }

      url = a_Html_url_new(html, attrbuf, NULL, 0);
      dReturn_if_fail ( url != NULL );

      if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
         html->InVisitedLink = true;
         html->styleEngine->setPseudoVisited ();
         /* -1 means no such color was set */
         if (html->non_css_visited_color != -1) {
            html->styleEngine->setNonCssHint(CSS_PROPERTY_COLOR,
                                             CSS_TYPE_COLOR,
                                             html->non_css_visited_color);
         }
      } else {
         html->styleEngine->setPseudoLink ();
         if (html->non_css_link_color != -1) {
            html->styleEngine->setNonCssHint(CSS_PROPERTY_COLOR,
                                             CSS_TYPE_COLOR,
                                             html->non_css_link_color);
         }
      }

      html->styleEngine->setNonCssHint (PROPERTY_X_LINK, CSS_TYPE_INTEGER,
                                        Html_set_new_link(html, &url));
   }

   /* the "title" attribute is only looked up when tooltips are enabled */
   if (prefs.show_tooltip) {
      attrbuf = a_Html_get_attr(html, tag, tagsize, "title");
      if (attrbuf != NULL) {
         html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP,
                                           CSS_TYPE_STRING, attrbuf);
      }
   }

   html->styleEngine->inheritBackgroundColor ();

   attrbuf = a_Html_get_attr(html, tag, tagsize, "name");
   if (attrbuf != NULL) {
      const char *id = html->styleEngine->getId ();
      char *decoded_name;

      if (prefs.show_extra_warnings) {
         Html_check_name_val(html, attrbuf, "name");
      }

      decoded_name = a_Url_decode_hex_str(attrbuf);
      if (decoded_name != NULL) {
         /* We compare the "id" value with the url-decoded "name" value;
          * the anchor is only added when they are not the same. */
         const bool same_as_id =
            (id != NULL) && (strcmp(decoded_name, id) == 0);

         if (!same_as_id) {
            if (id != NULL) {
               BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
                        id, decoded_name);
            }
            Html_add_anchor(html, decoded_name);
         }

         dFree(decoded_name);
      }
   }
}

/*
 * <A> close function: resets the InVisitedLink flag that the open
 * handler sets for links flagged as cached.
 */
static void Html_tag_close_a(DilloHtml *html)
{
   html->InVisitedLink = false;   /* no longer inside a visited link */
}

/*
 * <BLOCKQUOTE>
 * The element gets its own textblock, created by Html_add_textblock()
 * with the same arguments that <DD> uses.
 */
static void Html_tag_open_blockquote(DilloHtml *html,
                                     const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   Html_add_textblock(html, true, 9, false);
}

/*
 * <Q>
 * Adds an opening quotation mark as text.
 */
static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
{
   /*
    * UTF-8 encoding of U+201C, Left Double Quotation Mark, which is
    * wrong in many cases, but should at least be widely recognized.
    */
   static const char left_dquote[] = "\xe2\x80\x9c";

   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   html->styleEngine->inheritBackgroundColor ();
   HT2TB(html)->addText (left_dquote, html->wordStyle ());
}

/*
 * </Q>
 * Adds the closing quotation mark as text.
 */
static void Html_tag_close_q(DilloHtml *html)
{
   /* UTF-8 encoding of U+201D, Right Double Quotation Mark */
   static const char right_dquote[] = "\xe2\x80\x9d";

   HT2TB(html)->addText (right_dquote, html->wordStyle ());
}

/*
 * Handle the <UL> tag.
 * A "type" attribute (disc, circle or square) selects the bullet style;
 * any other value falls back to disc. The list state kept in the top
 * stack entry is reset for an unordered list.
 */
static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
{
   const char *type_attr = a_Html_get_attr(html, tag, tagsize, "type");

   if (type_attr != NULL) {
      /* list_style_type explicitly defined; disc also covers
       * invalid values */
      ListStyleType list_style_type = LIST_STYLE_TYPE_DISC;

      if (dStrAsciiCasecmp(type_attr, "circle") == 0) {
         list_style_type = LIST_STYLE_TYPE_CIRCLE;
      } else if (dStrAsciiCasecmp(type_attr, "square") == 0) {
         list_style_type = LIST_STYLE_TYPE_SQUARE;
      }

      html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE,
                                        CSS_TYPE_ENUM, list_style_type);
      if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
         BUG_MSG("<ul> type attribute is obsolete.");
      }
   }

   S_TOP(html)->list_type = HTML_LIST_UNORDERED;
   S_TOP(html)->list_number = 0;
   S_TOP(html)->ref_list_item = NULL;
}

/*
 * Handle the <DIR> or <MENU> tag.
 * (Deprecated and almost the same as <UL>)
 * Adds a paragraph break, resets the list state in the top stack entry
 * for an unordered list and, with extra warnings enabled, reports the
 * element as obsolete.
 */
static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   html->styleEngine->inheritBackgroundColor ();
   HT2TB(html)->addParbreak (9, html->wordStyle ());

   S_TOP(html)->list_type = HTML_LIST_UNORDERED;
   S_TOP(html)->list_number = 0;
   S_TOP(html)->ref_list_item = NULL;

   if (prefs.show_extra_warnings) {
      BUG_MSG("Obsolete list type; use <ul> instead.");
   }
}

/*
 * Handle the <MENU> tag.
 * Outside of HTML5 documents it is treated like <DIR>; in HTML5
 * documents nothing is done here.
 */
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
{
   /* In another bit of ridiculous mess from the HTML5 world, the menu
    * element, which was deprecated in HTML4:
    * - does not appear at all in W3C's HTML5 spec
    * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
    *   means something totally different than it did in the old days
    *   (now it's for popup menus and toolbar menus rather than being a
    *   sort of list).
    */
   const bool html5 = (html->DocType == DT_HTML &&
                       html->DocTypeVersion >= 5.0f);

   if (html5) {
      return;
   }
   Html_tag_open_dir(html, tag, tagsize);
}

/*
 * Handle the <OL> tag.
 * The first character of "type" (1, a, A, i, I) selects the numbering
 * style; "start" gives the first number (default 1, negative values are
 * reported and replaced by 0). The result is stored in the top stack
 * entry.
 */
static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   int first_number = 1;

   attrbuf = a_Html_get_attr(html, tag, tagsize, "type");
   if (attrbuf != NULL) {
      ListStyleType listStyleType;

      switch (*attrbuf) {
      case 'a':
         listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
         break;
      case 'A':
         listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
         break;
      case 'i':
         listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
         break;
      case 'I':
         listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
         break;
      default:
         /* '1' and anything unrecognized */
         listStyleType = LIST_STYLE_TYPE_DECIMAL;
         break;
      }

      html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE,
                                        CSS_TYPE_ENUM, listStyleType);
   }

   S_TOP(html)->list_type = HTML_LIST_ORDERED;

   attrbuf = a_Html_get_attr(html, tag, tagsize, "start");
   if (attrbuf != NULL) {
      first_number = (int) strtol(attrbuf, NULL, 10);
      if (first_number < 0) {
         BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
         first_number = 0;
      }
   }
   S_TOP(html)->list_number = first_number;
   S_TOP(html)->ref_list_item = NULL;
}

/*
 * Handle the <LI> tag.
 * Sets IN_LI and, when the item's list style is DECIMAL or a later
 * ListStyleType value, lets a "value" attribute overwrite the list
 * number kept in the parent's stack entry.
 */
static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
{
   Style *style = html->style ();
   const char *value_attr;
   int *counter;

   if (S_TOP(html)->list_type == HTML_LIST_NONE) {
      BUG_MSG("<li> outside <ul> or <ol>.");
   }

   html->InFlags |= IN_LI;

   /* Get our parent tag's variables (used as state storage): the entry
    * just below the top of the stack. */
   counter = &html->stack->getRef(html->stack->size()-2)->list_number;

   if (style->listStyleType < LIST_STYLE_TYPE_DECIMAL) {
      return;
   }

   // ordered
   value_attr = a_Html_get_attr(html, tag, tagsize, "value");
   if (value_attr != NULL) {
      *counter = strtol(value_attr, NULL, 10);
      if (*counter < 0) {
         BUG_MSG("Illegal negative list value attribute; Starting from 0.");
         *counter = 0;
      }
   }
}

/*
 * Close <LI>: clears IN_LI and flushes the current widget, which is
 * treated as a ListItem.
 */
static void Html_tag_close_li(DilloHtml *html)
{
   ListItem *item = (ListItem *)html->dw;

   html->InFlags &= ~IN_LI;
   item->flush ();
}

/*
 * <HR>
 * Translates the presentational attributes "width", "size", "align" and
 * "noshade" into non-CSS style hints. In HTML5 documents, "width", "size"
 * and "noshade" are additionally reported as obsolete.
 */
static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
{
   const bool html5 = (html->DocType == DT_HTML &&
                       html->DocTypeVersion >= 5.0f);
   const char *attrbuf;
   char *width_ptr;
   int32_t size = 0;

   /* the returned copy belongs to us and is freed below */
   width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
   if (width_ptr != NULL) {
      if (html5) {
         BUG_MSG("<hr> width attribute is obsolete.");
      }
      html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH,
                                        CSS_TYPE_LENGTH_PERCENTAGE,
                                        a_Html_parse_length (html, width_ptr));
      dFree(width_ptr);
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "size");
   if (attrbuf != NULL) {
      size = strtol(attrbuf, NULL, 10);
      if (html5) {
         BUG_MSG("<hr> size attribute is obsolete.");
      }
   }

   a_Html_tag_set_align_attr(html, tag, tagsize);

   /* TODO: evaluate attribute */
   if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
      if (html5) {
         BUG_MSG("<hr> noshade attribute is obsolete.");
      }
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE,
                                        CSS_TYPE_ENUM, BORDER_SOLID);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE,
                                        CSS_TYPE_ENUM, BORDER_SOLID);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_STYLE,
                                        CSS_TYPE_ENUM, BORDER_SOLID);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_STYLE,
                                        CSS_TYPE_ENUM, BORDER_SOLID);

      /* a solid rule needs a size of at least one pixel */
      if (size <= 0) {
         size = 1;
      }
   }

   if (size > 0) {
      /* The size is split between the borders: top and left get the
       * half rounded up, bottom and right the half rounded down. */
      CssLength half_up = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
      CssLength half_down = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);

      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_WIDTH,
                                        CSS_TYPE_LENGTH_PERCENTAGE, half_up);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_WIDTH,
                                        CSS_TYPE_LENGTH_PERCENTAGE, half_up);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_WIDTH,
                                        CSS_TYPE_LENGTH_PERCENTAGE,
                                        half_down);
      html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_WIDTH,
                                        CSS_TYPE_LENGTH_PERCENTAGE,
                                        half_down);
   }
}

/*
 * <HR> content: add a Ruler widget with the element's style, with a
 * paragraph break before and after it.
 */
static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   HT2TB(html)->addParbreak (5, html->wordStyle ());

   Widget *ruler = new Ruler();

   ruler->setStyle (html->style ());
   HT2TB(html)->addWidget (ruler, html->style ());

   HT2TB(html)->addParbreak (5, html->wordStyle ());
}

/*
 * <DL>
 * Only adds a paragraph break for now.
 */
static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   /* may want to actually do some stuff here. */
   html->styleEngine->inheritBackgroundColor ();
   HT2TB(html)->addParbreak (9, html->wordStyle ());
}

/*
 * <DT>
 * Adds a paragraph break before the term.
 */
static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   html->styleEngine->inheritBackgroundColor ();
   HT2TB(html)->addParbreak (9, html->wordStyle ());
}

/*
 * <DD>
 * The element gets its own textblock, created by Html_add_textblock()
 * with the same arguments that <BLOCKQUOTE> uses.
 */
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   Html_add_textblock(html, true, 9, false);
}

/*
 * <PRE>
 * Adds a paragraph break and sets IN_PRE until the element is closed.
 */
static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
{
   /* no attribute is read from the tag */
   (void)tag;
   (void)tagsize;

   html->styleEngine->inheritBackgroundColor ();
   HT2TB(html)->addParbreak (9, html->wordStyle ());

   html->InFlags = html->InFlags | IN_PRE;
}

/*
 * Custom close for <PRE>: clears the IN_PRE flag set by the open handler.
 */
static void Html_tag_close_pre(DilloHtml *html)
{
   html->InFlags = html->InFlags & ~IN_PRE;
}

/*
 * Check whether a tag is in the "excluding" element set for PRE
 * Excl. Set = {IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, BASEFONT}
 *
 * html    - used for the doctype test only
 * tag_idx - index of the tag to test, as given by a_Html_tag_index()
 *
 * Returns 1 when the tag is in the set, 0 otherwise. In HTML5 documents
 * the answer is always 0.
 */
static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
{
   /* Element names of the excluding set; NULL marks the end. */
   static const char *const es_set[] = {
      "img", "object", "applet", "big", "small", "sub",
      "sup", "font", "basefont", NULL
   };
   /* Tag indices for es_set, filled on first use. Same length as es_set,
    * so the slot matching the NULL entry stays 0 and ends the list. */
   static int ei_set[sizeof(es_set) / sizeof(es_set[0])];

   /* HTML5 doesn't say anything about excluding elements */
   if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
      return 0;

   /* initialize array (a zero first entry means "not filled yet") */
   if (!ei_set[0]) {
      for (int i = 0; es_set[i]; ++i)
         ei_set[i] = a_Html_tag_index(es_set[i]);
   }

   for (int i = 0; ei_set[i]; ++i) {
      if (tag_idx == ei_set[i])
         return 1;
   }
   return 0;
}

/*
 * Update the document's content type information based on meta tag data.
 * When the resulting content type differs from the one this parser was
 * working with, parsing is stopped and a repush of the page is requested.
 */
static void Html_update_content_type(DilloHtml *html, const char *content)
{
   const char *new_content;

   new_content = a_Capi_set_content_type(html->page_url, content, "meta");

   /* Cannot ask cache whether the content type was changed, as
    * this code in another bw might have already changed it for us.
    * So compare against our own copy instead.
    */
   if (!a_Misc_content_type_cmp(html->content_type, new_content)) {
      return;
   }

   html->stop_parser = true; /* The cache buffer is no longer valid */
   a_UIcmd_repush(html->bw);
}

/*
 * Handle <META>
 * We do not support http-equiv=refresh with delay>0 because it's
 * non standard, (the HTML 4.01 SPEC recommends explicitly to avoid it).
 * More info at:
 *   http://lists.w3.org/Archives/Public/www-html/2000Feb/thread.html#msg232
 * Instant client-side redirects (delay=0) are supported:
 *   http://www.w3.org/TR/2008/NOTE-WCAG20-TECHS-20081211/H76.html
 *
 * Besides refresh, http-equiv=content-type and (HTML5) the charset
 * attribute are used to update the document's content type.
 *
 * TODO: Note that we're sending custom HTML while still IN_HEAD. This
 * is a hackish way to put the message. A much cleaner approach is to
 * build a custom widget for it.
 */
static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
{
   /* dStr_sprintf() format; arguments: target URL, delay message */
   const char meta_template[] =
"<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
" <td bgcolor='#8899aa' width='100%%'>\n"
" This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
" (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
" <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
" <a href='%s'>here</a>%s</td></tr></table><br>\n";

   const bool html5 = (html->DocType == DT_HTML &&
                       html->DocTypeVersion >= 5.0f);
   const char *p, *equiv, *charset, *content;
   char delay_str[64], *mr_url;
   DilloUrl *new_url;
   int delay;

   /* only valid inside HEAD */
   if (!(html->InFlags & IN_HEAD)) {
      if (!(html5 && a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
         /* With the HTML 5.1 draft spec, meta with itemprop may appear
          * in the body.
          */
         BUG_MSG("This <meta> element must be inside the HEAD section.");
      }
      return;
   }

   if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
      if (!dStrAsciiCasecmp(equiv, "refresh") &&
          (content = a_Html_get_attr(html, tag, tagsize, "content"))) {

         /* Get delay, if present, and make a message with it.
          * The delay is a decimal number of seconds, so it is parsed in
          * base 10 (base 0 would read "010" as octal 8). */
         if ((delay = strtol(content, NULL, 10))) {
            snprintf(delay_str, sizeof(delay_str), " after %d second%s.",
                     delay, (delay > 1) ? "s" : "");
         } else {
            snprintf(delay_str, sizeof(delay_str), ".");
         }
         /* Skip to anything after "URL=" or ";" if "URL=" is not found */
         if ((p = dStriAsciiStr(content, "url=")))
            content = p + strlen("url=");
         else if ((p = strstr(content, ";")))
            content = p + strlen(";");
         /* Handle the case of a quoted URL: take the text up to the
          * matching quote, or the rest if the quote is never closed */
         if (*content == '"' || *content == '\'') {
            if ((p = strchr(content + 1, *content)))
               mr_url = dStrndup(content + 1, p - content - 1);
            else
               mr_url = dStrdup(content + 1);
         } else {
            mr_url = dStrdup(content);
         }
         new_url = a_Html_url_new(html, mr_url, NULL, 0);

         if (a_Url_cmp(html->base_url, new_url) == 0) {
            /* redirection loop, or empty url string: ignore */
            BUG_MSG("<meta> refresh: %s.",
                    *mr_url ? "redirection loop" : "no target URL");
         } else if (delay == 0) {
            /* zero-delay redirection */
            html->stop_parser = true;
            if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
               a_UIcmd_set_msg(html->bw,
                  "WARNING: local URL with META refresh.  Aborting.");
            } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
               a_UIcmd_redirection0((void*)html->bw, new_url);
            }
         } else {
            /* Send a custom HTML message.
             * TODO: This is a hairy hack,
             *       It'd be much better to build a widget. */
            Dstr *ds_msg = dStr_sized_new(256);
            dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
            {
               /* Parse the message with temporary parser flags, then put
                * the previous ones back. */
               int o_InFlags = html->InFlags;
               int o_TagSoup = html->TagSoup;
               html->InFlags = IN_BODY + IN_META_HACK;
               html->TagSoup = false;
               Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
               html->TagSoup = o_TagSoup;
               html->InFlags = o_InFlags;
            }
            dStr_free(ds_msg, 1);
         }
         a_Url_free(new_url);
         dFree(mr_url);

      } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
                 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
         _MSG("Html_tag_open_meta: content={%s}\n", content);
         Html_update_content_type(html, content);
      }
   } else if (html5 &&
              (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
      /* HTML5 <meta charset=...>; the version test is the same ">= 5"
       * one used by the other HTML5 checks in this file */
      char *content = dStrconcat("text/html; charset=", charset, NULL);

      Html_update_content_type(html, content);
      dFree(content);
   }
}

/*
 * Called by the network engine when a stylesheet has new data.
 * Only the end-of-data notification (Op != 0) matters: it decrements the
 * window's count of pending stylesheets and, when that reaches zero,
 * asks for a repush of the page.
 */
static void Html_css_load_callback(int Op, CacheClient_t *Client)
{
   _MSG("Html_css_load_callback: Op=%d\n", Op);

   if (!Op) {
      return;
   }

   /* EOF */
   BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;

   /* Repush when we've got them all */
   bw->NumPendingStyleSheets -= 1;
   if (bw->NumPendingStyleSheets == 0) {
      a_UIcmd_repush(bw);
   }
}

/*
 * Tell cache to retrieve a stylesheet
 *
 * Does nothing when url is NULL or prefs.load_stylesheets is off.
 * If the URL is flagged CAPI_Completed and its buffer can be obtained,
 * the stylesheet is parsed right away (CSS_ORIGIN_AUTHOR). Otherwise a
 * request is opened with Html_css_load_callback as its callback and the
 * window's NumPendingStyleSheets counter is incremented.
 */
void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
{
   char *data;
   int len;

   dReturn_if (url == NULL || ! prefs.load_stylesheets);

   _MSG("Html_load_stylesheet: ");
   if ((a_Capi_get_flags_with_redirection(url) & CAPI_Completed) &&
       a_Capi_get_buf(url, &data, &len)) {
      _MSG("cached URL=%s len=%d", URL_STR(url), len);
      /* A leading '@charset "<name>"' rule is used to set the charset of
       * the URL's content type before parsing. */
      if (strncmp("@charset \"", data, 10) == 0) {
         char *endq = strchr(data+10, '"');

         /* NOTE(review): endq - data <= 51 accepts a name of up to 41
          * bytes after the 10-byte prefix, one more than the limit quoted
          * below -- confirm whether that is intended. */
         if (endq && (endq - data <= 51)) {
            /* IANA limits charset names to 40 characters */
            char *content_type;

            /* Temporarily terminate the buffer at the closing quote to
             * copy the name, then put the quote back. */
            *endq = '\0';
            content_type = dStrconcat("text/css; charset=", data+10, NULL);
            *endq = '"';
            /* The buffer is released before the content type is changed
             * and fetched again afterwards.
             * NOTE(review): the result of this second a_Capi_get_buf()
             * is not checked -- confirm it cannot fail here. */
            a_Capi_unref_buf(url);
            a_Capi_set_content_type(url, content_type, "meta");
            dFree(content_type);
            a_Capi_get_buf(url, &data, &len);
         }
      }
      html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
      a_Capi_unref_buf(url);
   } else {
      /* Fill a Web structure for the cache query */
      int ClientKey;
      DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
      Web->flags |= WEB_Stylesheet;
      /* A zero key means no client was set up; nothing is counted then. */
      if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
         ++html->bw->NumPendingStyleSheets;
         a_Bw_add_client(html->bw, ClientKey, 0);
         a_Bw_add_url(html->bw, url);
         MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
      }
   }
   _MSG("\n");
}

/*
 * Parse the LINK element (Only CSS stylesheets by now).
 * (If it either hits or misses, is not relevant here; that's up to the
 *  cache functions)
 *
 * A link is passed to html->addCssUrl() only when it is inside HEAD,
 * has rel="stylesheet", a "type" that is absent or text/css, a "media"
 * that is absent, contains "screen" or equals "all", and an "href".
 *
 * TODO: How will we know when to use "handheld"? Ask the html->bw->ui for
 * screen dimensions, or a dillorc preference.
 */
static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   DilloUrl *url;

   /* When viewing suspicious HTML email, don't load LINK */
   dReturn_if (URL_FLAGS(html->base_url) & URL_SpamSafe);

   /* Ignore LINK outside HEAD */
   if (!(html->InFlags & IN_HEAD)) {
      const bool html5 = (html->DocType == DT_HTML &&
                          html->DocTypeVersion >= 5.0f);

      if (!(html5 && a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
         /* With the HTML 5.1 draft spec, link with itemprop may appear
          * in the body.
          */
         BUG_MSG("This <link> element must be inside the HEAD section.");
      }
      return;
   }

   /* Remote stylesheets enabled? */
   dReturn_if_fail (prefs.load_stylesheets);

   /* CSS stylesheet link */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "rel");
   if (attrbuf == NULL || dStrAsciiCasecmp(attrbuf, "stylesheet") != 0) {
      return;
   }

   /* IMPLIED attributes? "type", when given, has to be text/css */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "type");
   if (attrbuf != NULL && dStrAsciiCasecmp(attrbuf, "text/css") != 0) {
      return;
   }

   /* "media", when given, has to contain "screen" or be exactly "all" */
   attrbuf = a_Html_get_attr(html, tag, tagsize, "media");
   if (attrbuf != NULL &&
       !dStriAsciiStr(attrbuf, "screen") &&
       dStrAsciiCasecmp(attrbuf, "all") != 0) {
      return;
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "href");
   if (attrbuf == NULL) {
      return;
   }
   url = a_Html_url_new(html, attrbuf, NULL, 0);
   if (url == NULL) {
      return;
   }

   _MSG("  Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));

   html->addCssUrl(url);
   a_Url_free(url);
}

/*
 * Open handler for <BASE>: set the Document Base URI from its href.
 *
 * Only honoured inside HEAD. For HTML5 documents the href is resolved with
 * a_Html_url_new(html, href, NULL, 0) and always accepted; otherwise it is
 * built with an empty base and accepted only when it carries a scheme.
 * The URL_SpamSafe flag of the previous base URL is carried over.
 */
static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   DilloUrl *BaseUrl;
   bool_t html5;

   if (!(html->InFlags & IN_HEAD)) {
      BUG_MSG("<base> not inside HEAD section.");
      return;
   }

   attrbuf = a_Html_get_attr(html, tag, tagsize, "href");
   if (attrbuf == NULL)
      return;

   html5 = html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f;

   if (html5)
      BaseUrl = a_Html_url_new(html, attrbuf, NULL, 0);
   else
      BaseUrl = a_Html_url_new(html, attrbuf, "", 1);

   if (!html5 && !URL_SCHEME_(BaseUrl)) {
      BUG_MSG("<base> URI is relative (it MUST be absolute).");
      a_Url_free(BaseUrl);
      return;
   }

   /* Pass the URL_SpamSafe flag to the new base url */
   a_Url_set_flags(BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
   a_Url_free(html->base_url);
   html->base_url = BaseUrl;
}

/*
 * Generic open handler, used by the entries of Tags[] that need no
 * attribute handling of their own. Its only action is to call
 * inheritBackgroundColor() on the style engine; 'tag' and 'tagsize'
 * are not used.
 */
static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
{
   html->styleEngine->inheritBackgroundColor();
}

/*
 * <SPAN>
 * Inherits the background color and, when tooltips are enabled in the
 * preferences, turns a "title" attribute into a tooltip style hint.
 */
static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
{
   const char *title;

   html->styleEngine->inheritBackgroundColor();

   if (!prefs.show_tooltip)
      return;

   title = a_Html_get_attr(html, tag, tagsize, "title");
   if (title != NULL)
      html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
                                        title);
}

/*
 * html5 sectioning stuff: article aside nav section header footer
 * When tooltips are enabled in the preferences, a "title" attribute is
 * turned into a tooltip style hint. Nothing else is done here.
 */
static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
                                     int tagsize)
{
   const char *title;

   if (!prefs.show_tooltip)
      return;

   title = a_Html_get_attr(html, tag, tagsize, "title");
   if (title != NULL)
      html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
                                        title);
}

/*
 * <DIV> (TODO: make a complete implementation)
 * Handles the align attribute through a_Html_tag_set_align_attr() and then
 * reuses the sectioning handler for the "title" tooltip.
 */
static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_tag_set_align_attr (html, tag, tagsize);
   Html_tag_open_sectioning(html, tag, tagsize);
}

/*
 * Default close for paragraph tags: add a paragraph break (first argument
 * 9, using the current word style) to the current textblock.
 * NOTE(review): the stack is not popped in this function; presumably the
 * caller takes care of that -- confirm.
 */
static void Html_tag_close_par(DilloHtml *html)
{
   HT2TB(html)->addParbreak (9, html->wordStyle ());
}

/*
 * <WBR> "The wbr element represents a line break opportunity."
 * Content function: adds a break option, with the current word style, to
 * the current textblock. 'tag' and 'tagsize' are not used.
 * NOTE(review): the meaning of the 'true' argument is defined by
 * addBreakOption(); check its declaration.
 */
static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
{
   HT2TB(html)->addBreakOption(html->wordStyle (), true);
}


/*
 * Function index for the open, content, and close functions for each tag
 * (Alphabetically sorted for a binary search).
 * The open and close functions are always called. They are used for style
 * handling and HTML bug reporting.
 * Content creation (e.g. adding new widgets or text) is done in the content
 * function, which is not called in the display:none case.
 * Note: many tags don't need a content function (e.g. <div>, <span>, ...).
 *
 * Field order of each entry:
 *   name, Flags, EndTag, TagLevel, open function, content function,
 *   close function.
 *   EndTag is 'R', 'O' or 'F': the close tag is Required, Optional or
 *   Forbidden.
 *
 * IMPORTANT: a_Html_tag_index() does a binary search over this table, so a
 * new entry must be inserted at its sorted position.
 *
 * Explanation for the 'Flags' field:
 *
 *   {"address", B8(010110), ...}
 *                  |||||`- inline element
 *                  ||||`-- block element
 *                  |||`--- inline container
 *                  ||`---- block container
 *                  |`----- body element
 *                  `------ head element
 *
 *   Notes:
 *     - The upper two bits are not used yet.
 *     - Empty elements have both inline and block container clear.
 *       (flow have both set)
 *     - The parser tests these bits by value: 2 (block element),
 *       8 (block container), 16 (body element) and 32 (head element).
 */

const TagInfo Tags[] = {
 {"a", B8(011101),'R',2, Html_tag_open_a, NULL, Html_tag_close_a},
 {"abbr", B8(010101),'R',2, Html_tag_open_abbr, NULL, NULL},
 /* acronym 010101 -- obsolete in HTML5 */
 {"address", B8(011110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par},
 {"area", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_area,
                            NULL},
 {"article", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"aside", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"audio", B8(011101),'R',2, Html_tag_open_audio, NULL, Html_tag_close_media},
 {"b", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"base", B8(100001),'F',0, Html_tag_open_base, NULL, NULL},
 /* basefont 010001 -- obsolete in HTML5 */
 /* bdo 010101 */
 {"big", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"blockquote", B8(011110),'R',2, Html_tag_open_blockquote, NULL,
                                  NULL},
 {"body", B8(011110),'O',1, Html_tag_open_body, NULL, Html_tag_close_body},
 {"br", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_br,
                          NULL},
 {"button", B8(011101),'R',2, Html_tag_open_button,NULL,Html_tag_close_button},
 /* caption */
 {"center", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"cite", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"code", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* col 010010 'F' */
 /* colgroup */
 {"dd", B8(011110),'O',1, Html_tag_open_dd, NULL, NULL},
 {"del", B8(011101),'R',2, Html_tag_open_default, NULL, NULL},
 {"dfn", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"dir", B8(011010),'R',2, Html_tag_open_dir, NULL, Html_tag_close_par},
 /* TODO: complete <div> support! */
 {"div", B8(011110),'R',2, Html_tag_open_div, NULL, NULL},
 {"dl", B8(011010),'R',2, Html_tag_open_dl, NULL, Html_tag_close_par},
 {"dt", B8(010110),'O',1, Html_tag_open_dt, NULL, Html_tag_close_par},
 {"em", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"embed", B8(010001),'F',0, Html_tag_open_embed, Html_tag_content_embed,NULL},
 /* fieldset */
 {"figcaption", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"figure", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"font", B8(010101),'R',2, Html_tag_open_font, NULL, NULL},
 {"footer", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"form", B8(011110),'R',2, Html_tag_open_form, NULL, Html_tag_close_form},
 {"frame", B8(010010),'F',0, Html_tag_open_frame, Html_tag_content_frame,
                             NULL},
 {"frameset", B8(011110),'R',2, Html_tag_open_default,
                                Html_tag_content_frameset, NULL},
 {"h1", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h2", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h3", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h4", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h5", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h6", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"head", B8(101101),'O',1, Html_tag_open_head, NULL, Html_tag_close_head},
 {"header", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"hr", B8(010010),'F',0, Html_tag_open_hr, Html_tag_content_hr,
                          NULL},
 {"html", B8(001110),'O',1, Html_tag_open_html, NULL, Html_tag_close_html},
 {"i", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"iframe", B8(011110),'R',2, Html_tag_open_frame, Html_tag_content_frame,
                              NULL},
 {"img", B8(010001),'F',0, Html_tag_open_img, Html_tag_content_img,
                           NULL},
 {"input", B8(010001),'F',0, Html_tag_open_input, NULL, NULL},
 {"ins", B8(011101),'R',2, Html_tag_open_default, NULL, NULL},
 {"isindex", B8(110001),'F',0, Html_tag_open_isindex, NULL, NULL},
 {"kbd", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* label 010101 */
 /* legend 01?? */
 {"li", B8(011110),'O',1, Html_tag_open_li, NULL, Html_tag_close_li},
 {"link", B8(100001),'F',0, Html_tag_open_link, NULL, NULL},
 {"map", B8(011001),'R',2, Html_tag_open_default, Html_tag_content_map,
                           Html_tag_close_map},
 {"mark", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
 {"menu", B8(011010),'R',2, Html_tag_open_menu, NULL, Html_tag_close_par},
 {"meta", B8(110001),'F',0, Html_tag_open_meta, NULL, NULL},
 {"nav", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 /* noframes 1011 -- obsolete in HTML5 */
 /* noscript 1011 */
 {"object", B8(111101),'R',2, Html_tag_open_object, Html_tag_content_object,
                              NULL},
 {"ol", B8(011010),'R',2, Html_tag_open_ol, NULL, NULL},
 {"optgroup", B8(010101),'O',1, Html_tag_open_optgroup, NULL,
                                Html_tag_close_optgroup},
 {"option", B8(010001),'O',0, Html_tag_open_option,NULL,Html_tag_close_option},
 {"p", B8(010110),'O',1, Html_tag_open_p, NULL, NULL},
 /* param 010001 'F' */
 {"pre", B8(010110),'R',2, Html_tag_open_pre, NULL, Html_tag_close_pre},
 {"q", B8(010101),'R',2, Html_tag_open_q, NULL, Html_tag_close_q},
 {"s", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"samp", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"script", B8(111001),'R',2, Html_tag_open_script,NULL,Html_tag_close_script},
 {"section", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"select", B8(010101),'R',2, Html_tag_open_select,NULL,Html_tag_close_select},
 {"small", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"source", B8(010001),'F',0, Html_tag_open_source, Html_tag_content_source,
                              NULL},
 {"span", B8(010101),'R',2, Html_tag_open_span, NULL, NULL},
 {"strike", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"strong", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"style", B8(100101),'R',2, Html_tag_open_style, NULL, Html_tag_close_style},
 {"sub", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"sup", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"table", B8(011010),'R',5, Html_tag_open_table, Html_tag_content_table,
                             NULL},
 /* tbody */
 {"td", B8(011110),'O',3, Html_tag_open_td, Html_tag_content_td,
                          NULL},
 {"textarea", B8(010101),'R', 2, Html_tag_open_textarea,
                          Html_tag_content_textarea, Html_tag_close_textarea},
 /* tfoot */
 {"th", B8(011110),'O',1, Html_tag_open_th, Html_tag_content_th,
                          NULL},
 /* thead */
 {"title", B8(100101),'R',2, Html_tag_open_title, NULL, Html_tag_close_title},
 {"tr", B8(011010),'O',4, Html_tag_open_tr, Html_tag_content_tr,
                          NULL},
 {"tt", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"u", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"ul", B8(011010),'R',2, Html_tag_open_ul, NULL, NULL},
 {"var", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"video", B8(011101),'R',2, Html_tag_open_video, NULL, Html_tag_close_media},
 {"wbr", B8(010101),'F',0, Html_tag_open_default, Html_tag_content_wbr, NULL}
};
/* Number of entries in Tags[] */
#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))


/*
 * Compare a tag name taken from the input buffer [p1] with a tag name from
 * the tag list [p2].
 *  p1: name as found in the buffer; it may be in any case and is ended by
 *      one of ' ', '>', '/', '\n', '\r', '\t' (or by the string end).
 *  p2: lowercase, zero ended string.
 * Return value: as strcmp(). When all of p2 matches, the result is 0 if p1
 * ends right there and 1 if p1 goes on.
 */
static int Html_tag_compare(const char *p1, const char *p2)
{
   for ( ; *p2; ++p1, ++p2) {
      const int diff = D_ASCII_TOLOWER(*p1) - *p2;

      if (diff != 0)
         return diff;
   }
   /* p2 is exhausted: p1 must be at a delimiter for the names to be equal */
   return strchr(" >/\n\r\t", *p1) == NULL;
}

/*
 * Get the index of 'tag' inside Tags[].
 * 'tag' points at the element name (no leading '<' or '/'); it is compared
 * with Html_tag_compare(), so it may be followed by the rest of the tag.
 * Return value: the index, or -1 if the tag is not handled yet.
 */
int a_Html_tag_index(const char *tag)
{
   int lo = 0;
   int hi = NTAGS - 1;          /* Last tag index */

   /* Binary search over the alphabetically sorted table */
   while (lo <= hi) {
      const int mid = (lo + hi) / 2;
      const int cmp = Html_tag_compare(tag, Tags[mid].name);

      if (cmp == 0)
         return mid;
      if (cmp < 0)
         hi = mid - 1;
      else
         lo = mid + 1;
   }
   return -1;
}

/*
 * For elements with optional close, check whether it is time to close.
 *  old_idx: Tags[] index of the open element with optional close.
 *  cur_idx: Tags[] index of the element about to be opened.
 * Return value: non-zero: Close, 0: Don't close.
 * The tag indexes are looked up on the first call and kept in statics.
 */
static int Html_needs_optional_close(int old_idx, int cur_idx)
{
   static int i_P = -1, i_LI, i_TD, i_TR, i_TH, i_DD, i_DT, i_OPTION;
               // i_THEAD, i_TFOOT, i_COLGROUP;

   if (i_P == -1) {
      /* initialize the indexes of elements with optional close */
      i_P = a_Html_tag_index("p");
      i_LI = a_Html_tag_index("li");
      i_TD = a_Html_tag_index("td");
      i_TR = a_Html_tag_index("tr");
      i_TH = a_Html_tag_index("th");
      i_DD = a_Html_tag_index("dd");
      i_DT = a_Html_tag_index("dt");
      i_OPTION = a_Html_tag_index("option");
      // i_THEAD = a_Html_tag_index("thead");
      // i_TFOOT = a_Html_tag_index("tfoot");
      // i_COLGROUP = a_Html_tag_index("colgroup");
   }

   /* P and DT are closed by block elements */
   if (old_idx == i_P || old_idx == i_DT)
      return (Tags[cur_idx].Flags & 2);

   /* LI closes LI */
   if (old_idx == i_LI)
      return (cur_idx == i_LI);

   /* TD and TH are closed by TD, TH and TR */
   if (old_idx == i_TD || old_idx == i_TH)
      return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR);

   /* TR closes TR */
   if (old_idx == i_TR)
      return (cur_idx == i_TR);

   /* DD is closed by DD and DT */
   if (old_idx == i_DD)
      return (cur_idx == i_DD || cur_idx == i_DT);

   /* OPTION always needs close */
   if (old_idx == i_OPTION)
      return 1;

   /* HTML, HEAD, BODY are handled by Html_test_section(), not here. */
   /* TODO: TBODY is pending */
   return 0;
}


/*
 * Conditional cleanup of the stack (at open time).
 * - This helps catching block elements inside inline containers (a BUG).
 * - It also closes elements with "optional" close tag.
 *
 * This function is called when opening a block element or <OPTION>.
 * The new element is not yet pushed into the stack; 'new_idx' is its index
 * inside Tags[]. (OPTION is the only non-block element with an optional
 * close; it is an empty inline, so it is closed anyway.)
 *
 * Starting from the top of the stack, open inline containers are closed,
 * and so are elements with optional close tag when necessary. The walk
 * stops at the first block container, or at an element with optional close
 * that the new element does not close. Nothing is done unless
 * html->TagSoup is set.
 */
static void Html_stack_cleanup_at_open(DilloHtml *html, int new_idx)
{
   const char *new_name;
   int new_is_hr, new_is_option;

   if (!html->TagSoup)
      return;

   new_name = Tags[new_idx].name;
   new_is_hr = (strcmp(new_name, "hr") == 0);
   new_is_option = (strcmp(new_name, "option") == 0);

   while (html->stack->size() > 1) {
      const int top_idx = S_TOP(html)->tag_idx;
      const char top_end = Tags[top_idx].EndTag;

      if (top_end == 'O') {
         /* Element with optional close */
         if (!Html_needs_optional_close(top_idx, new_idx))
            break;
      } else if (Tags[top_idx].Flags & 8) {
         /* Block container */
         break;
      }

      /* we have an inline (or empty) container... */
      if (top_end == 'R') {
         BUG_MSG("<%s> is not allowed to contain <%s>. -- closing <%s>.",
                 Tags[top_idx].name, new_name, Tags[top_idx].name);
      }

      /* Workaround for Apache and its bad HTML directory listings... */
      if ((html->InFlags & IN_PRE) && new_is_hr)
         break;

      /* Avoid OPTION closing SELECT */
      if ((html->InFlags & IN_SELECT) && new_is_option)
         break;

      /* This call closes the top tag only. */
      Html_tag_cleanup_at_close(html, top_idx);
   }
}

/*
 * HTML, HEAD and BODY elements have optional open and close tags.
 * Handle this "magic" here.
 */
static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
{
   const char *tag;
   int tag_idx;

   if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
      BUG_MSG("The required DOCTYPE declaration is missing. "
              "Handling as HTML4.");

   if (!(html->InFlags & IN_HTML)) {
      tag = "<html>";
      tag_idx = a_Html_tag_index(tag + 1);
      if (tag_idx != new_idx || IsCloseTag) {
         /* implicit open */
         Html_force_push_tag(html, tag_idx);
         _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
         Tags[tag_idx].open (html, tag, strlen(tag));
      }
   }

   if (Tags[new_idx].Flags & 32) {
      /* head element */
      if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
         tag = "<head>";
         tag_idx = a_Html_tag_index(tag + 1);
         if (tag_idx != new_idx || IsCloseTag) {
            /* implicit open of the head element */
            Html_force_push_tag(html, tag_idx);
            _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
            Tags[tag_idx].open (html, tag, strlen(tag));
         }
      }

   } else if (Tags[new_idx].Flags & 16) {
      /* body element */
      if (html->InFlags & IN_HEAD) {
         tag = "</head>";
         tag_idx = a_Html_tag_index(tag + 2);
         Html_tag_cleanup_at_close(html, tag_idx);
      }
      tag = "<body>";
      tag_idx = a_Html_tag_index(tag + 1);
      if (tag_idx != new_idx || IsCloseTag) {
         /* implicit open */
         Html_force_push_tag(html, tag_idx);
         _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
         Tags[tag_idx].open (html, tag, strlen(tag));
      }
   }
}

/*
 * Parse attributes that can appear on any tag: id, class, style, xml:lang
 * and lang. Each lookup is skipped when the tag is too short to hold the
 * attribute at all.
 */
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
{
   const char *attrbuf;
   char lang[3] = {0, 0, 0};

   /* 8 is the length of "<t id=i>" */
   if (tagsize >= 8) {
      attrbuf = a_Html_get_attr(html, tag, tagsize, "id");
      if (attrbuf != NULL) {
         /* According to the SGML declaration of HTML 4, all NAME values
          * occuring outside entities must be converted to uppercase
          * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
          * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
          * So we don't do it and hope for better specs in the future ...
          */
         Html_check_name_val(html, attrbuf, "id");

         html->styleEngine->setId(attrbuf);
      }
   }

   /* 11 is the length of "<t class=i>" or "<t style=i>" */
   if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
      attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
      if (attrbuf != NULL)
         html->styleEngine->setClass (attrbuf);

      attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
      if (attrbuf != NULL)
         html->styleEngine->setStyle (attrbuf);
   }

   /* handle "xml:lang" and "lang" attributes
    * We use only the first two chars of the value to deal with
    * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
    * 'lang' starts zeroed and has room for three chars, so it stays
    * zero ended after copying two.
    */

   /* 14 is the length of "<t xml:lang=i>" */
   if (tagsize >= 14) {
      attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
      if (attrbuf != NULL)
         strncpy(lang, attrbuf, 2);
   }

   /* 'xml:lang' prevails over 'lang'; 10 is the length of "<t lang=i>" */
   if (lang[0] == '\0' && tagsize >= 10) {
      attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
      if (attrbuf != NULL)
         strncpy(lang, attrbuf, 2);
   }

   if (lang[0] != '\0')
      html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, lang);
}

/*
 * Warn when encountering elements that are obsolete in HTML5. This list
 * was from the "W3C Candidate Recommendation 6 August 2013".
 *  ni: Tags[] index of the element being opened.
 * The Tags[] indexes of the obsolete elements are looked up on first use.
 */
static void Html_check_html5_obsolete(DilloHtml *html, int ni)
{
   static const char *const obsolete[9] = {
      "dir", "frame", "frameset", "isindex", "strike",
      "big", "center", "font", "tt"
   };
   static int indexes[9] = {-1};
   int k;

   if (indexes[0] == -1) {
      for (k = 0; k < 9; k++)
         indexes[k] = a_Html_tag_index(obsolete[k]);
   }

   for (k = 0; k < 9; k++) {
      if (indexes[k] != ni)
         continue;
      BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
      break;
   }
}

/*
 * Handle an element whose style says 'display: block' (see the display
 * switch in Html_process_tag): add a textblock for it. The second argument
 * of Html_add_textblock() is taken from Html_must_add_breaks().
 */
static void Html_display_block(DilloHtml *html)
{
   Html_add_textblock(html, Html_must_add_breaks (html), 0,
                      false /* Perhaps true for widgets oof? */);
}

/*
 * Handle an element whose style says 'display: inline-block' (see the
 * display switch in Html_process_tag): add a textblock for it, passing
 * (false, 0, true) to Html_add_textblock().
 */
static void Html_display_inline_block(DilloHtml *html)
{
   Html_add_textblock(html, false, 0, true);
}

/*
 * Handle an element whose style says 'display: list-item': create a
 * ListItem widget, add it to the current textblock between two paragraph
 * breaks, and make it the current textblock.
 * The stack entry below the top one (our parent tag) is used as state
 * storage for the running item number and the previous list item.
 * The marker is a number text for ordered list style types, a Bullet for
 * the remaining ones, and nothing for LIST_STYLE_TYPE_NONE.
 */
static void Html_display_listitem(DilloHtml *html)
{
   Style *style = html->style ();
   Style *wordStyle = html->wordStyle ();
   const int parent_pos = html->stack->size()-2;
   int *list_number;
   Widget **ref_list_item;
   ListItem *list_item;

   /* Get our parent tag's variables (used as state storage) */
   list_number = &html->stack->getRef(parent_pos)->list_number;
   ref_list_item = &html->stack->getRef(parent_pos)->ref_list_item;

   HT2TB(html)->addParbreak (0, wordStyle);

   list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
   HT2TB(html)->addWidget (list_item, style);
   HT2TB(html)->addParbreak (0, wordStyle);
   *ref_list_item = list_item;
   S_TOP(html)->textblock = html->dw = list_item;

   if (style->listStyleType != LIST_STYLE_TYPE_NONE) {
      if (style->listStyleType >= LIST_STYLE_TYPE_DECIMAL) {
         // ordered
         char buf[16];

         numtostr((*list_number)++, buf, 16, style->listStyleType);
         list_item->initWithText (buf, wordStyle);
      } else {
         // unordered
         list_item->initWithWidget (new Bullet(), wordStyle);
      }
   }
}

/*
 * Process a tag, given as 'tag' and 'tagsize'. -- tagsize is [1 based]
 * ('tag' must include the enclosing angle brackets)
 * This function calls the right open or close function for the tag.
 *
 * Outline:
 *  - Unknown tags are ignored (a "<!doctype" seen before HTML is opened is
 *    handed to Html_parse_doctype()).
 *  - While not inside BODY, Html_test_section() handles the optional
 *    HTML/HEAD/BODY tags.
 *  - For an open tag: stack cleanup, push, common attributes, the tag's
 *    open function, display handling and the tag's content function.
 *  - The open case falls through into the close case on purpose, so that
 *    elements with forbidden close tag and XML-style "<x/>" tags are
 *    closed right after being opened.
 * Nothing is done when html->stop_parser is set.
 */
static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
{
   static int i_HTML = a_Html_tag_index("html");
   int ci, ni;           /* current and new tag indexes */
   char *start = tag + 1; /* discard the '<' */
   int IsCloseTag = (*start == '/');

   dReturn_if (html->stop_parser == true);

   /* For a close tag, skip the '/' too before looking the name up */
   ni = a_Html_tag_index(start + IsCloseTag);
   if (ni == -1) {
      /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
      if (!(html->InFlags & IN_HTML)) {
         if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
            Html_parse_doctype(html, tag, tagsize);
      }
      /* Ignore unknown tags */
      return;
   }
   _MSG("Html_process_tag: %s%s\n", IsCloseTag ? "/" : "", Tags[ni].name);

   if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
      Html_check_html5_obsolete(html, ni);

   /* Report content after </html>, or after </body> unless this tag is
    * HTML itself. i: 1 = previous tag was </html>, 2 = it was </body>. */
   int i = html->PrevWasHtmlClose ? 1 : html->PrevWasBodyClose ? 2 : 0;
   if (i == 1 || (i == 2 && ni != i_HTML))
      BUG_MSG("Content after </%s> tag.", i == 1 ? "html" : "body");
   html->PrevWasHtmlClose = html->PrevWasBodyClose = false;

   /* Handle HTML, HEAD and BODY. Elements with optional open and close */
   if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
      Html_test_section(html, ni, IsCloseTag);

   /* Tag processing */
   ci = S_TOP(html)->tag_idx;
   switch (IsCloseTag) {
   case 0:
      /* Open function */

      /* Cleanup when opening a block element, or
       * when openning over an element with optional close */
      if (Tags[ni].Flags & 2 || (ci != -1 && Tags[ci].EndTag == 'O'))
         Html_stack_cleanup_at_open(html, ni);

      /* TODO: this is only raising a warning, take some defined action.
       * Note: apache uses IMG inside PRE (we could use its "alt"). */
      if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
         BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);

      /* Make sure these elements don't nest each other */
      if (html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA))
         Html_tag_cleanup_nested_inputs(html, ni);

      /* Push the tag into the stack */
      Html_push_tag(html, ni);

      html->startElement (ni);
      _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);

      /* Parse attributes that can appear on any tag */
      Html_parse_common_attrs(html, tag, tagsize);

      /* Call the open function for this tag */
      _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
      Tags[ni].open (html, tag, tagsize);

      /* Display handling and content creation are skipped when the top
       * of the stack is already marked display_none. */
      if (! S_TOP(html)->display_none) {
         switch (html->style ()->display) {
            case DISPLAY_BLOCK:
               Html_display_block(html);
               break;
            case DISPLAY_INLINE_BLOCK:
               Html_display_inline_block(html);
               break;
            case DISPLAY_LIST_ITEM:
               Html_display_listitem(html);
               break;
            case DISPLAY_NONE:
               S_TOP(html)->display_none = true;
               break;
            case DISPLAY_INLINE:
            default:
               break;
         }

         /* display_none may have just been set by the switch above */
         if (Tags[ni].content && ! S_TOP(html)->display_none) {
            Tags[ni].content (html, tag, tagsize);
         }
      }

      if (html->stop_parser)
         break;

      /* Switch to PRE parse mode when the style asks for preformatted
       * white space, unless the element is parsed verbatim. */
      if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
         /* don't change anything */
      } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
          (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
           html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
         S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
         html->pre_column = 0;
         html->PreFirstChar = true;
      }

      /* An element with an id gets an anchor */
      if (html->styleEngine->getId ())
         Html_add_anchor(html, html->styleEngine->getId ());

      /* Request immediate close for elements with forbidden close tag. */
      /* TODO: XHTML always requires close tags. A simple implementation
       * of the commented clause below will make it work. */
      if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
         html->ReqTagClose = true;

      /* Don't break! Open tags may also close themselves */

   default:
      /* Close function */

      /* Test for </x>, ReqTagClose, <x /> and <x/> */
      if (*start == '/' ||                                      /* </x>    */
          html->ReqTagClose ||                                  /* request */
          (tag[tagsize-2] == '/' &&                             /* XML:    */
           (strchr(" \"'", tag[tagsize-3]) ||                   /* [ "']/> */
            (size_t)tagsize == strlen(Tags[ni].name) + 3))) {   /*  <x/>   */

         _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
         Html_tag_cleanup_at_close(html, ni);
         /* This was a close tag */
         html->ReqTagClose = false;
      }
   }
}

/*
 * Get attribute value for 'attrname' and return it.
 *  Tags start with '<' and end with a '>' (Ex: "<P align=center>")
 *  tagsize = strlen(tag) from '<' to '>', inclusive.
 *
 *  tag_parsing_flags is a combination of DilloHtmlTagParsingFlags:
 *    HTML_LeftTrim      : strip leading white space from the value.
 *    HTML_RightTrim     : strip trailing white space from the value.
 *    HTML_ParseEntities : replace character references inside the value.
 *
 * Returns one of the following:
 *    * The value of the attribute.
 *    * An empty string if the attribute exists but has no value.
 *    * NULL if the attribute doesn't exist (or 'attrname' is empty).
 *
 * The returned string lives in html->attr_data, which is truncated at the
 * start of every call: it is only valid until the next call.
 *
 * Note: every char handed to isspace() is first converted to unsigned char.
 * Tags may hold bytes >= 0x80 (e.g. UTF-8 text in attribute values), which
 * are negative where plain char is signed, and the <ctype.h> functions are
 * only defined for unsigned char values and EOF.
 */
static const char *Html_get_attr2(DilloHtml *html,
                                  const char *tag,
                                  int tagsize,
                                  const char *attrname,
                                  int tag_parsing_flags)
{
   int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
   Dstr *Buf = html->attr_data;
   DilloHtmlTagParsingState state = SEEK_ATTR_START;

   dReturn_val_if_fail(*attrname, NULL);

   dStr_truncate(Buf, 0);

   /* Start at 1: tag[0] is the '<' */
   for (i = 1; i < tagsize; ++i) {
      switch (state) {
      case SEEK_ATTR_START:
         /* Skip the current token up to white space or '=' */
         if (isspace((unsigned char)tag[i]))
            state = SEEK_TOKEN_START;
         else if (tag[i] == '=')
            state = SEEK_VALUE_START;
         break;

      case MATCH_ATTR_NAME:
         if (!attrname[attr_pos] &&
             (tag[i] == '=' || isspace((unsigned char)tag[i]) ||
              tag[i] == '>')) {
            /* The whole name matched and the token ends here */
            Found = 1;
            state = SEEK_TOKEN_START;
            --i;   /* look at this char again in the new state */
         } else if (!tag[i]) {
            state = SEEK_ATTR_START; // NULL byte is not allowed
         } else {
            if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
               state = SEEK_ATTR_START;
            attr_pos++;
         }
         break;

      case SEEK_TOKEN_START:
         if (tag[i] == '=') {
            state = SEEK_VALUE_START;
         } else if (!isspace((unsigned char)tag[i])) {
            /* A new token starts: when the attribute was already found
             * it has no value, otherwise try to match the name here. */
            attr_pos = 0;
            state = (Found) ? FINISHED : MATCH_ATTR_NAME;
            --i;   /* look at this char again in the new state */
         }
         break;
      case SEEK_VALUE_START:
         if (!isspace((unsigned char)tag[i])) {
            /* A quote is the delimiter; unquoted values use ' ' to mean
             * "ends at white space" and start at this very char. */
            delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
            i -= (delimiter == ' ');
            state = (Found) ? GET_VALUE : SKIP_VALUE;
         }
         break;

      case SKIP_VALUE:
         if ((delimiter == ' ' && isspace((unsigned char)tag[i])) ||
             tag[i] == delimiter)
            state = SEEK_TOKEN_START;
         break;
      case GET_VALUE:
         if ((delimiter == ' ' &&
              (isspace((unsigned char)tag[i]) || tag[i] == '>')) ||
             tag[i] == delimiter) {
            state = FINISHED;
         } else if (tag[i] == '&' &&
                    (tag_parsing_flags & HTML_ParseEntities)) {
            const char *entstr;
            const bool_t is_attr = TRUE;

            if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
                                            is_attr))) {
               dStr_append(Buf, entstr);
               i += entsize-1;   /* skip the rest of the reference */
            } else {
               dStr_append_c(Buf, tag[i]);
            }
         } else if (tag[i] == '\r' || tag[i] == '\t') {
            dStr_append_c(Buf, ' ');
         } else if (tag[i] == '\n') {
            /* ignore */
         } else {
            dStr_append_c(Buf, tag[i]);
         }
         break;

      case FINISHED:
         i = tagsize;   /* leave the loop */
         break;
      }
   }

   if (tag_parsing_flags & HTML_LeftTrim)
      while (isspace((unsigned char)Buf->str[0]))
         dStr_erase(Buf, 0, 1);
   if (tag_parsing_flags & HTML_RightTrim)
      while (Buf->len && isspace((unsigned char)Buf->str[Buf->len - 1]))
         dStr_truncate(Buf, Buf->len - 1);

   return (Found) ? Buf->str : NULL;
}

/*
 * Call Html_get_attr2 telling it to parse entities and strip the result
 * (leading and trailing white space).
 * Return value: as Html_get_attr2 -- the value, an empty string when the
 * attribute has no value, or NULL when the attribute doesn't exist.
 * The result is stored in html->attr_data, so it is overwritten by the
 * next attribute lookup.
 */
const char *a_Html_get_attr(DilloHtml *html,
                            const char *tag,
                            int tagsize,
                            const char *attrname)
{
   return Html_get_attr2(html, tag, tagsize, attrname,
                         HTML_LeftTrim | HTML_RightTrim | HTML_ParseEntities);
}

/*
 * "a_Html_get_attr with default"
 * Call a_Html_get_attr() and dStrdup() the returned string.
 * If the attribute isn't found a copy of 'def' is returned.
 */
char *a_Html_get_attr_wdef(DilloHtml *html,
                           const char *tag,
                           int tagsize,
                           const char *attrname,
                           const char *def)
{
   const char *value = a_Html_get_attr(html, tag, tagsize, attrname);

   /* Attribute not found: fall back to the caller-supplied default. */
   if (!value)
      value = def;

   /* The result is always a fresh dStrdup() copy. */
   return dStrdup(value);
}

/*
 * Dispatch the appropriate function for 'Op'.
 * This function is a Cache client and gets called whenever new data arrives.
 *  Op              : operation to perform (nonzero means EOF).
 *  Client->CbData  : a pointer to a DilloHtml structure
 *  Client->Buf     : a pointer to new data
 *  Client->BufSize : new data size (in bytes)
 */
static void Html_callback(int Op, CacheClient_t *Client)
{
   DilloHtml *html = (DilloHtml*)Client->CbData;
   /* Any nonzero 'Op' signals EOF; normalize it to the 0/1 flag passed on. */
   const int eof = Op ? 1 : 0;

   /* Always feed the data first... */
   html->write((char*)Client->Buf, Client->BufSize, eof);

   /* ...and wrap up the parser once the final chunk has been written. */
   if (eof)
      html->finishParsing(Client->Key);
}

/*
 * Here's where we parse the html and put it into the Textblock structure.
 * Return value: number of bytes parsed
 */
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
{
   /*
    * Split 'buf' into tokens (whitespace runs, comments, tags and words)
    * and hand each complete token to the matching Html_process_* routine.
    *
    *  html    : parser state; a set 'stop_parser' ends the loop early.
    *  buf     : input data. Scanning uses strcspn() and one-byte lookahead,
    *            so the code relies on buf[bufsize] being a readable NUL
    *            (NOTE(review): expected to be guaranteed by the caller --
    *            confirm). A word is NUL-terminated in place while it is
    *            being processed and the overwritten byte is restored after.
    *  bufsize : number of bytes in 'buf'.
    *  Eof     : when set, a trailing word is processed even though no
    *            delimiter follows it.
    *
    * Returns 'token_start': the offset just past the last token that was
    * completely handled.
    *
    * Every <ctype.h> call below casts its argument to unsigned char.
    * Plain 'char' may be signed, so bytes >= 0x80 (e.g. UTF-8 sequences)
    * would otherwise be passed as negative values, which is undefined
    * behaviour for isspace()/isalpha().
    */
   char ch = 0, *p, *text;
   int token_start, buf_index;

   /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
    * boundary. Iterate through tokens until end of buffer is reached. */
   buf_index = 0;
   token_start = buf_index;
   while ((buf_index < bufsize) && !html->stop_parser) {
      /* invariant: buf_index == bufsize || token_start == buf_index */

      if (S_TOP(html)->parse_mode ==
          DILLO_HTML_PARSE_MODE_VERBATIM) {
         /* Non HTML code here, let's skip until closing tag */
         do {
            const char *tag = Tags[S_TOP(html)->tag_idx].name;
            buf_index += strcspn(buf + buf_index, "<");
            if (buf_index + (int)strlen(tag) + 3 > bufsize) {
               /* Not enough data left to hold "</tag>": wait for more. */
               buf_index = bufsize;
            } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
                       Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
               /* copy VERBATIM text into the stash buffer */
               text = dStrndup(buf + token_start, buf_index - token_start);
               dStr_append(html->Stash, text);
               dFree(text);
               token_start = buf_index;
               break;
            } else
               ++buf_index;
         } while (buf_index < bufsize);

         if (buf_index == bufsize)
            break;
      }

      if (isspace((unsigned char)buf[buf_index])) {
         /* whitespace: group all available whitespace */
         while (++buf_index < bufsize &&
                isspace((unsigned char)buf[buf_index])) ;
         Html_process_space(html, buf + token_start, buf_index - token_start);
         token_start = buf_index;

      } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
                 (isalpha((unsigned char)ch) || strchr("/!?", ch)) ) {
         /* Tag */
         if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
            /* Comment: search for close of comment, skipping over
             * everything except a matching "-->" tag. */
            while ( (p = (char*) memchr(buf + buf_index, '>',
                                        bufsize - buf_index)) ){
               buf_index = p - buf + 1;
               if (p[-1] == '-' && p[-2] == '-') break;
            }
            if (p) {
               /* Got the whole comment. Let's throw it away! :) */
               token_start = buf_index;
            } else
               buf_index = bufsize;
         } else {
            /* Tag: search end of tag (skipping over quoted strings) */
            html->CurrOfs = html->Start_Ofs + token_start;

            while ( buf_index < bufsize ) {
               buf_index++;
               buf_index += strcspn(buf + buf_index, ">\"'<");
               if ((ch = buf[buf_index]) == '>') {
                  break;
               } else if (ch == '"' || ch == '\'') {
                  /* Skip over quoted string */
                  buf_index++;
                  buf_index += strcspn(buf + buf_index,
                                       (ch == '"') ? "\">" : "'>");
                  if (buf[buf_index] == '>') {
                     /* Unterminated string value? Let's look ahead and test:
                      * (<: unterminated, closing-quote: terminated) */
                     int offset = buf_index + 1;
                     offset += strcspn(buf + offset,
                                       (ch == '"') ? "\"<" : "'<");
                     if (buf[offset] == ch || !buf[offset]) {
                        buf_index = offset;
                     } else {
                        BUG_MSG("Attribute lacks closing quote.");
                        break;
                     }
                  }
               } else if (ch == '<') {
                  /* unterminated tag detected */
                  p = dStrndup(buf+token_start+1,
                               strcspn(buf+token_start+1, " <\n\r\t"));
                  BUG_MSG("<%s> lacks its closing '>'.", p);
                  dFree(p);
                  /* Step back so the new '<' starts the next token. */
                  --buf_index;
                  break;
               }
            }
            if (buf_index < bufsize) {
               buf_index++;
               Html_process_tag(html, buf + token_start,
                                buf_index - token_start);
               token_start = buf_index;
            }
         }
      } else {
         /* A Word: search for whitespace or tag open */
         html->CurrOfs = html->Start_Ofs + token_start;

         while (++buf_index < bufsize) {
            buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
            /* A '<' that does not open a tag is part of the word. */
            if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
                !isalpha((unsigned char)ch) && !strchr("/!?", ch))
               continue;
            break;
         }
         if (buf_index < bufsize || Eof) {
            /* successfully found end of token */
            ch = buf[buf_index];
            buf[buf_index] = 0;
            Html_process_word(html, buf + token_start,
                              buf_index - token_start);
            buf[buf_index] = ch;
            token_start = buf_index;
         }
      }
   }/*while*/

   HT2TB(html)->flush ();

   return token_start;
}