1 files changed, 283 insertions, 246 deletions
diff --git a/src/html.cc b/src/html.cc
index fe861ce7..75d1820f 100644
--- a/src/html.cc
+++ b/src/html.cc
@@ -26,6 +26,7 @@
 #include "msg.h"
 #include "binaryconst.h"
 #include "colors.h"
+#include "html_charrefs.h"
 #include "utf8.hh"
 
 #include "misc.h"
@@ -356,17 +357,40 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
 
 
 /*
- * Create and add a new Textblock to the current Textblock
+ * Create and add a new Textblock to the current Textblock. Typically
+ * only one of addBreaks and addBreakOpt is true.
+ *
+ * addBreaks:   add a paragraph break of 'breakSpace' before and after the
+ *              new Textblock, and set hand_over_break on the stack top.
+ * addBreakOpt: add a break option right after the new Textblock.
  */
-static void Html_add_textblock(DilloHtml *html, int space)
+static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
+                               bool addBreakOpt)
 {
    Textblock *textblock = new Textblock (prefs.limit_text_width);
 
-   HT2TB(html)->addParbreak (space, html->wordStyle ());
-   HT2TB(html)->addWidget (textblock, html->style ());
-   HT2TB(html)->addParbreak (space, html->wordStyle ());
+   if (addBreaks)
+      HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
+
+   HT2TB(html)->addWidget (textblock, html->style ()); /* Works also for floats
+                                                          etc. */
+   if (addBreakOpt)
+      HT2TB(html)->addBreakOption (html->style (), false);
+
+   if (addBreaks)
+      HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
    S_TOP(html)->textblock = html->dw = textblock;
-   S_TOP(html)->hand_over_break = true;
+   if (addBreaks)
+      S_TOP(html)->hand_over_break = true;
+}
+
+/*
+ * Tell whether the current Textblock regards the current style as out of
+ * flow (as reported by its isStyleOutOfFlow() method).
+ */
+static bool Html_will_textblock_be_out_of_flow(DilloHtml *html)
+{
+   return HT2TB(html)->isStyleOutOfFlow (html->style ());
 }
 
 /*
@@ -788,113 +804,16 @@ void a_Html_stash_init(DilloHtml *html)
    dStr_truncate(html->Stash, 0);
 }
 
-/* Entities list from the HTML 4.01 DTD */
-typedef struct {
-   const char *entity;
-   int isocode;
-} Ent_t;
-
-#define NumEnt 252
-static const Ent_t Entities[NumEnt] = {
-   {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302},  {"Agrave",0300},
-   {"Alpha",01621},{"Aring",0305},  {"Atilde",0303}, {"Auml",0304},
-   {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647},   {"Dagger",020041},
-   {"Delta",01624},{"ETH",0320},    {"Eacute",0311}, {"Ecirc",0312},
-   {"Egrave",0310},{"Epsilon",01625},{"Eta",01627},  {"Euml",0313},
-   {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316},  {"Igrave",0314},
-   {"Iota",01631}, {"Iuml",0317},   {"Kappa",01632}, {"Lambda",01633},
-   {"Mu",01634},   {"Ntilde",0321}, {"Nu",01635},    {"OElig",0522},
-   {"Oacute",0323},{"Ocirc",0324},  {"Ograve",0322}, {"Omega",01651},
-   {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326},
-   {"Phi",01646},  {"Pi",01640},    {"Prime",020063},{"Psi",01650},
-   {"Rho",01641},  {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336},
-   {"Tau",01644},  {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333},
-   {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334},  {"Xi",01636},
-   {"Yacute",0335},{"Yuml",0570},   {"Zeta",01626},  {"aacute",0341},
-   {"acirc",0342}, {"acute",0264},  {"aelig",0346},  {"agrave",0340},
-   {"alefsym",020465},{"alpha",01661},{"amp",38},    {"and",021047},
-   {"ang",021040}, {"aring",0345},  {"asymp",021110},{"atilde",0343},
-   {"auml",0344},  {"bdquo",020036},{"beta",01662},  {"brvbar",0246},
-   {"bull",020042},{"cap",021051},  {"ccedil",0347}, {"cedil",0270},
-   {"cent",0242},  {"chi",01707},   {"circ",01306},  {"clubs",023143},
-   {"cong",021105},{"copy",0251},   {"crarr",020665},{"cup",021052},
-   {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623},
-   {"deg",0260},   {"delta",01664}, {"diams",023146},{"divide",0367},
-   {"eacute",0351},{"ecirc",0352},  {"egrave",0350}, {"empty",021005},
-   {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141},
-   {"eta",01667},  {"eth",0360},    {"euml",0353},   {"euro",020254},
-   {"exist",021003},{"fnof",0622},  {"forall",021000},{"frac12",0275},
-   {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663},
-   {"ge",021145},  {"gt",62},       {"hArr",020724}, {"harr",020624},
-   {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356},
-   {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036},
-   {"int",021053}, {"iota",01671},  {"iquest",0277}, {"isin",021010},
-   {"iuml",0357},  {"kappa",01672}, {"lArr",020720}, {"lambda",01673},
-   {"lang",021451},{"laquo",0253},  {"larr",020620}, {"lceil",021410},
-   {"ldquo",020034},{"le",021144},  {"lfloor",021412},{"lowast",021027},
-   {"loz",022712}, {"lrm",020016},  {"lsaquo",020071},{"lsquo",020030},
-   {"lt",60},      {"macr",0257},   {"mdash",020024},{"micro",0265},
-   {"middot",0267},{"minus",021022},{"mu",01674},    {"nabla",021007},
-   {"nbsp",0240},  {"ndash",020023},{"ne",021140},   {"ni",021013},
-   {"not",0254},   {"notin",021011},{"nsub",021204}, {"ntilde",0361},
-   {"nu",01675},   {"oacute",0363}, {"ocirc",0364},  {"oelig",0523},
-   {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677},
-   {"oplus",021225},{"or",021050},  {"ordf",0252},   {"ordm",0272},
-   {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366},
-   {"para",0266},  {"part",021002}, {"permil",020060},{"perp",021245},
-   {"phi",01706},  {"pi",01700},    {"piv",01726},   {"plusmn",0261},
-   {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035},
-   {"psi",01710},  {"quot",34},     {"rArr",020722}, {"radic",021032},
-   {"rang",021452},{"raquo",0273},  {"rarr",020622}, {"rceil",021411},
-   {"rdquo",020035},{"real",020434},{"reg",0256},    {"rfloor",021413},
-   {"rho",01701},  {"rlm",020017},  {"rsaquo",020072},{"rsquo",020031},
-   {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247},
-   {"shy",0255},   {"sigma",01703}, {"sigmaf",01702},{"sim",021074},
-   {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021},
-   {"sup",021203}, {"sup1",0271},   {"sup2",0262},   {"sup3",0263},
-   {"supe",021207},{"szlig",0337},  {"tau",01704},   {"there4",021064},
-   {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376},
-   {"tilde",01334},{"times",0327},  {"trade",020442},{"uArr",020721},
-   {"uacute",0372},{"uarr",020621}, {"ucirc",0373},  {"ugrave",0371},
-   {"uml",0250},   {"upsih",01722}, {"upsilon",01705},{"uuml",0374},
-   {"weierp",020430},{"xi",01676},  {"yacute",0375}, {"yen",0245},
-   {"yuml",0377},  {"zeta",01666},  {"zwj",020015},  {"zwnj",020014}
-};
-
-
-/*
- * Comparison function for binary search
- */
-static int Html_entity_comp(const void *a, const void *b)
-{
-   return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);
-}
-
-/*
- * Binary search of 'key' in entity list
- */
-static int Html_entity_search(char *key)
-{
-   Ent_t *res, EntKey;
-
-   EntKey.entity = key;
-   res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt,
-                          sizeof(Ent_t), Html_entity_comp);
-   if (res)
-     return (res - Entities);
-   return -1;
-}
-
 /*
  * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them!
  *
  * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED.
- * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid
+ * TODO: Probably I should remove this hack. --Jcid
  */
-static int Html_ms_stupid_quotes_2ucs(int isocode)
+static int Html_ms_stupid_quotes_2ucs(int codepoint)
 {
    int ret;
-   switch (isocode) {
+   switch (codepoint) {
    case 145:
    case 146: ret = '\''; break;
    case 147:
@@ -902,130 +821,275 @@ static int Html_ms_stupid_quotes_2ucs(int isocode)
    case 149: ret = 176; break;
    case 150:
    case 151: ret = '-'; break;
-   default:  ret = isocode; break;
+   default:  ret = codepoint; break;
    }
    return ret;
 }
 
 /*
- * Given an entity, return the UCS character code.
- * Returns a negative value (error code) if not a valid entity.
- *
- * The first character *token is assumed to be == '&'
- *
- * For valid entities, *entsize is set to the length of the parsed entity.
+ * Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
+ * The "&#" has already been consumed, so 'tok' points just past it.
+ * 'tok' is modified temporarily (for the bug messages) and then restored.
+ *
+ * Return value: the character to insert, as a NUL-terminated string, or
+ * NULL if the reference is rejected. The string lives in a static buffer
+ * that the next call overwrites, so the caller must copy it right away.
+ *
+ * On success, *entsize is set to the length of the whole reference,
+ * counting the leading "&#" and, when present, the trailing ';'.
  */
-static int Html_parse_entity(DilloHtml *html, const char *token,
-                             int toksize, int *entsize)
+static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
+                                              bool_t is_attr, int *entsize)
 {
-   int isocode, i;
-   char *tok, *s, c;
+   static char buf[5];
+   char *s = tok;
+   long value = -1;
+   int n, codepoint;
 
-   token++;
-   tok = s = toksize ? dStrndup(token, (uint_t)toksize) : dStrdup(token);
-
-   isocode = -1;
-
-   if (*s == '#') {
-      /* numeric character reference */
-      errno = 0;
-      if (*++s == 'x' || *s == 'X') {
-         if (isxdigit(*++s)) {
-            /* strtol with base 16 accepts leading "0x" - we don't */
-            if (*s == '0' && s[1] == 'x') {
-               s++;
-               isocode = 0;
-            } else {
-               isocode = strtol(s, &s, 16);
-            }
+   errno = 0;
+
+   if (*s == 'x' || *s == 'X') {
+      if (isxdigit((unsigned char)*++s)) {
+         /* strtol with base 16 accepts leading "0x"/"0X" - we don't */
+         if (*s == '0' && (s[1] == 'x' || s[1] == 'X')) {
+            s++;
+            value = 0;
+         } else {
+            value = strtol(s, &s, 16);
          }
-      } else if (isdigit(*s)) {
-         isocode = strtol(s, &s, 10);
       }
+   } else if (isdigit((unsigned char)*s)) {
+      value = strtol(s, &s, 10);
+   }
+   /* strtol() returns a long: check the range before narrowing to int, so
+    * that e.g. "&#x100000041;" is not silently truncated to a valid code.
+    */
+   if (errno || value < 0)
+      codepoint = -1;
+   else if (value > 0x10ffff)
+      codepoint = 0x110000; /* out of range: rejected by the check below */
+   else
+      codepoint = (int) value;
 
-      if (!isocode || errno || isocode > 0xffff) {
-         /* this catches null bytes, errors and codes >= 0xFFFF */
-         BUG_MSG("Numeric character reference \"%s\" out of range.", tok);
-         isocode = -2;
+   if (*s == ';')
+      s++;
+   else {
+      if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
+          (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
+         char c = *s;
+         *s = '\0';
+         BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
+         *s = c;
       }
-
-      if (isocode != -1) {
-         if (*s == ';')
-            s++;
-         else if (prefs.show_extra_warnings)
-            BUG_MSG("Numeric character reference without trailing ';'.");
+      /* Don't require ';' for old HTML, except that our current heuristic
+       * is to require it in attributes to avoid cases like "&copy=1" found
+       * in URLs.
+       */
+      if (is_attr || html->DocType == DT_XHTML ||
+          (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
+         return NULL;
       }
 
-   } else if (isalpha(*s)) {
-      /* character entity reference */
-      while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
-      c = *s;
-      *s = 0;
+   }
+   if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
+        codepoint != '\f') ||
+       (codepoint >= 0x7f && codepoint <= 0x9f) ||
+       (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
+       ((codepoint & 0xfffe) == 0xfffe) ||
+       (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
+        codepoint > 0xffff)) {
+      /* this catches null bytes, errors, codes out of range, disallowed
+       * control chars, permanently undefined chars, and surrogates.
+       */
+      char c = *s;
+      *s = '\0';
+      BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
+      *s = c;
 
-      if ((i = Html_entity_search(tok)) >= 0) {
-         isocode = Entities[i].isocode;
+      codepoint = (codepoint >= 145 && codepoint <= 151) ?
+                  Html_ms_stupid_quotes_2ucs(codepoint) : -1;
+   }
+   if (codepoint != -1) {
+      if (codepoint >= 128) {
+         n = a_Utf8_encode(codepoint, buf);
       } else {
-         if (html->DocType == DT_XHTML && !strcmp(tok, "apos")) {
-            isocode = 0x27;
-         } else {
-            if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) ||
-                html->DocType == DT_XHTML)
-               BUG_MSG("Undefined character entity '%s'.", tok);
-            isocode = -3;
-         }
+         n = 1;
+         buf[0] = (char) codepoint;
+      }
+      assert(n < 5);
+      buf[n] = '\0';
+      *entsize = s-tok+2;
+      return buf;
+   } else {
+      return NULL;
+   }
+}
+
+/*
+ * Comparison function for binary search
+ */
+static int Html_charref_comp(const void *a, const void *b)
+{
+   return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
+}
+
+/*
+ * Binary search of 'key' in the charref list (which must be sorted in
+ * strcmp() order). Returns the matching entry, or NULL if there is none.
+ */
+static Charref_t *Html_charref_search(char *key)
+{
+   Charref_t RefKey;
+
+   RefKey.ref = key;
+   return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
+                       sizeof(Charref_t), Html_charref_comp);
+}
+
+/*
+ * Parse a named character reference (e.g., "&amp;" or "&hellip;").
+ * The "&" has already been consumed, so 'tok' points at the name.
+ *
+ * Return value: the replacement string (the HTML5 or the HTML4 one from the
+ * Charrefs table, depending on the doctype), or NULL if the reference is
+ * rejected or undefined. The caller must not free it.
+ *
+ * *entsize is set to the length of the reference, counting the leading '&',
+ * except when returning early because a required ';' is missing.
+ */
+static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
+                                            bool_t is_attr, int *entsize)
+{
+   Charref_t *p;
+   char c;
+   char *s = tok;
+   const char *ret = NULL;
+
+   while (*++s && (isalnum((unsigned char)*s) || strchr(":_.-", *s))) ;
+   c = *s;
+   *s = '\0';
+   if (c != ';') {
+      if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
+          (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
+         BUG_MSG("Character reference '&%s' lacks ';'.", tok);
+
+      /* Don't require ';' for old HTML, except that our current heuristic
+       * is to require it in attributes to avoid cases like "&copy=1" found
+       * in URLs.
+       */
+      if (is_attr || html->DocType == DT_XHTML ||
+          (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
+         return ret;
       }
-      if (c == ';')
-         s++;
-      else if (prefs.show_extra_warnings)
-         BUG_MSG("Character entity reference without trailing ';'.");
    }
 
+   if ((p = Html_charref_search(tok))) {
+      ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
+            p->html5_str : p->html4_str;
+   }
+
+   if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
+      ret = "'";
+
+   *s = c;
+   if (c == ';')
+      s++;
+
+   if (!ret) {
+      c = *s;
+      *s = '\0';
+      BUG_MSG("Undefined character reference '&%s'.", tok);
+      *s = c;
+   }
    *entsize = s-tok+1;
-   dFree(tok);
+   return ret;
+}
 
-   if (isocode >= 145 && isocode <= 151) {
-      /* TODO: remove this hack. */
-      isocode = Html_ms_stupid_quotes_2ucs(isocode);
-   } else if (isocode == -1 && prefs.show_extra_warnings)
+/*
+ * Given an entity, return the corresponding string.
+ * Returns NULL if not a valid entity.
+ *
+ * The first character *token is assumed to be == '&'. 'toksize' is the
+ * number of bytes available at 'token', counting that '&'.
+ *
+ * For valid entities, *entsize is set to the length of the parsed entity.
+ *
+ * The returned string is not owned by the caller and may be overwritten by
+ * the next call: copy it before parsing another entity.
+ */
+static const char *Html_parse_entity(DilloHtml *html, const char *token,
+                                     int toksize, int *entsize, bool_t is_attr)
+{
+   const char *ret = NULL;
+   char *tok;
+
+   /* Skip the '&': it is counted in toksize but is not copied into tok. */
+   token++;
+   toksize = (toksize > 0) ? toksize - 1 : 0;
+
+   if (toksize > 50) {
+      /* In pathological cases, attributes can be megabytes long and filled
+       * with character references. As of HTML5, the longest defined character
+       * reference is about 32 bytes long.
+       */
+      toksize = 50;
+   }
+   tok = dStrndup(token, (uint_t)toksize);
+
+   if (*tok == '#') {
+      ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
+   } else if (isalpha((unsigned char)*tok)) {
+      ret = Html_parse_named_charref(html, tok, is_attr, entsize);
+   } else if (prefs.show_extra_warnings &&
+       (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
+      // HTML5 doesn't mind literal '&'s.
       BUG_MSG("Literal '&'.");
+   }
+   dFree(tok);
 
-   return isocode;
+   return ret;
 }
 
 /*
- * Convert all the entities in a token to utf8 encoding. Takes
- * a token and its length, and returns a newly allocated string.
+ * Parse all the entities in a token. Takes the token and its length, and
+ * returns a newly allocated string.
  */
 char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
 {
-   const char *esc_set = "&";
-   char *new_str, buf[4];
-   int i, j, k, n, s, isocode, entsize;
-
-   new_str = dStrndup(token, toksize);
-   s = strcspn(new_str, esc_set);
-   if (new_str[s] == 0)
-      return new_str;
-
-   for (i = j = s; i < toksize; i++) {
-      if (token[i] == '&' &&
-          (isocode = Html_parse_entity(html, token+i,
-                                       toksize-i, &entsize)) >= 0) {
-         if (isocode >= 128) {
-            /* multibyte encoding */
-            n = a_Utf8_encode(isocode, buf);
-            for (k = 0; k < n; ++k)
-               new_str[j++] = buf[k];
+   const char *amp;
+   int i, s, entsize;
+   char *str;
+
+   /* Bounded search: strcspn() would run past 'toksize' bytes up to the next
+    * '&' or NUL of the enclosing buffer.
+    */
+   amp = (const char *) memchr(token, '&', toksize);
+   if (!amp) {
+      /* no ampersands */
+      str = dStrndup(token, toksize);
+   } else {
+      Dstr *ds = dStr_sized_new(toksize);
+
+      s = amp - token;
+      dStr_append_l(ds, token, s);
+
+      for (i = s; i < toksize; i++) {
+         const char *entstr;
+         const bool_t is_attr = FALSE;
+
+         if (token[i] == '&' &&
+             (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
+                                         is_attr))) {
+            dStr_append(ds, entstr);
+            i += entsize-1;
          } else {
-            new_str[j++] = (char) isocode;
+            dStr_append_c(ds, token[i]);
          }
-         i += entsize-1;
-      } else {
-         new_str[j++] = token[i];
       }
+      str = ds->str;
+      dStr_free(ds, 0);
    }
-   new_str[j] = '\0';
-   return new_str;
+   return str;
 }
 
 /*
@@ -1553,7 +1583,7 @@ static int
  * rendering modes, so it may be better to chose another behaviour. --Jcid
  *
  * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
- * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
+ * http://lists.dillo.org/pipermail/dillo-dev/2004-October/002300.html
  *
  * This is not a full DOCTYPE parser, just enough for what Dillo uses.
  */
@@ -2017,7 +2047,7 @@ static void Html_tag_content_frameset (DilloHtml *html,
 {
    HT2TB(html)->addParbreak (9, html->wordStyle ());
    HT2TB(html)->addText("--FRAME--", html->wordStyle ());
-   Html_add_textblock(html, 5);
+   Html_add_textblock(html, true, 5, false);
 }
 
 /*
@@ -2094,8 +2124,8 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
 {
    char *width_ptr, *height_ptr;
    const char *attrbuf;
-   CssLength l_w  = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
-   CssLength l_h  = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
+   CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
+   CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
    int w = 0, h = 0;
 
    if (prefs.show_tooltip &&
@@ -2128,7 +2158,7 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
     */
    if (w < 0 || h < 0 ||
        w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
-       (h > 0 &&  w > IMAGE_MAX_AREA / h)) {
+       (h > 0 && w > IMAGE_MAX_AREA / h)) {
       dFree(width_ptr);
       dFree(height_ptr);
       width_ptr = height_ptr = NULL;
@@ -2173,14 +2203,16 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
       return NULL;
 
    alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
-   if ((!alt_ptr || !*alt_ptr) && !prefs.load_images) {
+   if (!alt_ptr || !*alt_ptr) {
       dFree(alt_ptr);
-      alt_ptr = dStrdup("[IMG]"); // Place holder for img_off mode
+      alt_ptr = dStrdup("[IMG]");
    }
 
    dw::Image *dw = new dw::Image(alt_ptr);
    image =
       a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0);
+   
+   a_Image_ref(image);
 
    if (HT2TB(html)->getBgColor())
       image->bg_color = HT2TB(html)->getBgColor()->getColor();
@@ -2197,10 +2229,10 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
    if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
       // hi->image is NULL if dillo tries to load the image immediately
       hi->image = NULL;
+      a_Image_unref(image);
    } else {
       // otherwise a reference is kept in html->images
       hi->image = image;
-      a_Image_ref(image);
    }
 
    dFree(alt_ptr);
@@ -2315,6 +2347,7 @@ static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
    // multiple inheritance.
    dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
    HT2TB(html)->addWidget(dwi, html->style());
+   HT2TB(html)->addBreakOption (html->style (), false);
 
    /* Image maps */
    if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
@@ -2448,7 +2481,6 @@ static void
       type = UNKNOWN;
    }
    if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
-      /* TODO: add support for coords in % */
       if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
          coords = Html_read_coords(html, attrbuf);
 
@@ -2482,8 +2514,6 @@ static void
       if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
          url = a_Html_url_new(html, attrbuf, NULL, 0);
          dReturn_if_fail ( url != NULL );
-         if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "alt")))
-            a_Url_set_alt(url, attrbuf);
 
          link = Html_set_new_link(html, &url);
       }
@@ -2797,7 +2827,7 @@ static void Html_tag_close_a(DilloHtml *html)
 static void Html_tag_open_blockquote(DilloHtml *html,
                                      const char *tag, int tagsize)
 {
-   Html_add_textblock(html, 9);
+   Html_add_textblock(html, true, 9, false);
 }
 
 /*
@@ -3061,7 +3091,7 @@ static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
  */
 static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
 {
-   Html_add_textblock(html, 9);
+   Html_add_textblock(html, true, 9, false);
 }
 
 /*
@@ -3262,27 +3292,26 @@ void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
    dReturn_if (url == NULL || ! prefs.load_stylesheets);
 
    _MSG("Html_load_stylesheet: ");
-   if (a_Capi_get_buf(url, &data, &len)) {
+   if ((a_Capi_get_flags_with_redirection(url) & CAPI_Completed) &&
+       a_Capi_get_buf(url, &data, &len)) {
       _MSG("cached URL=%s len=%d", URL_STR(url), len);
-      if (a_Capi_get_flags_with_redirection(url) & CAPI_Completed) {
-         if (strncmp("@charset \"", data, 10) == 0) {
-            char *endq = strchr(data+10, '"');
-
-            if (endq && (endq - data <= 51)) {
-               /* IANA limits charset names to 40 characters */
-               char *content_type;
-
-               *endq = '\0';
-               content_type = dStrconcat("text/css; charset=", data+10, NULL);
-               *endq = '"';
-               a_Capi_unref_buf(url);
-               a_Capi_set_content_type(url, content_type, "meta");
-               dFree(content_type);
-               a_Capi_get_buf(url, &data, &len);
-            }
+      if (strncmp("@charset \"", data, 10) == 0) {
+         char *endq = strchr(data+10, '"');
+
+         if (endq && (endq - data <= 51)) {
+            /* IANA limits charset names to 40 characters */
+            char *content_type;
+
+            *endq = '\0';
+            content_type = dStrconcat("text/css; charset=", data+10, NULL);
+            *endq = '"';
+            a_Capi_unref_buf(url);
+            a_Capi_set_content_type(url, content_type, "meta");
+            dFree(content_type);
+            a_Capi_get_buf(url, &data, &len);
          }
-         html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
       }
+      html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
       a_Capi_unref_buf(url);
    } else {
       /* Fill a Web structure for the cache query */
@@ -3364,8 +3393,13 @@ static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
 
    if (html->InFlags & IN_HEAD) {
       if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
-         BaseUrl = a_Html_url_new(html, attrbuf, "", 1);
-         if (URL_SCHEME_(BaseUrl)) {
+         bool_t html5 = html->DocType == DT_HTML &&
+                        html->DocTypeVersion >= 5.0f;
+
+         BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) :
+                           a_Html_url_new(html, attrbuf, "", 1);
+
+         if (html5 || URL_SCHEME_(BaseUrl)) {
             /* Pass the URL_SpamSafe flag to the new base url */
             a_Url_set_flags(
                BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
@@ -3474,7 +3508,7 @@ const TagInfo Tags[] = {
  {"a", B8(011101),'R',2, Html_tag_open_a, NULL, Html_tag_close_a},
  {"abbr", B8(010101),'R',2, Html_tag_open_abbr, NULL, NULL},
  /* acronym 010101 -- obsolete in HTML5 */
- {"address", B8(010110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par},
+ {"address", B8(011110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par},
  {"area", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_area,
                             NULL},
  {"article", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
@@ -3674,10 +3708,10 @@ static int Html_needs_optional_close(int old_idx, int cur_idx)
    } else if (old_idx == i_TR) {
       /* TR closes TR */
       return (cur_idx == i_TR);
-   } else if (old_idx ==  i_DD) {
+   } else if (old_idx == i_DD) {
       /* DD is closed by DD and DT */
       return (cur_idx == i_DD || cur_idx == i_DT);
-   } else if (old_idx ==  i_OPTION) {
+   } else if (old_idx == i_OPTION) {
       return 1;  // OPTION always needs close
    }
 
@@ -3879,8 +3913,18 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni)
 
 static void Html_display_block(DilloHtml *html)
 {
-   //HT2TB(html)->addParbreak (5, html->styleEngine->wordStyle ());
-   Html_add_textblock(html, 0);
+   /* Out-of-flow textblocks get no surrounding paragraph breaks. */
+   Html_add_textblock(html, !Html_will_textblock_be_out_of_flow (html), 0,
+                      false /* Perhaps true for widgets oof? */);
+}
+
+/*
+ * Open a textblock for "display: inline-block": no paragraph breaks, but a
+ * break option after it.
+ */
+static void Html_display_inline_block(DilloHtml *html)
+{
+   Html_add_textblock(html, false, 0, true);
 }
 
 static void Html_display_listitem(DilloHtml *html)
@@ -3985,6 +4024,9 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
             case DISPLAY_BLOCK:
                Html_display_block(html);
                break;
+            case DISPLAY_INLINE_BLOCK:
+               Html_display_inline_block(html);
+               break;
             case DISPLAY_LIST_ITEM:
                Html_display_listitem(html);
                break;
@@ -3992,7 +4034,6 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
                S_TOP(html)->display_none = true;
                break;
             case DISPLAY_INLINE:
-            case DISPLAY_INLINE_BLOCK: // TODO: implement inline-block
             default:
                break;
          }
@@ -4060,7 +4101,7 @@ static const char *Html_get_attr2(DilloHtml *html,
                                   const char *attrname,
                                   int tag_parsing_flags)
 {
-   int i, isocode, entsize, Found = 0, delimiter = 0, attr_pos = 0;
+   int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
    Dstr *Buf = html->attr_data;
    DilloHtmlTagParsingState state = SEEK_ATTR_START;
 
@@ -4119,16 +4160,12 @@ static const char *Html_get_attr2(DilloHtml *html,
             state = FINISHED;
          } else if (tag[i] == '&' &&
                     (tag_parsing_flags & HTML_ParseEntities)) {
-            if ((isocode = Html_parse_entity(html, tag+i,
-                                             tagsize-i, &entsize)) >= 0) {
-               if (isocode >= 128) {
-                  char buf[4];
-                  int k, n = a_Utf8_encode(isocode, buf);
-                  for (k = 0; k < n; ++k)
-                     dStr_append_c(Buf, buf[k]);
-               } else {
-                  dStr_append_c(Buf, (char) isocode);
-               }
+            const char *entstr;
+            const bool_t is_attr = TRUE;
+
+            if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
+                                            is_attr))) {
+               dStr_append(Buf, entstr);
                i += entsize-1;
             } else {
                dStr_append_c(Buf, tag[i]);