diff options
Diffstat (limited to 'src/html.cc')
-rw-r--r-- | src/html.cc | 529 |
1 files changed, 283 insertions, 246 deletions
diff --git a/src/html.cc b/src/html.cc index fe861ce7..75d1820f 100644 --- a/src/html.cc +++ b/src/html.cc @@ -26,6 +26,7 @@ #include "msg.h" #include "binaryconst.h" #include "colors.h" +#include "html_charrefs.h" #include "utf8.hh" #include "misc.h" @@ -356,17 +357,32 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize) /* - * Create and add a new Textblock to the current Textblock + * Create and add a new Textblock to the current Textblock. Typically + * only one of addBreaks and addBreakOpt is true. */ -static void Html_add_textblock(DilloHtml *html, int space) +static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace, + bool addBreakOpt) { Textblock *textblock = new Textblock (prefs.limit_text_width); - HT2TB(html)->addParbreak (space, html->wordStyle ()); - HT2TB(html)->addWidget (textblock, html->style ()); - HT2TB(html)->addParbreak (space, html->wordStyle ()); + if (addBreaks) + HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); + + HT2TB(html)->addWidget (textblock, html->style ()); /* Works also for floats + etc. */ + if (addBreakOpt) + HT2TB(html)->addBreakOption (html->style (), false); + + if (addBreaks) + HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); S_TOP(html)->textblock = html->dw = textblock; - S_TOP(html)->hand_over_break = true; + if (addBreaks) + S_TOP(html)->hand_over_break = true; +} + +static bool Html_will_textblock_be_out_of_flow(DilloHtml *html) +{ + return HT2TB(html)->isStyleOutOfFlow (html->style ()); } /* @@ -788,113 +804,16 @@ void a_Html_stash_init(DilloHtml *html) dStr_truncate(html->Stash, 0); } -/* Entities list from the HTML 4.01 DTD */ -typedef struct { - const char *entity; - int isocode; -} Ent_t; - -#define NumEnt 252 -static const Ent_t Entities[NumEnt] = { - {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, - {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304}, - {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, - {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, - {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, - {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, - {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, - {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, - {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651}, - {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, - {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, - {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, - {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, - {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, - {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, - {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, - {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, - {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, - {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, - {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, - {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143}, - {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, - {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, - {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367}, - {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, - {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, - {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, - {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, - {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, - {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, - {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, - {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036}, - {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, - {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, - {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, - {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, - {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, - {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, - {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, - {"nbsp",0240}, {"ndash",020023},{"ne",021140}, {"ni",021013}, - {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, - {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, - {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, - {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, - {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, - {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, - {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, - {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, - {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, - {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, - {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, - {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, - {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, - {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, - {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, - {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, - {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, - {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, - {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, - {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, - {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, - {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, - {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014} -}; - - -/* - * Comparison function for binary search - */ -static int Html_entity_comp(const void *a, const void *b) -{ - return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity); -} - -/* - * Binary search of 'key' in entity list - */ -static int Html_entity_search(char *key) -{ - Ent_t *res, EntKey; - - EntKey.entity = key; - res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt, - sizeof(Ent_t), Html_entity_comp); - if (res) - return (res - Entities); - return -1; -} - /* * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them! * * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED. - * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid + * TODO: Probably I should remove this hack. --Jcid */ -static int Html_ms_stupid_quotes_2ucs(int isocode) +static int Html_ms_stupid_quotes_2ucs(int codepoint) { int ret; - switch (isocode) { + switch (codepoint) { case 145: case 146: ret = '\''; break; case 147: @@ -902,130 +821,241 @@ static int Html_ms_stupid_quotes_2ucs(int isocode) case 149: ret = 176; break; case 150: case 151: ret = '-'; break; - default: ret = isocode; break; + default: ret = codepoint; break; } return ret; } /* - * Given an entity, return the UCS character code. - * Returns a negative value (error code) if not a valid entity. - * - * The first character *token is assumed to be == '&' - * - * For valid entities, *entsize is set to the length of the parsed entity. + * Parse a numeric character reference (e.g., "/" or "/"). + * The "&#" has already been consumed. */ -static int Html_parse_entity(DilloHtml *html, const char *token, - int toksize, int *entsize) +static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok, + bool_t is_attr, int *entsize) { - int isocode, i; - char *tok, *s, c; + static char buf[5]; + char *s = tok; + int n, codepoint = -1; - token++; - tok = s = toksize ? dStrndup(token, (uint_t)toksize) : dStrdup(token); - - isocode = -1; - - if (*s == '#') { - /* numeric character reference */ - errno = 0; - if (*++s == 'x' || *s == 'X') { - if (isxdigit(*++s)) { - /* strtol with base 16 accepts leading "0x" - we don't */ - if (*s == '0' && s[1] == 'x') { - s++; - isocode = 0; - } else { - isocode = strtol(s, &s, 16); - } + errno = 0; + + if (*s == 'x' || *s == 'X') { + if (isxdigit(*++s)) { + /* strtol with base 16 accepts leading "0x" - we don't */ + if (*s == '0' && s[1] == 'x') { + s++; + codepoint = 0; + } else { + codepoint = strtol(s, &s, 16); } - } else if (isdigit(*s)) { - isocode = strtol(s, &s, 10); } + } else if (isdigit(*s)) { + codepoint = strtol(s, &s, 10); + } + if (errno) + codepoint = -1; - if (!isocode || errno || isocode > 0xffff) { - /* this catches null bytes, errors and codes >= 0xFFFF */ - BUG_MSG("Numeric character reference \"%s\" out of range.", tok); - isocode = -2; + if (*s == ';') + s++; + else { + if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) { + char c = *s; + *s = '\0'; + BUG_MSG("Character reference '&#%s' lacks ';'.", tok); + *s = c; } - - if (isocode != -1) { - if (*s == ';') - s++; - else if (prefs.show_extra_warnings) - BUG_MSG("Numeric character reference without trailing ';'."); + /* Don't require ';' for old HTML, except that our current heuristic + * is to require it in attributes to avoid cases like "©=1" found + * in URLs. + */ + if (is_attr || html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { + return NULL; } - } else if (isalpha(*s)) { - /* character entity reference */ - while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ; - c = *s; - *s = 0; + } + if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' && + codepoint != '\f') || + (codepoint >= 0x7f && codepoint <= 0x9f) || + (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff || + ((codepoint & 0xfffe) == 0xfffe) || + (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) && + codepoint > 0xffff)) { + /* this catches null bytes, errors, codes out of range, disallowed + * control chars, permanently undefined chars, and surrogates. + */ + char c = *s; + *s = '\0'; + BUG_MSG("Numeric character reference '&#%s' is not valid.", tok); + *s = c; - if ((i = Html_entity_search(tok)) >= 0) { - isocode = Entities[i].isocode; + codepoint = (codepoint >= 145 && codepoint <= 151) ? + Html_ms_stupid_quotes_2ucs(codepoint) : -1; + } + if (codepoint != -1) { + if (codepoint >= 128) { + n = a_Utf8_encode(codepoint, buf); } else { - if (html->DocType == DT_XHTML && !strcmp(tok, "apos")) { - isocode = 0x27; - } else { - if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) || - html->DocType == DT_XHTML) - BUG_MSG("Undefined character entity '%s'.", tok); - isocode = -3; - } + n = 1; + buf[0] = (char) codepoint; + } + assert(n < 5); + buf[n] = '\0'; + *entsize = s-tok+2; + return buf; + } else { + return NULL; + } +} + +/* + * Comparison function for binary search + */ +static int Html_charref_comp(const void *a, const void *b) +{ + return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref); +} + +/* + * Binary search of 'key' in charref list + */ +static Charref_t *Html_charref_search(char *key) +{ + Charref_t RefKey; + + RefKey.ref = key; + return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef, + sizeof(Charref_t), Html_charref_comp); +} + +/* + * Parse a named character reference (e.g., "&" or "…"). + * The "&" has already been consumed. + */ +static const char *Html_parse_named_charref(DilloHtml *html, char *tok, + bool_t is_attr, int *entsize) +{ + Charref_t *p; + char c; + char *s = tok; + const char *ret = NULL; + + while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ; + c = *s; + *s = '\0'; + if (c != ';') { + if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) + BUG_MSG("Character reference '&%s' lacks ';'.", tok); + + /* Don't require ';' for old HTML, except that our current heuristic + * is to require it in attributes to avoid cases like "©=1" found + * in URLs. + */ + if (is_attr || html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { + return ret; } - if (c == ';') - s++; - else if (prefs.show_extra_warnings) - BUG_MSG("Character entity reference without trailing ';'."); } + if ((p = Html_charref_search(tok))) { + ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ? + p->html5_str : p->html4_str; + } + + if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos")) + ret = "'"; + + *s = c; + if (c == ';') + s++; + + if (!ret) { + c = *s; + *s = '\0'; + BUG_MSG("Undefined character reference '&%s'.", tok); + *s = c; + } *entsize = s-tok+1; - dFree(tok); + return ret; +} - if (isocode >= 145 && isocode <= 151) { - /* TODO: remove this hack. */ - isocode = Html_ms_stupid_quotes_2ucs(isocode); - } else if (isocode == -1 && prefs.show_extra_warnings) +/* + * Given an entity, return the corresponding string. + * Returns NULL if not a valid entity. + * + * The first character *token is assumed to be == '&' + * + * For valid entities, *entsize is set to the length of the parsed entity. + */ +static const char *Html_parse_entity(DilloHtml *html, const char *token, + int toksize, int *entsize, bool_t is_attr) +{ + const char *ret = NULL; + char *tok; + + if (toksize > 50) { + /* In pathological cases, attributes can be megabytes long and filled + * with character references. As of HTML5, the longest defined character + * reference is about 32 bytes long. + */ + toksize = 50; + } + + token++; + tok = dStrndup(token, (uint_t)toksize); + + if (*tok == '#') { + ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize); + } else if (isalpha(*tok)) { + ret = Html_parse_named_charref(html, tok, is_attr, entsize); + } else if (prefs.show_extra_warnings && + (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) { + // HTML5 doesn't mind literal '&'s. BUG_MSG("Literal '&'."); + } + dFree(tok); - return isocode; + return ret; } /* - * Convert all the entities in a token to utf8 encoding. Takes - * a token and its length, and returns a newly allocated string. + * Parse all the entities in a token. Takes the token and its length, and + * returns a newly allocated string. */ char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize) { const char *esc_set = "&"; - char *new_str, buf[4]; - int i, j, k, n, s, isocode, entsize; - - new_str = dStrndup(token, toksize); - s = strcspn(new_str, esc_set); - if (new_str[s] == 0) - return new_str; - - for (i = j = s; i < toksize; i++) { - if (token[i] == '&' && - (isocode = Html_parse_entity(html, token+i, - toksize-i, &entsize)) >= 0) { - if (isocode >= 128) { - /* multibyte encoding */ - n = a_Utf8_encode(isocode, buf); - for (k = 0; k < n; ++k) - new_str[j++] = buf[k]; + int i, s, entsize; + char *str; + + s = strcspn(token, esc_set); + if (s >= toksize) { + /* no ampersands */ + str = dStrndup(token, toksize); + } else { + Dstr *ds = dStr_sized_new(toksize); + + dStr_append_l(ds, token, s); + + for (i = s; i < toksize; i++) { + const char *entstr; + const bool_t is_attr = FALSE; + + if (token[i] == '&' && + (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize, + is_attr))) { + dStr_append(ds, entstr); + i += entsize-1; } else { - new_str[j++] = (char) isocode; + dStr_append_c(ds, token[i]); } - i += entsize-1; - } else { - new_str[j++] = token[i]; } + str = ds->str; + dStr_free(ds, 0); } - new_str[j] = '\0'; - return new_str; + return str; } /* @@ -1553,7 +1583,7 @@ static int * rendering modes, so it may be better to chose another behaviour. --Jcid * * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html - * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html + * http://lists.dillo.org/pipermail/dillo-dev/2004-October/002300.html * * This is not a full DOCTYPE parser, just enough for what Dillo uses. */ @@ -2017,7 +2047,7 @@ static void Html_tag_content_frameset (DilloHtml *html, { HT2TB(html)->addParbreak (9, html->wordStyle ()); HT2TB(html)->addText("--FRAME--", html->wordStyle ()); - Html_add_textblock(html, 5); + Html_add_textblock(html, true, 5, false); } /* @@ -2094,8 +2124,8 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize) { char *width_ptr, *height_ptr; const char *attrbuf; - CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); - CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); + CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); + CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); int w = 0, h = 0; if (prefs.show_tooltip && @@ -2128,7 +2158,7 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize) */ if (w < 0 || h < 0 || w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA || - (h > 0 && w > IMAGE_MAX_AREA / h)) { + (h > 0 && w > IMAGE_MAX_AREA / h)) { dFree(width_ptr); dFree(height_ptr); width_ptr = height_ptr = NULL; @@ -2173,14 +2203,16 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize) return NULL; alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL); - if ((!alt_ptr || !*alt_ptr) && !prefs.load_images) { + if (!alt_ptr || !*alt_ptr) { dFree(alt_ptr); - alt_ptr = dStrdup("[IMG]"); // Place holder for img_off mode + alt_ptr = dStrdup("[IMG]"); } dw::Image *dw = new dw::Image(alt_ptr); image = a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0); + + a_Image_ref(image); if (HT2TB(html)->getBgColor()) image->bg_color = HT2TB(html)->getBgColor()->getColor(); @@ -2197,10 +2229,10 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize) if (load_now && Html_load_image(html->bw, url, html->page_url, image)) { // hi->image is NULL if dillo tries to load the image immediately hi->image = NULL; + a_Image_unref(image); } else { // otherwise a reference is kept in html->images hi->image = image; - a_Image_ref(image); } dFree(alt_ptr); @@ -2315,6 +2347,7 @@ static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize) // multiple inheritance. dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr; HT2TB(html)->addWidget(dwi, html->style()); + HT2TB(html)->addBreakOption (html->style (), false); /* Image maps */ if (a_Html_get_attr(html, tag, tagsize, "ismap")) { @@ -2448,7 +2481,6 @@ static void type = UNKNOWN; } if (type == RECTANGLE || type == CIRCLE || type == POLYGON) { - /* TODO: add support for coords in % */ if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) { coords = Html_read_coords(html, attrbuf); @@ -2482,8 +2514,6 @@ static void if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) { url = a_Html_url_new(html, attrbuf, NULL, 0); dReturn_if_fail ( url != NULL ); - if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "alt"))) - a_Url_set_alt(url, attrbuf); link = Html_set_new_link(html, &url); } @@ -2797,7 +2827,7 @@ static void Html_tag_close_a(DilloHtml *html) static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize) { - Html_add_textblock(html, 9); + Html_add_textblock(html, true, 9, false); } /* @@ -3061,7 +3091,7 @@ static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize) */ static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize) { - Html_add_textblock(html, 9); + Html_add_textblock(html, true, 9, false); } /* @@ -3262,27 +3292,26 @@ void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url) dReturn_if (url == NULL || ! prefs.load_stylesheets); _MSG("Html_load_stylesheet: "); - if (a_Capi_get_buf(url, &data, &len)) { + if ((a_Capi_get_flags_with_redirection(url) & CAPI_Completed) && + a_Capi_get_buf(url, &data, &len)) { _MSG("cached URL=%s len=%d", URL_STR(url), len); - if (a_Capi_get_flags_with_redirection(url) & CAPI_Completed) { - if (strncmp("@charset \"", data, 10) == 0) { - char *endq = strchr(data+10, '"'); - - if (endq && (endq - data <= 51)) { - /* IANA limits charset names to 40 characters */ - char *content_type; - - *endq = '\0'; - content_type = dStrconcat("text/css; charset=", data+10, NULL); - *endq = '"'; - a_Capi_unref_buf(url); - a_Capi_set_content_type(url, content_type, "meta"); - dFree(content_type); - a_Capi_get_buf(url, &data, &len); - } + if (strncmp("@charset \"", data, 10) == 0) { + char *endq = strchr(data+10, '"'); + + if (endq && (endq - data <= 51)) { + /* IANA limits charset names to 40 characters */ + char *content_type; + + *endq = '\0'; + content_type = dStrconcat("text/css; charset=", data+10, NULL); + *endq = '"'; + a_Capi_unref_buf(url); + a_Capi_set_content_type(url, content_type, "meta"); + dFree(content_type); + a_Capi_get_buf(url, &data, &len); } - html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR); } + html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR); a_Capi_unref_buf(url); } else { /* Fill a Web structure for the cache query */ @@ -3364,8 +3393,13 @@ static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize) if (html->InFlags & IN_HEAD) { if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) { - BaseUrl = a_Html_url_new(html, attrbuf, "", 1); - if (URL_SCHEME_(BaseUrl)) { + bool_t html5 = html->DocType == DT_HTML && + html->DocTypeVersion >= 5.0f; + + BaseUrl = html5 ? a_Html_url_new(html, attrbuf, NULL, 0) : + a_Html_url_new(html, attrbuf, "", 1); + + if (html5 || URL_SCHEME_(BaseUrl)) { /* Pass the URL_SpamSafe flag to the new base url */ a_Url_set_flags( BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe); @@ -3474,7 +3508,7 @@ const TagInfo Tags[] = { {"a", B8(011101),'R',2, Html_tag_open_a, NULL, Html_tag_close_a}, {"abbr", B8(010101),'R',2, Html_tag_open_abbr, NULL, NULL}, /* acronym 010101 -- obsolete in HTML5 */ - {"address", B8(010110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par}, + {"address", B8(011110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par}, {"area", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_area, NULL}, {"article", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL}, @@ -3674,10 +3708,10 @@ static int Html_needs_optional_close(int old_idx, int cur_idx) } else if (old_idx == i_TR) { /* TR closes TR */ return (cur_idx == i_TR); - } else if (old_idx == i_DD) { + } else if (old_idx == i_DD) { /* DD is closed by DD and DT */ return (cur_idx == i_DD || cur_idx == i_DT); - } else if (old_idx == i_OPTION) { + } else if (old_idx == i_OPTION) { return 1; // OPTION always needs close } @@ -3879,8 +3913,13 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni) static void Html_display_block(DilloHtml *html) { - //HT2TB(html)->addParbreak (5, html->styleEngine->wordStyle ()); - Html_add_textblock(html, 0); + Html_add_textblock(html, !Html_will_textblock_be_out_of_flow (html), 0, + false /* Perhaps true for widgets oof? */); +} + +static void Html_display_inline_block(DilloHtml *html) +{ + Html_add_textblock(html, false, 0, true); } static void Html_display_listitem(DilloHtml *html) @@ -3985,6 +4024,9 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) case DISPLAY_BLOCK: Html_display_block(html); break; + case DISPLAY_INLINE_BLOCK: + Html_display_inline_block(html); + break; case DISPLAY_LIST_ITEM: Html_display_listitem(html); break; @@ -3992,7 +4034,6 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) S_TOP(html)->display_none = true; break; case DISPLAY_INLINE: - case DISPLAY_INLINE_BLOCK: // TODO: implement inline-block default: break; } @@ -4060,7 +4101,7 @@ static const char *Html_get_attr2(DilloHtml *html, const char *attrname, int tag_parsing_flags) { - int i, isocode, entsize, Found = 0, delimiter = 0, attr_pos = 0; + int i, entsize, Found = 0, delimiter = 0, attr_pos = 0; Dstr *Buf = html->attr_data; DilloHtmlTagParsingState state = SEEK_ATTR_START; @@ -4119,16 +4160,12 @@ static const char *Html_get_attr2(DilloHtml *html, state = FINISHED; } else if (tag[i] == '&' && (tag_parsing_flags & HTML_ParseEntities)) { - if ((isocode = Html_parse_entity(html, tag+i, - tagsize-i, &entsize)) >= 0) { - if (isocode >= 128) { - char buf[4]; - int k, n = a_Utf8_encode(isocode, buf); - for (k = 0; k < n; ++k) - dStr_append_c(Buf, buf[k]); - } else { - dStr_append_c(Buf, (char) isocode); - } + const char *entstr; + const bool_t is_attr = TRUE; + + if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize, + is_attr))) { + dStr_append(Buf, entstr); i += entsize-1; } else { dStr_append_c(Buf, tag[i]); |