diff options
Diffstat (limited to 'src/html.cc')
-rw-r--r-- | src/html.cc | 697 |
1 files changed, 366 insertions, 331 deletions
diff --git a/src/html.cc b/src/html.cc index a8c70879..a1452858 100644 --- a/src/html.cc +++ b/src/html.cc @@ -26,6 +26,7 @@ #include "msg.h" #include "binaryconst.h" #include "colors.h" +#include "html_charrefs.h" #include "utf8.hh" #include "misc.h" @@ -132,9 +133,11 @@ void DilloHtml::bugMessage(const char *format, ... ) { va_list argp; + if (bw->num_page_bugs) + dStr_append_c(bw->page_bugs, '\n'); dStr_sprintfa(bw->page_bugs, "HTML warning: line %d, ", - getCurTagLineNumber()); + getCurrLineNumber()); va_start(argp, format); dStr_vsprintfa(bw->page_bugs, format, argp); va_end(argp); @@ -158,15 +161,15 @@ DilloUrl *a_Html_url_new(DilloHtml *html, const char *suffix = (n_ic) > 1 ? "s" : ""; n_ic_spc = URL_ILLEGAL_CHARS_SPC(url); if (n_ic == n_ic_spc) { - BUG_MSG("URL has %d illegal space%s\n", n_ic, suffix); + BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str); } else if (n_ic_spc == 0) { - BUG_MSG("URL has %d illegal character%s in {00-1F, 7F} range\n", - n_ic, suffix); + BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').", + n_ic, suffix, url_str); } else { - BUG_MSG("URL has %d illegal character%s: " - "%d space%s, and %d in {00-1F, 7F} range\n", + BUG_MSG("URL has %d illegal byte%s: " + "%d space%s and %d in {00-1F, 7F-FF} range ('%s').", n_ic, suffix, - n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc); + n_ic_spc, n_ic_spc > 1 ? 
"s" : "", n_ic-n_ic_spc, url_str); } } return url; @@ -290,7 +293,7 @@ void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize) TextAlignType textAlignType = TEXT_ALIGN_LEFT; if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("The align attribute is obsolete in HTML5.\n"); + BUG_MSG("The align attribute is obsolete in HTML5."); if (dStrAsciiCasecmp (align, "left") == 0) textAlignType = TEXT_ALIGN_LEFT; @@ -334,7 +337,7 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize) if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) { if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("The valign attribute is obsolete in HTML5.\n"); + BUG_MSG("The valign attribute is obsolete in HTML5."); if (dStrAsciiCasecmp (attr, "top") == 0) valign = VALIGN_TOP; @@ -356,15 +359,24 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize) /* * Create and add a new Textblock to the current Textblock */ -static void Html_add_textblock(DilloHtml *html, int space) +static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace) { Textblock *textblock = new Textblock (prefs.limit_text_width); - HT2TB(html)->addParbreak (space, html->wordStyle ()); - HT2TB(html)->addWidget (textblock, html->style ()); - HT2TB(html)->addParbreak (space, html->wordStyle ()); + if (addBreaks) + HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); + HT2TB(html)->addWidget (textblock, html->style ()); /* Works also for floats + etc. 
*/ + if (addBreaks) + HT2TB(html)->addParbreak (breakSpace, html->wordStyle ()); S_TOP(html)->textblock = html->dw = textblock; - S_TOP(html)->hand_over_break = true; + if (addBreaks) + S_TOP(html)->hand_over_break = true; +} + +static bool Html_will_textblock_be_out_of_flow(DilloHtml *html) +{ + return HT2TB(html)->isStyleOutOfFlow (html->style ()); } /* @@ -397,9 +409,8 @@ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, stop_parser = false; - CurrTagOfs = 0; - OldTagOfs = 0; - OldTagLine = 1; + CurrOfs = OldOfs = 0; + OldLine = 1; DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */ DocTypeVersion = 0.0f; @@ -539,10 +550,10 @@ void DilloHtml::write(char *Buf, int BufSize, int Eof) } /* - * Return the line number of the tag being processed by the parser. + * Return the line number of the tag/word being processed by the parser. * Also update the offsets. */ -int DilloHtml::getCurTagLineNumber() +int DilloHtml::getCurrLineNumber() { int i, ofs, line; const char *p = Start_Buf; @@ -551,13 +562,13 @@ int DilloHtml::getCurTagLineNumber() /* Disable line counting for META hack. Buffers differ. 
*/ dReturn_val_if((InFlags & IN_META_HACK), -1); - ofs = CurrTagOfs; - line = OldTagLine; - for (i = OldTagOfs; i < ofs; ++i) + ofs = CurrOfs; + line = OldLine; + for (i = OldOfs; i < ofs; ++i) if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n')) ++line; - OldTagOfs = CurrTagOfs; - OldTagLine = line; + OldOfs = CurrOfs; + OldLine = line; return line; } @@ -787,113 +798,16 @@ void a_Html_stash_init(DilloHtml *html) dStr_truncate(html->Stash, 0); } -/* Entities list from the HTML 4.01 DTD */ -typedef struct { - const char *entity; - int isocode; -} Ent_t; - -#define NumEnt 252 -static const Ent_t Entities[NumEnt] = { - {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, - {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304}, - {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, - {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, - {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, - {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, - {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, - {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, - {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651}, - {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, - {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, - {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, - {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, - {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, - {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, - {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, - {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, - {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, - {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, - {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, - {"cent",0242}, {"chi",01707}, 
{"circ",01306}, {"clubs",023143}, - {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, - {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, - {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367}, - {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, - {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, - {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, - {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, - {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, - {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, - {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, - {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036}, - {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, - {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, - {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, - {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, - {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, - {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, - {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, - {"nbsp",0240}, {"ndash",020023},{"ne",021140}, {"ni",021013}, - {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, - {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, - {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, - {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, - {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, - {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, - {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, - {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, - {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, - {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, - 
{"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, - {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, - {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, - {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, - {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, - {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, - {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, - {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, - {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, - {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, - {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, - {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, - {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014} -}; - - -/* - * Comparison function for binary search - */ -static int Html_entity_comp(const void *a, const void *b) -{ - return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity); -} - -/* - * Binary search of 'key' in entity list - */ -static int Html_entity_search(char *key) -{ - Ent_t *res, EntKey; - - EntKey.entity = key; - res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt, - sizeof(Ent_t), Html_entity_comp); - if (res) - return (res - Entities); - return -1; -} - /* * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them! * * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED. - * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid + * TODO: Probably I should remove this hack. 
--Jcid */ -static int Html_ms_stupid_quotes_2ucs(int isocode) +static int Html_ms_stupid_quotes_2ucs(int codepoint) { int ret; - switch (isocode) { + switch (codepoint) { case 145: case 146: ret = '\''; break; case 147: @@ -901,130 +815,233 @@ static int Html_ms_stupid_quotes_2ucs(int isocode) case 149: ret = 176; break; case 150: case 151: ret = '-'; break; - default: ret = isocode; break; + default: ret = codepoint; break; } return ret; } /* - * Given an entity, return the UCS character code. - * Returns a negative value (error code) if not a valid entity. - * - * The first character *token is assumed to be == '&' - * - * For valid entities, *entsize is set to the length of the parsed entity. + * Parse a numeric character reference (e.g., "&#47;" or "&#x2F;"). + * The "&#" has already been consumed. */ -static int Html_parse_entity(DilloHtml *html, const char *token, - int toksize, int *entsize) +static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok, + bool_t is_attr, int *entsize) { - int isocode, i; - char *tok, *s, c; + static char buf[5]; + char *s = tok; + int n, codepoint = -1; - token++; - tok = s = toksize ? 
dStrndup(token, (uint_t)toksize) : dStrdup(token); - - isocode = -1; - - if (*s == '#') { - /* numeric character reference */ - errno = 0; - if (*++s == 'x' || *s == 'X') { - if (isxdigit(*++s)) { - /* strtol with base 16 accepts leading "0x" - we don't */ - if (*s == '0' && s[1] == 'x') { - s++; - isocode = 0; - } else { - isocode = strtol(s, &s, 16); - } + errno = 0; + + if (*s == 'x' || *s == 'X') { + if (isxdigit(*++s)) { + /* strtol with base 16 accepts leading "0x" - we don't */ + if (*s == '0' && s[1] == 'x') { + s++; + codepoint = 0; + } else { + codepoint = strtol(s, &s, 16); } - } else if (isdigit(*s)) { - isocode = strtol(s, &s, 10); } + } else if (isdigit(*s)) { + codepoint = strtol(s, &s, 10); + } + if (errno) + codepoint = -1; - if (!isocode || errno || isocode > 0xffff) { - /* this catches null bytes, errors and codes >= 0xFFFF */ - BUG_MSG("numeric character reference \"%s\" out of range\n", tok); - isocode = -2; + if (*s == ';') + s++; + else { + if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) { + char c = *s; + *s = '\0'; + BUG_MSG("Character reference '&#%s' lacks ';'.", tok); + *s = c; } - - if (isocode != -1) { - if (*s == ';') - s++; - else if (prefs.show_extra_warnings) - BUG_MSG("numeric character reference without trailing ';'\n"); + /* Don't require ';' for old HTML, except that our current heuristic + * is to require it in attributes to avoid cases like "&copy=1" found + * in URLs. 
+ */ + if (is_attr || html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { + return NULL; } - } else if (isalpha(*s)) { - /* character entity reference */ - while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ; - c = *s; - *s = 0; + } + if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' && + codepoint != '\f') || + (codepoint >= 0x7f && codepoint <= 0x9f) || + (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff || + ((codepoint & 0xfffe) == 0xfffe) || + (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) && + codepoint > 0xffff)) { + /* this catches null bytes, errors, codes out of range, disallowed + * control chars, permanently undefined chars, and surrogates. + */ + char c = *s; + *s = '\0'; + BUG_MSG("Numeric character reference '&#%s' is not valid.", tok); + *s = c; - if ((i = Html_entity_search(tok)) >= 0) { - isocode = Entities[i].isocode; + codepoint = (codepoint >= 145 && codepoint <= 151) ? + Html_ms_stupid_quotes_2ucs(codepoint) : -1; + } + if (codepoint != -1) { + if (codepoint >= 128) { + n = a_Utf8_encode(codepoint, buf); } else { - if (html->DocType == DT_XHTML && !strcmp(tok, "apos")) { - isocode = 0x27; - } else { - if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) || - html->DocType == DT_XHTML) - BUG_MSG("undefined character entity '%s'\n", tok); - isocode = -3; - } + n = 1; + buf[0] = (char) codepoint; + } + assert(n < 5); + buf[n] = '\0'; + *entsize = s-tok+2; + return buf; + } else { + return NULL; + } +} + +/* + * Comparison function for binary search + */ +static int Html_charref_comp(const void *a, const void *b) +{ + return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref); +} + +/* + * Binary search of 'key' in charref list + */ +static Charref_t *Html_charref_search(char *key) +{ + Charref_t RefKey; + + RefKey.ref = key; + return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef, + sizeof(Charref_t), Html_charref_comp); +} + +/* + * Parse a named 
character reference (e.g., "&amp;" or "&hellip;"). + * The "&" has already been consumed. + */ +static const char *Html_parse_named_charref(DilloHtml *html, char *tok, + bool_t is_attr, int *entsize) +{ + Charref_t *p; + char c; + char *s = tok; + const char *ret = NULL; + + while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ; + c = *s; + *s = '\0'; + if (c != ';') { + if (prefs.show_extra_warnings && (html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) + BUG_MSG("Character reference '&%s' lacks ';'.", tok); + + /* Don't require ';' for old HTML, except that our current heuristic + * is to require it in attributes to avoid cases like "&copy=1" found + * in URLs. + */ + if (is_attr || html->DocType == DT_XHTML || + (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) { + return ret; } - if (c == ';') - s++; - else if (prefs.show_extra_warnings) - BUG_MSG("character entity reference without trailing ';'\n"); } + if ((p = Html_charref_search(tok))) { + ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ? + p->html5_str : p->html4_str; + } + + if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos")) + ret = "'"; + + *s = c; + if (c == ';') + s++; + + if (!ret) { + c = *s; + *s = '\0'; + BUG_MSG("Undefined character reference '&%s'.", tok); + *s = c; + } *entsize = s-tok+1; - dFree(tok); + return ret; +} + +/* + * Given an entity, return the corresponding string. + * Returns NULL if not a valid entity. + * + * The first character *token is assumed to be == '&' + * + * For valid entities, *entsize is set to the length of the parsed entity. + */ +static const char *Html_parse_entity(DilloHtml *html, const char *token, + int toksize, int *entsize, bool_t is_attr) +{ + const char *ret = NULL; + char *tok; - if (isocode >= 145 && isocode <= 151) { - /* TODO: remove this hack. 
*/ - isocode = Html_ms_stupid_quotes_2ucs(isocode); - } else if (isocode == -1 && prefs.show_extra_warnings) - BUG_MSG("literal '&'\n"); + token++; + tok = dStrndup(token, (uint_t)toksize); + + if (*tok == '#') { + ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize); + } else if (isalpha(*tok)) { + ret = Html_parse_named_charref(html, tok, is_attr, entsize); + } else if (prefs.show_extra_warnings && + (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) { + // HTML5 doesn't mind literal '&'s. + BUG_MSG("Literal '&'."); + } + dFree(tok); - return isocode; + return ret; } /* - * Convert all the entities in a token to utf8 encoding. Takes - * a token and its length, and returns a newly allocated string. + * Parse all the entities in a token. Takes the token and its length, and + * returns a newly allocated string. */ char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize) { const char *esc_set = "&"; - char *new_str, buf[4]; - int i, j, k, n, s, isocode, entsize; - - new_str = dStrndup(token, toksize); - s = strcspn(new_str, esc_set); - if (new_str[s] == 0) - return new_str; - - for (i = j = s; i < toksize; i++) { - if (token[i] == '&' && - (isocode = Html_parse_entity(html, token+i, - toksize-i, &entsize)) >= 0) { - if (isocode >= 128) { - /* multibyte encoding */ - n = a_Utf8_encode(isocode, buf); - for (k = 0; k < n; ++k) - new_str[j++] = buf[k]; + int i, s, entsize; + char *str; + + s = strcspn(token, esc_set); + if (s >= toksize) { + /* no ampersands */ + str = dStrndup(token, toksize); + } else { + Dstr *ds = dStr_sized_new(toksize); + + dStr_append_l(ds, token, s); + + for (i = s; i < toksize; i++) { + const char *entstr; + const bool_t is_attr = FALSE; + + if (token[i] == '&' && + (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize, + is_attr))) { + dStr_append(ds, entstr); + i += entsize-1; } else { - new_str[j++] = (char) isocode; + dStr_append_c(ds, token[i]); } - i += entsize-1; - } else { - 
new_str[j++] = token[i]; } + str = ds->str; + dStr_free(ds, 0); } - new_str[j] = '\0'; - return new_str; + return str; } /* @@ -1095,7 +1112,7 @@ static void Html_process_space(DilloHtml *html, const char *space, break; case '\t': if (prefs.show_extra_warnings) - BUG_MSG("TAB character inside <PRE>\n"); + BUG_MSG("TAB character inside <pre>."); offset = TAB_SIZE - html->pre_column % TAB_SIZE; spaceCnt += offset; html->pre_column += offset; @@ -1314,7 +1331,7 @@ static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx) int toptag_idx = S_TOP(html)->tag_idx; TagInfo toptag = Tags[toptag_idx]; if (s_sz > idx + 1 && toptag.EndTag != 'O') - BUG_MSG(" - forcing close of open tag: <%s>\n", toptag.name); + BUG_MSG(" - forcing close of open tag: <%s>.", toptag.name); _MSG("Close: %*s%s\n", size," ", toptag.name); if (toptag.close) toptag.close(html); @@ -1372,10 +1389,10 @@ static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx) if (matched) { Html_tag_cleanup_to_idx(html, stack_idx); } else if (expected) { - BUG_MSG("unexpected closing tag: </%s> -- expected </%s>.\n", + BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.", new_tag.name, Tags[tag_idx].name); } else { - BUG_MSG("unexpected closing tag: </%s>.\n", new_tag.name); + BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name); } } @@ -1411,7 +1428,7 @@ static void Html_tag_cleanup_nested_inputs(DilloHtml *html, int new_idx) } if (matched) { - BUG_MSG("attempt to nest <%s> element inside <%s> -- closing <%s>\n", + BUG_MSG("Attempt to nest <%s> element inside <%s> -- closing <%s>.", Tags[new_idx].name, Tags[u_idx].name, Tags[u_idx].name); Html_tag_cleanup_to_idx(html, stack_idx); } else { @@ -1481,7 +1498,7 @@ CssLength a_Html_parse_length (DilloHtml *html, const char *attr) else { /* allow only whitespaces */ if (*end && !isspace (*end)) { - BUG_MSG("Garbage after length: %s\n", attr); + BUG_MSG("Garbage after length: '%s'.", attr); l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); } } @@ 
-1501,7 +1518,7 @@ int32_t a_Html_color_parse(DilloHtml *html, const char *str, int32_t color = a_Color_parse(str, default_color, &err); if (err) { - BUG_MSG("color \"%s\" is not in \"#RRGGBB\" format\n", str); + BUG_MSG("Color '%s' is not in \"#RRGGBB\" format.", str); } return color; } @@ -1518,8 +1535,8 @@ static int bool valid = *val && !strchr(val, ' '); if (!valid) { - BUG_MSG("'%s' value must not be empty and must not contain spaces.\n", - attrname); + BUG_MSG("'%s' value \"%s\" must not be empty and must not contain " + "spaces.", attrname, val); } return valid ? 1 : 0; } else { @@ -1530,8 +1547,8 @@ static int break; if (val[i] || !(isascii(val[0]) && isalpha(val[0]))) - BUG_MSG("'%s' value \"%s\" is not of the form " - "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname, val); + BUG_MSG("%s attribute value \"%s\" is not of the form " + "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val); return !(val[i]); } @@ -1559,7 +1576,6 @@ static int static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize) { static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC "; - static const char HTML5_sig [] = "<!DOCTYPE html>"; static const char HTML20 [] = "-//IETF//DTD HTML"; static const char HTML32 [] = "-//W3C//DTD HTML 3.2"; static const char HTML40 [] = "-//W3C//DTD HTML 4.0"; @@ -1596,7 +1612,7 @@ static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize) _MSG("New: {%s}\n", ntag); if (html->DocType != DT_NONE) - BUG_MSG("Multiple DOCTYPE declarations.\n"); + BUG_MSG("Multiple DOCTYPE declarations."); /* The default DT_NONE type is TagSoup */ if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads! 
@@ -1624,13 +1640,14 @@ static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize) html->DocType = DT_HTML; html->DocTypeVersion = 2.0f; } - } else if (!dStrAsciiCasecmp(ntag, HTML5_sig)) { + } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") || + !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >")) { html->DocType = DT_HTML; html->DocTypeVersion = 5.0f; } if (html->DocType == DT_NONE) { html->DocType = DT_UNRECOGNIZED; - BUG_MSG("DOCTYPE not recognized:\n%s.\n", ntag); + BUG_MSG("DOCTYPE not recognized: ('%s').", ntag); } dFree(ntag); } @@ -1649,7 +1666,7 @@ static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize) ++html->Num_HTML; if (html->Num_HTML > 1) { - BUG_MSG("HTML element was already open\n"); + BUG_MSG("<html> was already open."); html->ReqTagClose = true; } } @@ -1668,7 +1685,7 @@ static void Html_tag_close_html(DilloHtml *html) static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize) { if (html->InFlags & IN_BODY) { - BUG_MSG("HEAD element must go before the BODY section\n"); + BUG_MSG("<head> must go before the BODY section."); html->ReqTagClose = true; return; } @@ -1676,10 +1693,10 @@ static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize) if (html->Num_HEAD < UCHAR_MAX) ++html->Num_HEAD; if (html->InFlags & IN_HEAD) { - BUG_MSG("HEAD element was already open\n"); + BUG_MSG("<head> was already open."); html->ReqTagClose = true; } else if (html->Num_HEAD > 1) { - BUG_MSG("HEAD section already finished -- ignoring\n"); + BUG_MSG("<head> already finished -- ignoring."); html->ReqTagClose = true; } else { html->InFlags |= IN_HEAD; @@ -1696,7 +1713,7 @@ static void Html_tag_close_head(DilloHtml *html) if (html->Num_HEAD == 1) { /* match for the well formed start of HEAD section */ if (html->Num_TITLE == 0) - BUG_MSG("HEAD section lacks the TITLE element\n"); + BUG_MSG("<head> lacks <title>."); html->InFlags &= ~IN_HEAD; @@ -1726,9 +1743,9 @@ static void 
Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize) if (html->Num_TITLE < UCHAR_MAX) ++html->Num_TITLE; if (html->Num_TITLE > 1) - BUG_MSG("A redundant TITLE element was found\n"); + BUG_MSG("Redundant <title>."); } else { - BUG_MSG("TITLE element must be inside the HEAD section -- ignoring\n"); + BUG_MSG("<title> must be inside <head> -- ignoring."); } } @@ -1776,7 +1793,7 @@ static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize) if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) { if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f) - BUG_MSG("type attribute is required for <style>\n"); + BUG_MSG("<style> requires type attribute."); } else if (dStrAsciiCasecmp(attrbuf, "text/css")) { html->loadCssFromStash = false; } @@ -1800,8 +1817,8 @@ static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize) static void Html_tag_close_style(DilloHtml *html) { if (prefs.parse_embedded_css && html->loadCssFromStash) - html->styleEngine->parse(html, html->base_url, html->Stash->str, html->Stash->len, - CSS_ORIGIN_AUTHOR); + html->styleEngine->parse(html, html->base_url, html->Stash->str, + html->Stash->len, CSS_ORIGIN_AUTHOR); } /* @@ -1825,21 +1842,21 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize) ++html->Num_BODY; if (html->Num_BODY > 1) { - BUG_MSG("BODY element was already open\n"); + BUG_MSG("<body> was already open."); html->ReqTagClose = true; return; } if (html->InFlags & IN_HEAD) { /* if we're here, it's bad XHTML, no need to recover */ - BUG_MSG("unclosed HEAD element\n"); + BUG_MSG("Unclosed <head>."); } if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) { color = a_Html_color_parse(html, attrbuf, -1); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<body> bgcolor attribute is obsolete.\n"); + BUG_MSG("<body> bgcolor attribute is obsolete."); if (color != -1) html->styleEngine->setNonCssHint 
(CSS_PROPERTY_BACKGROUND_COLOR, @@ -1850,7 +1867,7 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize) color = a_Html_color_parse(html, attrbuf, -1); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<body> text attribute is obsolete.\n"); + BUG_MSG("<body> text attribute is obsolete."); if (color != -1) html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR, @@ -1862,13 +1879,13 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize) if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) { html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<body> link attribute is obsolete.\n"); + BUG_MSG("<body> link attribute is obsolete."); } if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) { html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<body> vlink attribute is obsolete.\n"); + BUG_MSG("<body> vlink attribute is obsolete."); } html->dw->setStyle (html->style ()); @@ -2012,7 +2029,7 @@ static void Html_tag_content_frameset (DilloHtml *html, { HT2TB(html)->addParbreak (9, html->wordStyle ()); HT2TB(html)->addText("--FRAME--", html->wordStyle ()); - Html_add_textblock(html, 5); + Html_add_textblock(html, true, 5); } /* @@ -2089,8 +2106,8 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize) { char *width_ptr, *height_ptr; const char *attrbuf; - CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); - CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); + CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); + CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO); int w = 0, h = 0; if (prefs.show_tooltip && @@ -2123,7 +2140,7 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize) */ if (w < 0 || h < 0 || w > IMAGE_MAX_AREA 
|| h > IMAGE_MAX_AREA || - (h > 0 && w > IMAGE_MAX_AREA / h)) { + (h > 0 && w > IMAGE_MAX_AREA / h)) { dFree(width_ptr); dFree(height_ptr); width_ptr = height_ptr = NULL; @@ -2176,6 +2193,8 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize) dw::Image *dw = new dw::Image(alt_ptr); image = a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0); + + a_Image_ref(image); if (HT2TB(html)->getBgColor()) image->bg_color = HT2TB(html)->getBgColor()->getColor(); @@ -2192,10 +2211,10 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize) if (load_now && Html_load_image(html->bw, url, html->page_url, image)) { // hi->image is NULL if dillo tries to load the image immediately hi->image = NULL; + a_Image_unref(image); } else { // otherwise a reference is kept in html->images hi->image = image; - a_Image_ref(image); } dFree(alt_ptr); @@ -2338,7 +2357,7 @@ static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize) DilloUrl *url; if (html->InFlags & IN_MAP) { - BUG_MSG("nested <map>\n"); + BUG_MSG("Nested <map>."); } else { if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) { html->InFlags |= IN_MAP; @@ -2348,7 +2367,7 @@ static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize) a_Url_free (url); dFree(hash_name); } else { - BUG_MSG("name attribute is required for <map>\n"); + BUG_MSG("<map> requires name attribute."); } } } @@ -2400,7 +2419,7 @@ misc::SimpleVector<int> *Html_read_coords(DilloHtml *html, const char *str) if (!*newtail) break; if (*newtail != ',') { - BUG_MSG("area coords must be integers separated by commas.\n"); + BUG_MSG("<area> coords must be integers separated by commas."); } tail = newtail + 1; } @@ -2423,7 +2442,7 @@ static void Shape *shape = NULL; if (!(html->InFlags & IN_MAP)) { - BUG_MSG("<area> element not inside <map>\n"); + BUG_MSG("<area> not inside <map>."); return; } attrbuf = a_Html_get_attr(html, tag, tagsize, "shape"); @@ 
-2439,7 +2458,7 @@ static void } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) { type = POLYGON; } else { - BUG_MSG("<area> unknown shape: \"%s\"\n", attrbuf); + BUG_MSG("<area> unknown shape: '%s'.", attrbuf); type = UNKNOWN; } if (type == RECTANGLE || type == CIRCLE || type == POLYGON) { @@ -2449,7 +2468,7 @@ static void if (type == RECTANGLE) { if (coords->size() != 4) - BUG_MSG("<area> rectangle must have four coordinate values\n"); + BUG_MSG("<area> rectangle must have four coordinate values."); if (coords->size() >= 4) shape = new Rectangle(coords->get(0), coords->get(1), @@ -2457,7 +2476,7 @@ static void coords->get(3) - coords->get(1)); } else if (type == CIRCLE) { if (coords->size() != 3) - BUG_MSG("<area> circle must have three coordinate values\n"); + BUG_MSG("<area> circle must have three coordinate values."); if (coords->size() >= 3) shape = new Circle(coords->get(0), coords->get(1), coords->get(2)); @@ -2465,7 +2484,7 @@ static void Polygon *poly; int i; if (coords->size() % 2) - BUG_MSG("<area> polygon with odd number of coordinates\n"); + BUG_MSG("<area> polygon with odd number of coordinates."); shape = poly = new Polygon(); for (i = 0; i < (coords->size() / 2); i++) poly->addPoint(coords->get(2*i), coords->get(2*i + 1)); @@ -2601,11 +2620,11 @@ static void Html_tag_open_source(DilloHtml *html, const char *tag, const char *attrbuf; if (!(html->InFlags & IN_MEDIA)) { - BUG_MSG("<source> element not inside a media element.\n"); + BUG_MSG("<source> not inside a media element."); return; } if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) { - BUG_MSG("src attribute is required in <source> element.\n"); + BUG_MSG("<source> requires src attribute."); return; } else { DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0); @@ -2682,7 +2701,7 @@ static const char* Html_get_javascript_link(DilloHtml *html) if ((ch == '"' || ch == '\'') && (p2 = strchr(Buf->str + i + 1 , ch))) { p1 = Buf->str + i; - BUG_MSG("link depends on 
javascript()\n"); + BUG_MSG("Link depends on javascript()."); dStr_truncate(Buf, p2 - Buf->str); dStr_erase(Buf, 0, p1 - Buf->str + 1); } @@ -2697,7 +2716,8 @@ static void Html_add_anchor(DilloHtml *html, const char *name) { _MSG("Registering ANCHOR: %s\n", name); if (!HT2TB(html)->addAnchor (name, html->style ())) - BUG_MSG("Anchor names must be unique within the document ('%s')\n",name); + BUG_MSG("Anchor names must be unique within the document (\"%s\").", + name); /* * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that * differ only in case may not appear in the same document", but @@ -2767,7 +2787,8 @@ static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize) /* We compare the "id" value with the url-decoded "name" value */ if (!id || strcmp(nameVal, id)) { if (id) - BUG_MSG("'id' and 'name' attribute of <a> tag differ\n"); + BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.", + id, nameVal); Html_add_anchor(html, nameVal); } @@ -2790,7 +2811,7 @@ static void Html_tag_close_a(DilloHtml *html) static void Html_tag_open_blockquote(DilloHtml *html, const char *tag, int tagsize) { - Html_add_textblock(html, 9); + Html_add_textblock(html, true, 9); } /* @@ -2843,7 +2864,7 @@ static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize) html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE, CSS_TYPE_ENUM, list_style_type); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<ul> type attribute is obsolete.\n"); + BUG_MSG("<ul> type attribute is obsolete."); } S_TOP(html)->list_type = HTML_LIST_UNORDERED; @@ -2865,7 +2886,7 @@ static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize) S_TOP(html)->ref_list_item = NULL; if (prefs.show_extra_warnings) - BUG_MSG("Obsolete list type; use <UL> instead\n"); + BUG_MSG("Obsolete list type; use <ul> instead."); } /* @@ -2873,7 +2894,16 @@ static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize) */ 
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize) { - Html_tag_open_dir(html, tag, tagsize); + /* In another bit of ridiculous mess from the HTML5 world, the menu + * element, which was deprecated in HTML4: + * - does not appear at all in W3C's HTML5 spec + * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it + * means something totally different than it did in the old days + * (now it's for popup menus and toolbar menus rather than being a + * sort of list). + */ + if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) + Html_tag_open_dir(html, tag, tagsize); } /* @@ -2906,7 +2936,7 @@ static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize) if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) && (n = (int) strtol(attrbuf, NULL, 10)) < 0) { - BUG_MSG( "illegal '-' character in START attribute; Starting from 0\n"); + BUG_MSG("Illegal '-' character in START attribute; Starting from 0."); n = 0; } S_TOP(html)->list_number = n; @@ -2923,7 +2953,7 @@ static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize) const char *attrbuf; if (S_TOP(html)->list_type == HTML_LIST_NONE) - BUG_MSG("<li> outside <ul> or <ol>\n"); + BUG_MSG("<li> outside <ul> or <ol>."); html->InFlags |= IN_LI; @@ -2934,7 +2964,7 @@ static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize) // ordered if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) && (*list_number = strtol(attrbuf, NULL, 10)) < 0) { - BUG_MSG("illegal negative LIST VALUE attribute; Starting from 0\n"); + BUG_MSG("Illegal negative list value attribute; Starting from 0."); *list_number = 0; } } @@ -2961,7 +2991,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize) width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL); if (width_ptr) { if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<hr> width attribute is obsolete.\n"); + BUG_MSG("<hr> width 
attribute is obsolete."); html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH, CSS_TYPE_LENGTH_PERCENTAGE, a_Html_parse_length (html, width_ptr)); @@ -2971,7 +3001,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize) if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) { size = strtol(attrbuf, NULL, 10); if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<hr> size attribute is obsolete.\n"); + BUG_MSG("<hr> size attribute is obsolete."); } a_Html_tag_set_align_attr(html, tag, tagsize); @@ -2979,7 +3009,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize) /* TODO: evaluate attribute */ if (a_Html_get_attr(html, tag, tagsize, "noshade")) { if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) - BUG_MSG("<hr> noshade attribute is obsolete.\n"); + BUG_MSG("<hr> noshade attribute is obsolete."); html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE, CSS_TYPE_ENUM, BORDER_SOLID); html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE, @@ -3045,7 +3075,7 @@ static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize) */ static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize) { - Html_add_textblock(html, 9); + Html_add_textblock(html, true, 9); } /* @@ -3134,7 +3164,7 @@ static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize) /* only valid inside HEAD */ if (!(html->InFlags & IN_HEAD)) { - BUG_MSG("META element must be inside the HEAD section\n"); + BUG_MSG("<meta> must be inside the HEAD section."); return; } @@ -3167,7 +3197,7 @@ static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize) if (a_Url_cmp(html->base_url, new_url) == 0) { /* redirection loop, or empty url string: ignore */ - BUG_MSG("META refresh: %s\n", + BUG_MSG("<meta> refresh: %s.", *mr_url ? 
"redirection loop" : "no target URL"); } else if (delay == 0) { /* zero-delay redirection */ @@ -3237,27 +3267,26 @@ void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url) dReturn_if (url == NULL || ! prefs.load_stylesheets); _MSG("Html_load_stylesheet: "); - if (a_Capi_get_buf(url, &data, &len)) { + if ((a_Capi_get_flags_with_redirection(url) & CAPI_Completed) && + a_Capi_get_buf(url, &data, &len)) { _MSG("cached URL=%s len=%d", URL_STR(url), len); - if (a_Capi_get_flags_with_redirection(url) & CAPI_Completed) { - if (strncmp("@charset \"", data, 10) == 0) { - char *endq = strchr(data+10, '"'); - - if (endq && (endq - data <= 51)) { - /* IANA limits charset names to 40 characters */ - char *content_type; - - *endq = '\0'; - content_type = dStrconcat("text/css; charset=", data+10, NULL); - *endq = '"'; - a_Capi_unref_buf(url); - a_Capi_set_content_type(url, content_type, "meta"); - dFree(content_type); - a_Capi_get_buf(url, &data, &len); - } + if (strncmp("@charset \"", data, 10) == 0) { + char *endq = strchr(data+10, '"'); + + if (endq && (endq - data <= 51)) { + /* IANA limits charset names to 40 characters */ + char *content_type; + + *endq = '\0'; + content_type = dStrconcat("text/css; charset=", data+10, NULL); + *endq = '"'; + a_Capi_unref_buf(url); + a_Capi_set_content_type(url, content_type, "meta"); + dFree(content_type); + a_Capi_get_buf(url, &data, &len); } - html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR); } + html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR); a_Capi_unref_buf(url); } else { /* Fill a Web structure for the cache query */ @@ -3296,7 +3325,7 @@ static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize) /* Ignore LINK outside HEAD */ if (!(html->InFlags & IN_HEAD)) { - BUG_MSG("LINK element must be inside the HEAD section\n"); + BUG_MSG("<link> must be inside the HEAD section."); return; } /* Remote stylesheets enabled? 
*/ @@ -3341,12 +3370,12 @@ static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize) a_Url_free(html->base_url); html->base_url = BaseUrl; } else { - BUG_MSG("base URI is relative (it MUST be absolute)\n"); + BUG_MSG("<base> URI is relative (it MUST be absolute)."); a_Url_free(BaseUrl); } } } else { - BUG_MSG("the BASE element must appear in the HEAD section\n"); + BUG_MSG("<base> not inside HEAD section."); } } @@ -3635,10 +3664,10 @@ static int Html_needs_optional_close(int old_idx, int cur_idx) } else if (old_idx == i_TR) { /* TR closes TR */ return (cur_idx == i_TR); - } else if (old_idx == i_DD) { + } else if (old_idx == i_DD) { /* DD is closed by DD and DT */ return (cur_idx == i_DD || cur_idx == i_DT); - } else if (old_idx == i_OPTION) { + } else if (old_idx == i_OPTION) { return 1; // OPTION always needs close } @@ -3684,7 +3713,7 @@ static void Html_stack_cleanup_at_open(DilloHtml *html, int new_idx) /* we have an inline (or empty) container... */ if (Tags[oldtag_idx].EndTag == 'R') { - BUG_MSG("<%s> is not allowed to contain <%s>. -- closing <%s>\n", + BUG_MSG("<%s> is not allowed to contain <%s>. 
-- closing <%s>.", Tags[oldtag_idx].name, Tags[new_idx].name, Tags[oldtag_idx].name); } @@ -3713,7 +3742,7 @@ static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag) int tag_idx; if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE) - BUG_MSG("the required DOCTYPE declaration is missing.\n"); + BUG_MSG("The required DOCTYPE declaration is missing."); if (!(html->InFlags & IN_HTML)) { tag = "<html>"; @@ -3763,6 +3792,7 @@ static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag) static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize) { const char *attrbuf; + char lang[3]; if (tagsize >= 8 && /* length of "<t id=i>" */ (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) { @@ -3788,24 +3818,25 @@ static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize) html->styleEngine->setStyle (attrbuf); } - /* handle "xml:lang" and "lang" attributes */ - int hasXmlLang = 0; + /* handle "xml:lang" and "lang" attributes + * We use only the first two chars of the value to deal with + * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt) + */ + memset(lang, 0, sizeof(lang)); if (tagsize >= 14) { /* length of "<t xml:lang=i>" */ attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang"); - if (attrbuf) { - html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, - attrbuf); - hasXmlLang = 1; - } + if (attrbuf) + strncpy(lang, attrbuf, 2); } - if (!hasXmlLang && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */ + if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */ /* length of "<t lang=i>" */ attrbuf = a_Html_get_attr(html, tag, tagsize, "lang"); if (attrbuf) - html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, - attrbuf); + strncpy(lang, attrbuf, 2); } + if (lang[0]) + html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, lang); } /* @@ -3829,7 +3860,7 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni) 
} for (int i = 0; i < 9; i++) { if (indexes[i] == ni) { - BUG_MSG("<%s> is obsolete in HTML5.\n", Tags[ni].name); + BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name); break; } } @@ -3837,8 +3868,12 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni) static void Html_display_block(DilloHtml *html) { - //HT2TB(html)->addParbreak (5, html->styleEngine->wordStyle ()); - Html_add_textblock(html, 0); + Html_add_textblock(html, !Html_will_textblock_be_out_of_flow (html), 0); +} + +static void Html_display_inline_block(DilloHtml *html) +{ + Html_add_textblock(html, false, 0); } static void Html_display_listitem(DilloHtml *html) @@ -3919,7 +3954,7 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) /* TODO: this is only raising a warning, take some defined action. * Note: apache uses IMG inside PRE (we could use its "alt"). */ if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(ni)) - BUG_MSG("<pre> is not allowed to contain <%s>\n", Tags[ni].name); + BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name); /* Make sure these elements don't nest each other */ if (html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA)) @@ -3943,6 +3978,9 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) case DISPLAY_BLOCK: Html_display_block(html); break; + case DISPLAY_INLINE_BLOCK: + Html_display_inline_block(html); + break; case DISPLAY_LIST_ITEM: Html_display_listitem(html); break; @@ -3950,7 +3988,6 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize) S_TOP(html)->display_none = true; break; case DISPLAY_INLINE: - case DISPLAY_INLINE_BLOCK: // TODO: implement inline-block default: break; } @@ -4018,7 +4055,7 @@ static const char *Html_get_attr2(DilloHtml *html, const char *attrname, int tag_parsing_flags) { - int i, isocode, entsize, Found = 0, delimiter = 0, attr_pos = 0; + int i, entsize, Found = 0, delimiter = 0, attr_pos = 0; Dstr *Buf = html->attr_data; DilloHtmlTagParsingState state = 
SEEK_ATTR_START; @@ -4077,16 +4114,12 @@ static const char *Html_get_attr2(DilloHtml *html, state = FINISHED; } else if (tag[i] == '&' && (tag_parsing_flags & HTML_ParseEntities)) { - if ((isocode = Html_parse_entity(html, tag+i, - tagsize-i, &entsize)) >= 0) { - if (isocode >= 128) { - char buf[4]; - int k, n = a_Utf8_encode(isocode, buf); - for (k = 0; k < n; ++k) - dStr_append_c(Buf, buf[k]); - } else { - dStr_append_c(Buf, (char) isocode); - } + const char *entstr; + const bool_t is_attr = TRUE; + + if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize, + is_attr))) { + dStr_append(Buf, entstr); i += entsize-1; } else { dStr_append_c(Buf, tag[i]); @@ -4228,7 +4261,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof) buf_index = bufsize; } else { /* Tag: search end of tag (skipping over quoted strings) */ - html->CurrTagOfs = html->Start_Ofs + token_start; + html->CurrOfs = html->Start_Ofs + token_start; while ( buf_index < bufsize ) { buf_index++; @@ -4249,7 +4282,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof) if (buf[offset] == ch || !buf[offset]) { buf_index = offset; } else { - BUG_MSG("attribute lacks closing quote\n"); + BUG_MSG("Attribute lacks closing quote."); break; } } @@ -4257,7 +4290,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof) /* unterminated tag detected */ p = dStrndup(buf+token_start+1, strcspn(buf+token_start+1, " <\n\r\t")); - BUG_MSG("<%s> element lacks its closing '>'\n", p); + BUG_MSG("<%s> lacks its closing '>'.", p); dFree(p); --buf_index; break; @@ -4272,6 +4305,8 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof) } } else { /* A Word: search for whitespace or tag open */ + html->CurrOfs = html->Start_Ofs + token_start; + while (++buf_index < bufsize) { buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v"); if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) && |