summaryrefslogtreecommitdiff
path: root/src/html.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/html.cc')
-rw-r--r--src/html.cc697
1 files changed, 366 insertions, 331 deletions
diff --git a/src/html.cc b/src/html.cc
index a8c70879..a1452858 100644
--- a/src/html.cc
+++ b/src/html.cc
@@ -26,6 +26,7 @@
#include "msg.h"
#include "binaryconst.h"
#include "colors.h"
+#include "html_charrefs.h"
#include "utf8.hh"
#include "misc.h"
@@ -132,9 +133,11 @@ void DilloHtml::bugMessage(const char *format, ... )
{
va_list argp;
+ if (bw->num_page_bugs)
+ dStr_append_c(bw->page_bugs, '\n');
dStr_sprintfa(bw->page_bugs,
"HTML warning: line %d, ",
- getCurTagLineNumber());
+ getCurrLineNumber());
va_start(argp, format);
dStr_vsprintfa(bw->page_bugs, format, argp);
va_end(argp);
@@ -158,15 +161,15 @@ DilloUrl *a_Html_url_new(DilloHtml *html,
const char *suffix = (n_ic) > 1 ? "s" : "";
n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
if (n_ic == n_ic_spc) {
- BUG_MSG("URL has %d illegal space%s\n", n_ic, suffix);
+ BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
} else if (n_ic_spc == 0) {
- BUG_MSG("URL has %d illegal character%s in {00-1F, 7F} range\n",
- n_ic, suffix);
+ BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
+ n_ic, suffix, url_str);
} else {
- BUG_MSG("URL has %d illegal character%s: "
- "%d space%s, and %d in {00-1F, 7F} range\n",
+ BUG_MSG("URL has %d illegal byte%s: "
+ "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
n_ic, suffix,
- n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc);
+ n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
}
}
return url;
@@ -290,7 +293,7 @@ void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
TextAlignType textAlignType = TEXT_ALIGN_LEFT;
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("The align attribute is obsolete in HTML5.\n");
+ BUG_MSG("The align attribute is obsolete in HTML5.");
if (dStrAsciiCasecmp (align, "left") == 0)
textAlignType = TEXT_ALIGN_LEFT;
@@ -334,7 +337,7 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("The valign attribute is obsolete in HTML5.\n");
+ BUG_MSG("The valign attribute is obsolete in HTML5.");
if (dStrAsciiCasecmp (attr, "top") == 0)
valign = VALIGN_TOP;
@@ -356,15 +359,24 @@ bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
/*
* Create and add a new Textblock to the current Textblock
*/
-static void Html_add_textblock(DilloHtml *html, int space)
+static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace)
{
Textblock *textblock = new Textblock (prefs.limit_text_width);
- HT2TB(html)->addParbreak (space, html->wordStyle ());
- HT2TB(html)->addWidget (textblock, html->style ());
- HT2TB(html)->addParbreak (space, html->wordStyle ());
+ if (addBreaks)
+ HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
+ HT2TB(html)->addWidget (textblock, html->style ()); /* Works also for floats
+ etc. */
+ if (addBreaks)
+ HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
S_TOP(html)->textblock = html->dw = textblock;
- S_TOP(html)->hand_over_break = true;
+ if (addBreaks)
+ S_TOP(html)->hand_over_break = true;
+}
+
+static bool Html_will_textblock_be_out_of_flow(DilloHtml *html)
+{
+ return HT2TB(html)->isStyleOutOfFlow (html->style ());
}
/*
@@ -397,9 +409,8 @@ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
stop_parser = false;
- CurrTagOfs = 0;
- OldTagOfs = 0;
- OldTagLine = 1;
+ CurrOfs = OldOfs = 0;
+ OldLine = 1;
DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
DocTypeVersion = 0.0f;
@@ -539,10 +550,10 @@ void DilloHtml::write(char *Buf, int BufSize, int Eof)
}
/*
- * Return the line number of the tag being processed by the parser.
+ * Return the line number of the tag/word being processed by the parser.
* Also update the offsets.
*/
-int DilloHtml::getCurTagLineNumber()
+int DilloHtml::getCurrLineNumber()
{
int i, ofs, line;
const char *p = Start_Buf;
@@ -551,13 +562,13 @@ int DilloHtml::getCurTagLineNumber()
/* Disable line counting for META hack. Buffers differ. */
dReturn_val_if((InFlags & IN_META_HACK), -1);
- ofs = CurrTagOfs;
- line = OldTagLine;
- for (i = OldTagOfs; i < ofs; ++i)
+ ofs = CurrOfs;
+ line = OldLine;
+ for (i = OldOfs; i < ofs; ++i)
if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
++line;
- OldTagOfs = CurrTagOfs;
- OldTagLine = line;
+ OldOfs = CurrOfs;
+ OldLine = line;
return line;
}
@@ -787,113 +798,16 @@ void a_Html_stash_init(DilloHtml *html)
dStr_truncate(html->Stash, 0);
}
-/* Entities list from the HTML 4.01 DTD */
-typedef struct {
- const char *entity;
- int isocode;
-} Ent_t;
-
-#define NumEnt 252
-static const Ent_t Entities[NumEnt] = {
- {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300},
- {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304},
- {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041},
- {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312},
- {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313},
- {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314},
- {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633},
- {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522},
- {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651},
- {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326},
- {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650},
- {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336},
- {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333},
- {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636},
- {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341},
- {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340},
- {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047},
- {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343},
- {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246},
- {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270},
- {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143},
- {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052},
- {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623},
- {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367},
- {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005},
- {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141},
- {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254},
- {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275},
- {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663},
- {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624},
- {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356},
- {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036},
- {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010},
- {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673},
- {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410},
- {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027},
- {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030},
- {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265},
- {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007},
- {"nbsp",0240}, {"ndash",020023},{"ne",021140}, {"ni",021013},
- {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361},
- {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523},
- {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677},
- {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272},
- {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366},
- {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245},
- {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261},
- {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035},
- {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032},
- {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411},
- {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413},
- {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031},
- {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247},
- {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074},
- {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021},
- {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263},
- {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064},
- {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376},
- {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721},
- {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371},
- {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374},
- {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245},
- {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014}
-};
-
-
-/*
- * Comparison function for binary search
- */
-static int Html_entity_comp(const void *a, const void *b)
-{
- return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);
-}
-
-/*
- * Binary search of 'key' in entity list
- */
-static int Html_entity_search(char *key)
-{
- Ent_t *res, EntKey;
-
- EntKey.entity = key;
- res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt,
- sizeof(Ent_t), Html_entity_comp);
- if (res)
- return (res - Entities);
- return -1;
-}
-
/*
* This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them!
*
* SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED.
- * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid
+ * TODO: Probably I should remove this hack. --Jcid
*/
-static int Html_ms_stupid_quotes_2ucs(int isocode)
+static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
int ret;
- switch (isocode) {
+ switch (codepoint) {
case 145:
case 146: ret = '\''; break;
case 147:
@@ -901,130 +815,233 @@ static int Html_ms_stupid_quotes_2ucs(int isocode)
case 149: ret = 176; break;
case 150:
case 151: ret = '-'; break;
- default: ret = isocode; break;
+ default: ret = codepoint; break;
}
return ret;
}
/*
- * Given an entity, return the UCS character code.
- * Returns a negative value (error code) if not a valid entity.
- *
- * The first character *token is assumed to be == '&'
- *
- * For valid entities, *entsize is set to the length of the parsed entity.
+ * Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
+ * The "&#" has already been consumed.
*/
-static int Html_parse_entity(DilloHtml *html, const char *token,
- int toksize, int *entsize)
+static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
+ bool_t is_attr, int *entsize)
{
- int isocode, i;
- char *tok, *s, c;
+ static char buf[5];
+ char *s = tok;
+ int n, codepoint = -1;
- token++;
- tok = s = toksize ? dStrndup(token, (uint_t)toksize) : dStrdup(token);
-
- isocode = -1;
-
- if (*s == '#') {
- /* numeric character reference */
- errno = 0;
- if (*++s == 'x' || *s == 'X') {
- if (isxdigit(*++s)) {
- /* strtol with base 16 accepts leading "0x" - we don't */
- if (*s == '0' && s[1] == 'x') {
- s++;
- isocode = 0;
- } else {
- isocode = strtol(s, &s, 16);
- }
+ errno = 0;
+
+ if (*s == 'x' || *s == 'X') {
+ if (isxdigit(*++s)) {
+ /* strtol with base 16 accepts leading "0x" - we don't */
+ if (*s == '0' && s[1] == 'x') {
+ s++;
+ codepoint = 0;
+ } else {
+ codepoint = strtol(s, &s, 16);
}
- } else if (isdigit(*s)) {
- isocode = strtol(s, &s, 10);
}
+ } else if (isdigit(*s)) {
+ codepoint = strtol(s, &s, 10);
+ }
+ if (errno)
+ codepoint = -1;
- if (!isocode || errno || isocode > 0xffff) {
- /* this catches null bytes, errors and codes >= 0xFFFF */
- BUG_MSG("numeric character reference \"%s\" out of range\n", tok);
- isocode = -2;
+ if (*s == ';')
+ s++;
+ else {
+ if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
+ (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
+ char c = *s;
+ *s = '\0';
+ BUG_MSG("Character reference '&#%s' lacks ';'.", tok);
+ *s = c;
}
-
- if (isocode != -1) {
- if (*s == ';')
- s++;
- else if (prefs.show_extra_warnings)
- BUG_MSG("numeric character reference without trailing ';'\n");
+ /* Don't require ';' for old HTML, except that our current heuristic
+ * is to require it in attributes to avoid cases like "&copy=1" found
+ * in URLs.
+ */
+ if (is_attr || html->DocType == DT_XHTML ||
+ (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
+ return NULL;
}
- } else if (isalpha(*s)) {
- /* character entity reference */
- while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
- c = *s;
- *s = 0;
+ }
+ if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
+ codepoint != '\f') ||
+ (codepoint >= 0x7f && codepoint <= 0x9f) ||
+ (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
+ ((codepoint & 0xfffe) == 0xfffe) ||
+ (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
+ codepoint > 0xffff)) {
+ /* this catches null bytes, errors, codes out of range, disallowed
+ * control chars, permanently undefined chars, and surrogates.
+ */
+ char c = *s;
+ *s = '\0';
+ BUG_MSG("Numeric character reference '&#%s' is not valid.", tok);
+ *s = c;
- if ((i = Html_entity_search(tok)) >= 0) {
- isocode = Entities[i].isocode;
+ codepoint = (codepoint >= 145 && codepoint <= 151) ?
+ Html_ms_stupid_quotes_2ucs(codepoint) : -1;
+ }
+ if (codepoint != -1) {
+ if (codepoint >= 128) {
+ n = a_Utf8_encode(codepoint, buf);
} else {
- if (html->DocType == DT_XHTML && !strcmp(tok, "apos")) {
- isocode = 0x27;
- } else {
- if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) ||
- html->DocType == DT_XHTML)
- BUG_MSG("undefined character entity '%s'\n", tok);
- isocode = -3;
- }
+ n = 1;
+ buf[0] = (char) codepoint;
+ }
+ assert(n < 5);
+ buf[n] = '\0';
+ *entsize = s-tok+2;
+ return buf;
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * Comparison function for binary search
+ */
+static int Html_charref_comp(const void *a, const void *b)
+{
+ return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
+}
+
+/*
+ * Binary search of 'key' in charref list
+ */
+static Charref_t *Html_charref_search(char *key)
+{
+ Charref_t RefKey;
+
+ RefKey.ref = key;
+ return (Charref_t*) bsearch(&RefKey, Charrefs, NumRef,
+ sizeof(Charref_t), Html_charref_comp);
+}
+
+/*
+ * Parse a named character reference (e.g., "&amp;" or "&hellip;").
+ * The "&" has already been consumed.
+ */
+static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
+ bool_t is_attr, int *entsize)
+{
+ Charref_t *p;
+ char c;
+ char *s = tok;
+ const char *ret = NULL;
+
+ while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
+ c = *s;
+ *s = '\0';
+ if (c != ';') {
+ if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
+ (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
+ BUG_MSG("Character reference '&%s' lacks ';'.", tok);
+
+ /* Don't require ';' for old HTML, except that our current heuristic
+ * is to require it in attributes to avoid cases like "&copy=1" found
+ * in URLs.
+ */
+ if (is_attr || html->DocType == DT_XHTML ||
+ (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
+ return ret;
}
- if (c == ';')
- s++;
- else if (prefs.show_extra_warnings)
- BUG_MSG("character entity reference without trailing ';'\n");
}
+ if ((p = Html_charref_search(tok))) {
+ ret = (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) ?
+ p->html5_str : p->html4_str;
+ }
+
+ if (!ret && html->DocType == DT_XHTML && !strcmp(tok, "apos"))
+ ret = "'";
+
+ *s = c;
+ if (c == ';')
+ s++;
+
+ if (!ret) {
+ c = *s;
+ *s = '\0';
+ BUG_MSG("Undefined character reference '&%s'.", tok);
+ *s = c;
+ }
*entsize = s-tok+1;
- dFree(tok);
+ return ret;
+}
+
+/*
+ * Given an entity, return the corresponding string.
+ * Returns NULL if not a valid entity.
+ *
+ * The first character *token is assumed to be == '&'
+ *
+ * For valid entities, *entsize is set to the length of the parsed entity.
+ */
+static const char *Html_parse_entity(DilloHtml *html, const char *token,
+ int toksize, int *entsize, bool_t is_attr)
+{
+ const char *ret = NULL;
+ char *tok;
- if (isocode >= 145 && isocode <= 151) {
- /* TODO: remove this hack. */
- isocode = Html_ms_stupid_quotes_2ucs(isocode);
- } else if (isocode == -1 && prefs.show_extra_warnings)
- BUG_MSG("literal '&'\n");
+ token++;
+ tok = dStrndup(token, (uint_t)toksize);
+
+ if (*tok == '#') {
+ ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
+ } else if (isalpha(*tok)) {
+ ret = Html_parse_named_charref(html, tok, is_attr, entsize);
+ } else if (prefs.show_extra_warnings &&
+ (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
+ // HTML5 doesn't mind literal '&'s.
+ BUG_MSG("Literal '&'.");
+ }
+ dFree(tok);
- return isocode;
+ return ret;
}
/*
- * Convert all the entities in a token to utf8 encoding. Takes
- * a token and its length, and returns a newly allocated string.
+ * Parse all the entities in a token. Takes the token and its length, and
+ * returns a newly allocated string.
*/
char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
{
const char *esc_set = "&";
- char *new_str, buf[4];
- int i, j, k, n, s, isocode, entsize;
-
- new_str = dStrndup(token, toksize);
- s = strcspn(new_str, esc_set);
- if (new_str[s] == 0)
- return new_str;
-
- for (i = j = s; i < toksize; i++) {
- if (token[i] == '&' &&
- (isocode = Html_parse_entity(html, token+i,
- toksize-i, &entsize)) >= 0) {
- if (isocode >= 128) {
- /* multibyte encoding */
- n = a_Utf8_encode(isocode, buf);
- for (k = 0; k < n; ++k)
- new_str[j++] = buf[k];
+ int i, s, entsize;
+ char *str;
+
+ s = strcspn(token, esc_set);
+ if (s >= toksize) {
+ /* no ampersands */
+ str = dStrndup(token, toksize);
+ } else {
+ Dstr *ds = dStr_sized_new(toksize);
+
+ dStr_append_l(ds, token, s);
+
+ for (i = s; i < toksize; i++) {
+ const char *entstr;
+ const bool_t is_attr = FALSE;
+
+ if (token[i] == '&' &&
+ (entstr = Html_parse_entity(html, token+i, toksize-i, &entsize,
+ is_attr))) {
+ dStr_append(ds, entstr);
+ i += entsize-1;
} else {
- new_str[j++] = (char) isocode;
+ dStr_append_c(ds, token[i]);
}
- i += entsize-1;
- } else {
- new_str[j++] = token[i];
}
+ str = ds->str;
+ dStr_free(ds, 0);
}
- new_str[j] = '\0';
- return new_str;
+ return str;
}
/*
@@ -1095,7 +1112,7 @@ static void Html_process_space(DilloHtml *html, const char *space,
break;
case '\t':
if (prefs.show_extra_warnings)
- BUG_MSG("TAB character inside <PRE>\n");
+ BUG_MSG("TAB character inside <pre>.");
offset = TAB_SIZE - html->pre_column % TAB_SIZE;
spaceCnt += offset;
html->pre_column += offset;
@@ -1314,7 +1331,7 @@ static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx)
int toptag_idx = S_TOP(html)->tag_idx;
TagInfo toptag = Tags[toptag_idx];
if (s_sz > idx + 1 && toptag.EndTag != 'O')
- BUG_MSG(" - forcing close of open tag: <%s>\n", toptag.name);
+ BUG_MSG(" - forcing close of open tag: <%s>.", toptag.name);
_MSG("Close: %*s%s\n", size," ", toptag.name);
if (toptag.close)
toptag.close(html);
@@ -1372,10 +1389,10 @@ static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
if (matched) {
Html_tag_cleanup_to_idx(html, stack_idx);
} else if (expected) {
- BUG_MSG("unexpected closing tag: </%s> -- expected </%s>.\n",
+ BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
new_tag.name, Tags[tag_idx].name);
} else {
- BUG_MSG("unexpected closing tag: </%s>.\n", new_tag.name);
+ BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
}
}
@@ -1411,7 +1428,7 @@ static void Html_tag_cleanup_nested_inputs(DilloHtml *html, int new_idx)
}
if (matched) {
- BUG_MSG("attempt to nest <%s> element inside <%s> -- closing <%s>\n",
+ BUG_MSG("Attempt to nest <%s> element inside <%s> -- closing <%s>.",
Tags[new_idx].name, Tags[u_idx].name, Tags[u_idx].name);
Html_tag_cleanup_to_idx(html, stack_idx);
} else {
@@ -1481,7 +1498,7 @@ CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
else {
/* allow only whitespaces */
if (*end && !isspace (*end)) {
- BUG_MSG("Garbage after length: %s\n", attr);
+ BUG_MSG("Garbage after length: '%s'.", attr);
l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
}
}
@@ -1501,7 +1518,7 @@ int32_t a_Html_color_parse(DilloHtml *html, const char *str,
int32_t color = a_Color_parse(str, default_color, &err);
if (err) {
- BUG_MSG("color \"%s\" is not in \"#RRGGBB\" format\n", str);
+ BUG_MSG("Color '%s' is not in \"#RRGGBB\" format.", str);
}
return color;
}
@@ -1518,8 +1535,8 @@ static int
bool valid = *val && !strchr(val, ' ');
if (!valid) {
- BUG_MSG("'%s' value must not be empty and must not contain spaces.\n",
- attrname);
+ BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
+ "spaces.", attrname, val);
}
return valid ? 1 : 0;
} else {
@@ -1530,8 +1547,8 @@ static int
break;
if (val[i] || !(isascii(val[0]) && isalpha(val[0])))
- BUG_MSG("'%s' value \"%s\" is not of the form "
- "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname, val);
+ BUG_MSG("%s attribute value \"%s\" is not of the form "
+ "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
return !(val[i]);
}
@@ -1559,7 +1576,6 @@ static int
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
{
static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
- static const char HTML5_sig [] = "<!DOCTYPE html>";
static const char HTML20 [] = "-//IETF//DTD HTML";
static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
@@ -1596,7 +1612,7 @@ static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
_MSG("New: {%s}\n", ntag);
if (html->DocType != DT_NONE)
- BUG_MSG("Multiple DOCTYPE declarations.\n");
+ BUG_MSG("Multiple DOCTYPE declarations.");
/* The default DT_NONE type is TagSoup */
if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
@@ -1624,13 +1640,14 @@ static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
html->DocType = DT_HTML;
html->DocTypeVersion = 2.0f;
}
- } else if (!dStrAsciiCasecmp(ntag, HTML5_sig)) {
+ } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
+ !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >")) {
html->DocType = DT_HTML;
html->DocTypeVersion = 5.0f;
}
if (html->DocType == DT_NONE) {
html->DocType = DT_UNRECOGNIZED;
- BUG_MSG("DOCTYPE not recognized:\n%s.\n", ntag);
+ BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
}
dFree(ntag);
}
@@ -1649,7 +1666,7 @@ static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
++html->Num_HTML;
if (html->Num_HTML > 1) {
- BUG_MSG("HTML element was already open\n");
+ BUG_MSG("<html> was already open.");
html->ReqTagClose = true;
}
}
@@ -1668,7 +1685,7 @@ static void Html_tag_close_html(DilloHtml *html)
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
{
if (html->InFlags & IN_BODY) {
- BUG_MSG("HEAD element must go before the BODY section\n");
+ BUG_MSG("<head> must go before the BODY section.");
html->ReqTagClose = true;
return;
}
@@ -1676,10 +1693,10 @@ static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
if (html->Num_HEAD < UCHAR_MAX)
++html->Num_HEAD;
if (html->InFlags & IN_HEAD) {
- BUG_MSG("HEAD element was already open\n");
+ BUG_MSG("<head> was already open.");
html->ReqTagClose = true;
} else if (html->Num_HEAD > 1) {
- BUG_MSG("HEAD section already finished -- ignoring\n");
+ BUG_MSG("<head> already finished -- ignoring.");
html->ReqTagClose = true;
} else {
html->InFlags |= IN_HEAD;
@@ -1696,7 +1713,7 @@ static void Html_tag_close_head(DilloHtml *html)
if (html->Num_HEAD == 1) {
/* match for the well formed start of HEAD section */
if (html->Num_TITLE == 0)
- BUG_MSG("HEAD section lacks the TITLE element\n");
+ BUG_MSG("<head> lacks <title>.");
html->InFlags &= ~IN_HEAD;
@@ -1726,9 +1743,9 @@ static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
if (html->Num_TITLE < UCHAR_MAX)
++html->Num_TITLE;
if (html->Num_TITLE > 1)
- BUG_MSG("A redundant TITLE element was found\n");
+ BUG_MSG("Redundant <title>.");
} else {
- BUG_MSG("TITLE element must be inside the HEAD section -- ignoring\n");
+ BUG_MSG("<title> must be inside <head> -- ignoring.");
}
}
@@ -1776,7 +1793,7 @@ static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
- BUG_MSG("type attribute is required for <style>\n");
+ BUG_MSG("<style> requires type attribute.");
} else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
html->loadCssFromStash = false;
}
@@ -1800,8 +1817,8 @@ static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
static void Html_tag_close_style(DilloHtml *html)
{
if (prefs.parse_embedded_css && html->loadCssFromStash)
- html->styleEngine->parse(html, html->base_url, html->Stash->str, html->Stash->len,
- CSS_ORIGIN_AUTHOR);
+ html->styleEngine->parse(html, html->base_url, html->Stash->str,
+ html->Stash->len, CSS_ORIGIN_AUTHOR);
}
/*
@@ -1825,21 +1842,21 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
++html->Num_BODY;
if (html->Num_BODY > 1) {
- BUG_MSG("BODY element was already open\n");
+ BUG_MSG("<body> was already open.");
html->ReqTagClose = true;
return;
}
if (html->InFlags & IN_HEAD) {
/* if we're here, it's bad XHTML, no need to recover */
- BUG_MSG("unclosed HEAD element\n");
+ BUG_MSG("Unclosed <head>.");
}
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
color = a_Html_color_parse(html, attrbuf, -1);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<body> bgcolor attribute is obsolete.\n");
+ BUG_MSG("<body> bgcolor attribute is obsolete.");
if (color != -1)
html->styleEngine->setNonCssHint (CSS_PROPERTY_BACKGROUND_COLOR,
@@ -1850,7 +1867,7 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
color = a_Html_color_parse(html, attrbuf, -1);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<body> text attribute is obsolete.\n");
+ BUG_MSG("<body> text attribute is obsolete.");
if (color != -1)
html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR,
@@ -1862,13 +1879,13 @@ static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<body> link attribute is obsolete.\n");
+ BUG_MSG("<body> link attribute is obsolete.");
}
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<body> vlink attribute is obsolete.\n");
+ BUG_MSG("<body> vlink attribute is obsolete.");
}
html->dw->setStyle (html->style ());
@@ -2012,7 +2029,7 @@ static void Html_tag_content_frameset (DilloHtml *html,
{
HT2TB(html)->addParbreak (9, html->wordStyle ());
HT2TB(html)->addText("--FRAME--", html->wordStyle ());
- Html_add_textblock(html, 5);
+ Html_add_textblock(html, true, 5);
}
/*
@@ -2089,8 +2106,8 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
{
char *width_ptr, *height_ptr;
const char *attrbuf;
- CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
- CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
+ CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
+ CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
int w = 0, h = 0;
if (prefs.show_tooltip &&
@@ -2123,7 +2140,7 @@ void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
*/
if (w < 0 || h < 0 ||
w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
- (h > 0 && w > IMAGE_MAX_AREA / h)) {
+ (h > 0 && w > IMAGE_MAX_AREA / h)) {
dFree(width_ptr);
dFree(height_ptr);
width_ptr = height_ptr = NULL;
@@ -2176,6 +2193,8 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
dw::Image *dw = new dw::Image(alt_ptr);
image =
a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0);
+
+ a_Image_ref(image);
if (HT2TB(html)->getBgColor())
image->bg_color = HT2TB(html)->getBgColor()->getColor();
@@ -2192,10 +2211,10 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
// hi->image is NULL if dillo tries to load the image immediately
hi->image = NULL;
+ a_Image_unref(image);
} else {
// otherwise a reference is kept in html->images
hi->image = image;
- a_Image_ref(image);
}
dFree(alt_ptr);
@@ -2338,7 +2357,7 @@ static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
DilloUrl *url;
if (html->InFlags & IN_MAP) {
- BUG_MSG("nested <map>\n");
+ BUG_MSG("Nested <map>.");
} else {
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
html->InFlags |= IN_MAP;
@@ -2348,7 +2367,7 @@ static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
a_Url_free (url);
dFree(hash_name);
} else {
- BUG_MSG("name attribute is required for <map>\n");
+ BUG_MSG("<map> requires name attribute.");
}
}
}
@@ -2400,7 +2419,7 @@ misc::SimpleVector<int> *Html_read_coords(DilloHtml *html, const char *str)
if (!*newtail)
break;
if (*newtail != ',') {
- BUG_MSG("area coords must be integers separated by commas.\n");
+ BUG_MSG("<area> coords must be integers separated by commas.");
}
tail = newtail + 1;
}
@@ -2423,7 +2442,7 @@ static void
Shape *shape = NULL;
if (!(html->InFlags & IN_MAP)) {
- BUG_MSG("<area> element not inside <map>\n");
+ BUG_MSG("<area> not inside <map>.");
return;
}
attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
@@ -2439,7 +2458,7 @@ static void
} else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
type = POLYGON;
} else {
- BUG_MSG("<area> unknown shape: \"%s\"\n", attrbuf);
+ BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
type = UNKNOWN;
}
if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
@@ -2449,7 +2468,7 @@ static void
if (type == RECTANGLE) {
if (coords->size() != 4)
- BUG_MSG("<area> rectangle must have four coordinate values\n");
+ BUG_MSG("<area> rectangle must have four coordinate values.");
if (coords->size() >= 4)
shape = new Rectangle(coords->get(0),
coords->get(1),
@@ -2457,7 +2476,7 @@ static void
coords->get(3) - coords->get(1));
} else if (type == CIRCLE) {
if (coords->size() != 3)
- BUG_MSG("<area> circle must have three coordinate values\n");
+ BUG_MSG("<area> circle must have three coordinate values.");
if (coords->size() >= 3)
shape = new Circle(coords->get(0), coords->get(1),
coords->get(2));
@@ -2465,7 +2484,7 @@ static void
Polygon *poly;
int i;
if (coords->size() % 2)
- BUG_MSG("<area> polygon with odd number of coordinates\n");
+ BUG_MSG("<area> polygon with odd number of coordinates.");
shape = poly = new Polygon();
for (i = 0; i < (coords->size() / 2); i++)
poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
@@ -2601,11 +2620,11 @@ static void Html_tag_open_source(DilloHtml *html, const char *tag,
const char *attrbuf;
if (!(html->InFlags & IN_MEDIA)) {
- BUG_MSG("<source> element not inside a media element.\n");
+ BUG_MSG("<source> not inside a media element.");
return;
}
if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
- BUG_MSG("src attribute is required in <source> element.\n");
+ BUG_MSG("<source> requires src attribute.");
return;
} else {
DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
@@ -2682,7 +2701,7 @@ static const char* Html_get_javascript_link(DilloHtml *html)
if ((ch == '"' || ch == '\'') &&
(p2 = strchr(Buf->str + i + 1 , ch))) {
p1 = Buf->str + i;
- BUG_MSG("link depends on javascript()\n");
+ BUG_MSG("Link depends on javascript().");
dStr_truncate(Buf, p2 - Buf->str);
dStr_erase(Buf, 0, p1 - Buf->str + 1);
}
@@ -2697,7 +2716,8 @@ static void Html_add_anchor(DilloHtml *html, const char *name)
{
_MSG("Registering ANCHOR: %s\n", name);
if (!HT2TB(html)->addAnchor (name, html->style ()))
- BUG_MSG("Anchor names must be unique within the document ('%s')\n",name);
+ BUG_MSG("Anchor names must be unique within the document (\"%s\").",
+ name);
/*
* According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
* differ only in case may not appear in the same document", but
@@ -2767,7 +2787,8 @@ static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
/* We compare the "id" value with the url-decoded "name" value */
if (!id || strcmp(nameVal, id)) {
if (id)
- BUG_MSG("'id' and 'name' attribute of <a> tag differ\n");
+ BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
+ id, nameVal);
Html_add_anchor(html, nameVal);
}
@@ -2790,7 +2811,7 @@ static void Html_tag_close_a(DilloHtml *html)
/* Tag-open handler for blockquote (role taken from its name): it only
 * delegates to Html_add_textblock(); 'tag' and 'tagsize' are unused here. */
static void Html_tag_open_blockquote(DilloHtml *html,
const char *tag, int tagsize)
{
- Html_add_textblock(html, 9);
+ Html_add_textblock(html, true, 9); /* NOTE(review): the patch inserts a
+                                     * boolean 2nd argument; its meaning,
+                                     * and that of the 9, are defined by
+                                     * Html_add_textblock (not shown). */
}
/*
@@ -2843,7 +2864,7 @@ static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE,
CSS_TYPE_ENUM, list_style_type);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<ul> type attribute is obsolete.\n");
+ BUG_MSG("<ul> type attribute is obsolete.");
}
S_TOP(html)->list_type = HTML_LIST_UNORDERED;
@@ -2865,7 +2886,7 @@ static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
S_TOP(html)->ref_list_item = NULL;
if (prefs.show_extra_warnings)
- BUG_MSG("Obsolete list type; use <UL> instead\n");
+ BUG_MSG("Obsolete list type; use <ul> instead.");
}
/*
@@ -2873,7 +2894,16 @@ static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
*/
/* Tag-open handler for menu (role taken from its name). */
static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
{
- Html_tag_open_dir(html, tag, tagsize);
+ /* In another bit of ridiculous mess from the HTML5 world, the menu
+ * element, which was deprecated in HTML4:
+ * - does not appear at all in W3C's HTML5 spec
+ * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
+ * means something totally different than it did in the old days
+ * (now it's for popup menus and toolbar menus rather than being a
+ * sort of list).
+ *
+ * Hence: when the document is DT_HTML with version >= 5.0 this handler
+ * does nothing; in every other case it defers to Html_tag_open_dir(),
+ * which is what the removed line did unconditionally.
+ */
+ if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
+ Html_tag_open_dir(html, tag, tagsize);
}
/*
@@ -2906,7 +2936,7 @@ static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
(n = (int) strtol(attrbuf, NULL, 10)) < 0) {
- BUG_MSG( "illegal '-' character in START attribute; Starting from 0\n");
+ BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
n = 0;
}
S_TOP(html)->list_number = n;
@@ -2923,7 +2953,7 @@ static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
const char *attrbuf;
if (S_TOP(html)->list_type == HTML_LIST_NONE)
- BUG_MSG("<li> outside <ul> or <ol>\n");
+ BUG_MSG("<li> outside <ul> or <ol>.");
html->InFlags |= IN_LI;
@@ -2934,7 +2964,7 @@ static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
// ordered
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
(*list_number = strtol(attrbuf, NULL, 10)) < 0) {
- BUG_MSG("illegal negative LIST VALUE attribute; Starting from 0\n");
+ BUG_MSG("Illegal negative list value attribute; Starting from 0.");
*list_number = 0;
}
}
@@ -2961,7 +2991,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
if (width_ptr) {
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<hr> width attribute is obsolete.\n");
+ BUG_MSG("<hr> width attribute is obsolete.");
html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH,
CSS_TYPE_LENGTH_PERCENTAGE,
a_Html_parse_length (html, width_ptr));
@@ -2971,7 +3001,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
size = strtol(attrbuf, NULL, 10);
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<hr> size attribute is obsolete.\n");
+ BUG_MSG("<hr> size attribute is obsolete.");
}
a_Html_tag_set_align_attr(html, tag, tagsize);
@@ -2979,7 +3009,7 @@ static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
/* TODO: evaluate attribute */
if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
- BUG_MSG("<hr> noshade attribute is obsolete.\n");
+ BUG_MSG("<hr> noshade attribute is obsolete.");
html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE,
CSS_TYPE_ENUM, BORDER_SOLID);
html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE,
@@ -3045,7 +3075,7 @@ static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
*/
/* Tag-open handler for dd (role taken from its name): it only delegates to
 * Html_add_textblock(); 'tag' and 'tagsize' are unused here. */
static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
{
- Html_add_textblock(html, 9);
+ Html_add_textblock(html, true, 9); /* NOTE(review): the patch inserts a
+                                     * boolean 2nd argument; its meaning,
+                                     * and that of the 9, are defined by
+                                     * Html_add_textblock (not shown). */
}
/*
@@ -3134,7 +3164,7 @@ static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
/* only valid inside HEAD */
if (!(html->InFlags & IN_HEAD)) {
- BUG_MSG("META element must be inside the HEAD section\n");
+ BUG_MSG("<meta> must be inside the HEAD section.");
return;
}
@@ -3167,7 +3197,7 @@ static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
if (a_Url_cmp(html->base_url, new_url) == 0) {
/* redirection loop, or empty url string: ignore */
- BUG_MSG("META refresh: %s\n",
+ BUG_MSG("<meta> refresh: %s.",
*mr_url ? "redirection loop" : "no target URL");
} else if (delay == 0) {
/* zero-delay redirection */
@@ -3237,27 +3267,26 @@ void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
dReturn_if (url == NULL || ! prefs.load_stylesheets);
_MSG("Html_load_stylesheet: ");
- if (a_Capi_get_buf(url, &data, &len)) {
+ if ((a_Capi_get_flags_with_redirection(url) & CAPI_Completed) &&
+ a_Capi_get_buf(url, &data, &len)) {
_MSG("cached URL=%s len=%d", URL_STR(url), len);
- if (a_Capi_get_flags_with_redirection(url) & CAPI_Completed) {
- if (strncmp("@charset \"", data, 10) == 0) {
- char *endq = strchr(data+10, '"');
-
- if (endq && (endq - data <= 51)) {
- /* IANA limits charset names to 40 characters */
- char *content_type;
-
- *endq = '\0';
- content_type = dStrconcat("text/css; charset=", data+10, NULL);
- *endq = '"';
- a_Capi_unref_buf(url);
- a_Capi_set_content_type(url, content_type, "meta");
- dFree(content_type);
- a_Capi_get_buf(url, &data, &len);
- }
+ if (strncmp("@charset \"", data, 10) == 0) {
+ char *endq = strchr(data+10, '"');
+
+ if (endq && (endq - data <= 51)) {
+ /* IANA limits charset names to 40 characters */
+ char *content_type;
+
+ *endq = '\0';
+ content_type = dStrconcat("text/css; charset=", data+10, NULL);
+ *endq = '"';
+ a_Capi_unref_buf(url);
+ a_Capi_set_content_type(url, content_type, "meta");
+ dFree(content_type);
+ a_Capi_get_buf(url, &data, &len);
}
- html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
}
+ html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
a_Capi_unref_buf(url);
} else {
/* Fill a Web structure for the cache query */
@@ -3296,7 +3325,7 @@ static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
/* Ignore LINK outside HEAD */
if (!(html->InFlags & IN_HEAD)) {
- BUG_MSG("LINK element must be inside the HEAD section\n");
+ BUG_MSG("<link> must be inside the HEAD section.");
return;
}
/* Remote stylesheets enabled? */
@@ -3341,12 +3370,12 @@ static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
a_Url_free(html->base_url);
html->base_url = BaseUrl;
} else {
- BUG_MSG("base URI is relative (it MUST be absolute)\n");
+ BUG_MSG("<base> URI is relative (it MUST be absolute).");
a_Url_free(BaseUrl);
}
}
} else {
- BUG_MSG("the BASE element must appear in the HEAD section\n");
+ BUG_MSG("<base> not inside HEAD section.");
}
}
@@ -3635,10 +3664,10 @@ static int Html_needs_optional_close(int old_idx, int cur_idx)
} else if (old_idx == i_TR) {
/* TR closes TR */
return (cur_idx == i_TR);
- } else if (old_idx == i_DD) {
+ } else if (old_idx == i_DD) {
/* DD is closed by DD and DT */
return (cur_idx == i_DD || cur_idx == i_DT);
- } else if (old_idx == i_OPTION) {
+ } else if (old_idx == i_OPTION) {
return 1; // OPTION always needs close
}
@@ -3684,7 +3713,7 @@ static void Html_stack_cleanup_at_open(DilloHtml *html, int new_idx)
/* we have an inline (or empty) container... */
if (Tags[oldtag_idx].EndTag == 'R') {
- BUG_MSG("<%s> is not allowed to contain <%s>. -- closing <%s>\n",
+ BUG_MSG("<%s> is not allowed to contain <%s>. -- closing <%s>.",
Tags[oldtag_idx].name, Tags[new_idx].name,
Tags[oldtag_idx].name);
}
@@ -3713,7 +3742,7 @@ static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
int tag_idx;
if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
- BUG_MSG("the required DOCTYPE declaration is missing.\n");
+ BUG_MSG("The required DOCTYPE declaration is missing.");
if (!(html->InFlags & IN_HTML)) {
tag = "<html>";
@@ -3763,6 +3792,7 @@ static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
{
const char *attrbuf;
+ char lang[3];
if (tagsize >= 8 && /* length of "<t id=i>" */
(attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
@@ -3788,24 +3818,25 @@ static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
html->styleEngine->setStyle (attrbuf);
}
- /* handle "xml:lang" and "lang" attributes */
- int hasXmlLang = 0;
+ /* handle "xml:lang" and "lang" attributes
+ * We use only the first two chars of the value to deal with
+ * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+ */
+ memset(lang, 0, sizeof(lang));
if (tagsize >= 14) {
/* length of "<t xml:lang=i>" */
attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
- if (attrbuf) {
- html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING,
- attrbuf);
- hasXmlLang = 1;
- }
+ if (attrbuf)
+ strncpy(lang, attrbuf, 2);
}
- if (!hasXmlLang && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
+ if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
/* length of "<t lang=i>" */
attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
if (attrbuf)
- html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING,
- attrbuf);
+ strncpy(lang, attrbuf, 2);
}
+ if (lang[0])
+ html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, lang);
}
/*
@@ -3829,7 +3860,7 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni)
}
for (int i = 0; i < 9; i++) {
if (indexes[i] == ni) {
- BUG_MSG("<%s> is obsolete in HTML5.\n", Tags[ni].name);
+ BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
break;
}
}
@@ -3837,8 +3868,12 @@ static void Html_check_html5_obsolete(DilloHtml *html, int ni)
/* Html_process_tag() calls this for the DISPLAY_BLOCK case. */
static void Html_display_block(DilloHtml *html)
{
- //HT2TB(html)->addParbreak (5, html->styleEngine->wordStyle ());
- Html_add_textblock(html, 0);
+ Html_add_textblock(html, !Html_will_textblock_be_out_of_flow (html), 0); /* 2nd arg is false exactly when Html_will_textblock_be_out_of_flow() returns true */
+}
+
+static void Html_display_inline_block(DilloHtml *html) /* called for DISPLAY_INLINE_BLOCK, which previously fell through to a bare break */
+{
+ Html_add_textblock(html, false, 0); /* same call as Html_display_block, but the 2nd arg is always false */
}
static void Html_display_listitem(DilloHtml *html)
@@ -3919,7 +3954,7 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
/* TODO: this is only raising a warning, take some defined action.
* Note: apache uses IMG inside PRE (we could use its "alt"). */
if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(ni))
- BUG_MSG("<pre> is not allowed to contain <%s>\n", Tags[ni].name);
+ BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
/* Make sure these elements don't nest each other */
if (html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA))
@@ -3943,6 +3978,9 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
case DISPLAY_BLOCK:
Html_display_block(html);
break;
+ case DISPLAY_INLINE_BLOCK:
+ Html_display_inline_block(html);
+ break;
case DISPLAY_LIST_ITEM:
Html_display_listitem(html);
break;
@@ -3950,7 +3988,6 @@ static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
S_TOP(html)->display_none = true;
break;
case DISPLAY_INLINE:
- case DISPLAY_INLINE_BLOCK: // TODO: implement inline-block
default:
break;
}
@@ -4018,7 +4055,7 @@ static const char *Html_get_attr2(DilloHtml *html,
const char *attrname,
int tag_parsing_flags)
{
- int i, isocode, entsize, Found = 0, delimiter = 0, attr_pos = 0;
+ int i, entsize, Found = 0, delimiter = 0, attr_pos = 0;
Dstr *Buf = html->attr_data;
DilloHtmlTagParsingState state = SEEK_ATTR_START;
@@ -4077,16 +4114,12 @@ static const char *Html_get_attr2(DilloHtml *html,
state = FINISHED;
} else if (tag[i] == '&' &&
(tag_parsing_flags & HTML_ParseEntities)) {
- if ((isocode = Html_parse_entity(html, tag+i,
- tagsize-i, &entsize)) >= 0) {
- if (isocode >= 128) {
- char buf[4];
- int k, n = a_Utf8_encode(isocode, buf);
- for (k = 0; k < n; ++k)
- dStr_append_c(Buf, buf[k]);
- } else {
- dStr_append_c(Buf, (char) isocode);
- }
+ const char *entstr;
+ const bool_t is_attr = TRUE;
+
+ if ((entstr = Html_parse_entity(html, tag+i, tagsize-i, &entsize,
+ is_attr))) {
+ dStr_append(Buf, entstr);
i += entsize-1;
} else {
dStr_append_c(Buf, tag[i]);
@@ -4228,7 +4261,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
buf_index = bufsize;
} else {
/* Tag: search end of tag (skipping over quoted strings) */
- html->CurrTagOfs = html->Start_Ofs + token_start;
+ html->CurrOfs = html->Start_Ofs + token_start;
while ( buf_index < bufsize ) {
buf_index++;
@@ -4249,7 +4282,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
if (buf[offset] == ch || !buf[offset]) {
buf_index = offset;
} else {
- BUG_MSG("attribute lacks closing quote\n");
+ BUG_MSG("Attribute lacks closing quote.");
break;
}
}
@@ -4257,7 +4290,7 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
/* unterminated tag detected */
p = dStrndup(buf+token_start+1,
strcspn(buf+token_start+1, " <\n\r\t"));
- BUG_MSG("<%s> element lacks its closing '>'\n", p);
+ BUG_MSG("<%s> lacks its closing '>'.", p);
dFree(p);
--buf_index;
break;
@@ -4272,6 +4305,8 @@ static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
}
} else {
/* A Word: search for whitespace or tag open */
+ html->CurrOfs = html->Start_Ofs + token_start;
+
while (++buf_index < bufsize) {
buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&