etc. */
style_attrs.textAlign = TEXT_ALIGN_STRING;
if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
if (charattr[0] == 0)
/* todo: ALIGN=" ", and even ALIGN="&32;" will result in
* an empty string (don't know whether the latter is
* correct, has to be clarified with the specs), so
* that for empty strings, " " is assumed. */
style_attrs.textAlignChar = ' ';
else
style_attrs.textAlignChar = charattr[0];
} else
/* todo: Examine LANG attr of . */
style_attrs.textAlignChar = '.';
}
S_TOP(html)->style = Style::create (HT2LT(html), &style_attrs);
old_style->unref ();
}
}
/*
 * Read the VALIGN attribute of a tag and store the corresponding vertical
 * alignment in style_attrs->valign.
 *
 * The value is compared with dStrcasecmp() against "top", "bottom" and
 * "baseline"; any other value (including "middle") selects VALIGN_MIDDLE.
 *
 * Return value: true when the attribute is present (style_attrs was updated),
 *               false when it is absent (style_attrs is left untouched).
 */
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag,
                                int tagsize, StyleAttrs *style_attrs)
{
   const char *value = a_Html_get_attr(html, tag, tagsize, "valign");

   if (!value)
      return false;

   if (dStrcasecmp (value, "top") == 0) {
      style_attrs->valign = VALIGN_TOP;
   } else if (dStrcasecmp (value, "bottom") == 0) {
      style_attrs->valign = VALIGN_BOTTOM;
   } else if (dStrcasecmp (value, "baseline") == 0) {
      style_attrs->valign = VALIGN_BASELINE;
   } else {
      /* unknown values fall back to the middle alignment */
      style_attrs->valign = VALIGN_MIDDLE;
   }
   return true;
}
/*
 * Nest 'textblock' inside the current textblock (html->dw), for indentation,
 * and make it the current textblock.
 *
 *  html      - parser state; its top stack item and 'dw' are updated.
 *  textblock - widget to be added; it becomes html->dw.
 *  left,
 *  right     - horizontal indentation, stored as left/right margins.
 *  space     - vertical space, passed to the paragraph breaks added before
 *              and after the new widget.
 *
 * The style used for the new widget is a copy of the current top style with
 * margin, border width and padding reset to zero before the left/right
 * margins are set.
 */
static void Html_add_indented_widget(DilloHtml *html, Widget *textblock,
                                     int left, int right, int space)
{
   StyleAttrs style_attrs;
   Style *style;

   style_attrs = *S_TOP(html)->style;
   style_attrs.margin.setVal (0);
   style_attrs.borderWidth.setVal (0);
   style_attrs.padding.setVal(0);

   /* Activate this for debugging */
#if 0
   style_attrs.borderWidth.setVal (1);
   style_attrs.setBorderColor (
      Color::createShaded (HT2LT(html), style_attrs.color->getColor()));
   style_attrs.setBorderStyle (BORDER_DASHED);
#endif

   style_attrs.margin.left = left;
   style_attrs.margin.right = right;
   style = Style::create (HT2LT(html), &style_attrs);

   /* The new widget is surrounded by paragraph breaks in the parent */
   DW2TB(html->dw)->addParbreak (space, style);
   DW2TB(html->dw)->addWidget (textblock, style);
   DW2TB(html->dw)->addParbreak (space, style);

   /* From now on, content goes into the nested widget */
   S_TOP(html)->textblock = html->dw = textblock;
   S_TOP(html)->hand_over_break = true;

   /* Drop the reference obtained from Style::create() above */
   style->unref ();

   /* Handle it when the user clicks on a link */
   html->connectSignals(textblock);
}
/*
 * Create a new Textblock (constructed with prefs.limit_text_width) and add
 * it, indented, to the current textblock via Html_add_indented_widget().
 * 'left', 'right' and 'space' are passed through unchanged.
 */
static void Html_add_indented(DilloHtml *html, int left, int right, int space)
{
   Html_add_indented_widget (html, new Textblock (prefs.limit_text_width),
                             left, right, space);
}
/*
 * Map a font size to an index ('level') into FontSizes[].
 *
 * The size is first divided by prefs.font_factor. The table is then scanned
 * from index 0, and the scan stops at the first entry that is farther from
 * the normalized size than the best one seen so far; the last entry that was
 * at least as close is returned.
 * (presumably FontSizes[] is sorted in ascending order, so this yields the
 * closest level -- verify against the table definition)
 */
static int Html_fontsize_to_level(int fontsize)
{
   const double target = fontsize / prefs.font_factor;
   /* start with a distance that the first comparison is expected to beat */
   double best = FontSizes[FontSizesNum-1] + 1;
   int level = 0;
   int idx;

   for (idx = 0; idx < FontSizesNum; idx++) {
      const double dist = fabs(target - FontSizes[idx]);

      if (!(best >= dist))
         break;            /* getting farther away: stop searching */
      best = dist;
      level = idx;
   }
   return level;
}
/*
 * Map a font 'level' to a font size.
 * The level is clamped to [0, FontSizesNum - 1]; the table entry is scaled
 * by prefs.font_factor and rounded with rint().
 */
static int Html_level_to_fontsize(int level)
{
   const int clamped = MIN(FontSizesNum - 1, MAX(0, level));

   return (int)rint(FontSizes[clamped]*prefs.font_factor);
}
/*
* Create and initialize a new DilloHtml class
*/
DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
const char *content_type)
{
/* Init event receiver */
linkReceiver.html = this;
/* Init main variables */
bw = p_bw;
page_url = a_Url_dup(url);
base_url = a_Url_dup(url);
dw = NULL;
a_Bw_add_doc(p_bw, this);
/* Init for-parsing variables */
Buf_Consumed = 0;
Start_Buf = NULL;
Start_Ofs = 0;
MSG("DilloHtml(): content type: %s\n", content_type);
this->content_type = dStrdup(content_type);
/* get charset */
a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
stop_parser = false;
CurrTagOfs = 0;
OldTagOfs = 0;
OldTagLine = 1;
DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
DocTypeVersion = 0.0f;
stack = new misc::SimpleVector
* todo: what's the point between adding the parbreak before and
* after the push?
*/
static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
{
if ((html->InFlags & IN_LI) && !html->WordAfterLI) {
/* ignore first parbreak after an empty \n");
offset = TAB_SIZE - html->pre_column % TAB_SIZE;
spaceCnt += offset;
html->pre_column += offset;
break;
default:
spaceCnt++;
html->pre_column++;
break;
}
html->PrevWasCR = (space[i] == '\r');
}
if (spaceCnt) {
spc = dStrnfill(spaceCnt, ' ');
DW2TB(html->dw)->addText (spc, S_TOP(html)->style);
dFree(spc);
}
} else {
if (SGML_SPCDEL) {
/* SGML_SPCDEL ignores white space immediately after an open tag */
} else if (!html->PrevWasSPC) {
DW2TB(html->dw)->addSpace(S_TOP(html)->style);
html->PrevWasSPC = true;
}
if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
html->StashSpace = (html->Stash->len > 0);
}
}
/*
 * Handles putting the word into its proper place
 *  > STASH, STASH_AND_BODY and VERBATIM --> html->Stash
 *  > PRE, STASH_AND_BODY and the default mode --> addText() on html->dw
 *
 * Entities are parsed (or not) according to parse_mode:
 *  - VERBATIM stores the raw bytes ('size' of them) untouched.
 *  - every other mode runs the word through a_Html_parse_entities().
 *
 * 'word' is a '\0'-terminated string, 'size' is its length.
 *
 * Side effects: clears PrevWasOpenTag and PrevWasSPC, and sets WordAfterLI
 * when inside a list item.
 */
static void Html_process_word(DilloHtml *html, const char *word, int size)
{
   int i, j, start;
   char *Pword, ch;
   DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;

   /* First part: feed the stash, when the mode asks for it */
   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
       parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
      if (html->StashSpace) {
         /* a pending space separates this word from the previous one */
         dStr_append_c(html->Stash, ' ');
         html->StashSpace = false;
      }
      Pword = a_Html_parse_entities(html, word, size);
      dStr_append(html->Stash, Pword);
      dFree(Pword);

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* word goes in untouched, it is not processed here. */
      dStr_append_l(html->Stash, word, size);
   }

   /* Second part: render the word, unless it is stash-only */
   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||
       parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* skip until the closing instructions */

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
      /* all this overhead is to catch white-space entities */
      Pword = a_Html_parse_entities(html, word, size);
      for (start = i = 0; Pword[i]; start = i) {
         /* ctype functions need a value representable as unsigned char */
         if (isspace((unsigned char)Pword[i])) {
            while (Pword[++i] && isspace((unsigned char)Pword[i])) ;
            Html_process_space(html, Pword + start, i - start);
         } else {
            while (Pword[++i] && !isspace((unsigned char)Pword[i])) ;
            /* temporarily terminate the segment [start, i) */
            ch = Pword[i];
            Pword[i] = 0;
            /* add only the current segment, not the whole prefix */
            DW2TB(html->dw)->addText(Pword + start, S_TOP(html)->style);
            Pword[i] = ch;
            html->pre_column += i - start;
            html->PreFirstChar = false;
         }
      }
      dFree(Pword);

   } else {
      if (!memchr(word,'&', size)) {
         /* No entities */
         DW2TB(html->dw)->addText(word, S_TOP(html)->style);
      } else {
         /* Drop white-space characters produced by entities inside the
          * word. Only \t, \f, \n and \r are removed; the string is
          * compacted in place in a single pass so that runs of such
          * characters are removed completely. */
         Pword = a_Html_parse_entities(html, word, size);
         for (i = j = 0; Pword[i]; ++i) {
            if (!strchr("\t\f\n\r", Pword[i]))
               Pword[j++] = Pword[i];
         }
         Pword[j] = 0;
         DW2TB(html->dw)->addText(Pword, S_TOP(html)->style);
         dFree(Pword);
      }
   }

   html->PrevWasOpenTag = false;
   html->PrevWasSPC = false;
   if (html->InFlags & IN_LI)
      html->WordAfterLI = true;
}
/*
 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
 * structure, with the initial < skipped over (e.g. "P align=center>")?
 *
 *  tagstr  - '\0'-terminated element name to look for.
 *  tag     - tag text, not necessarily '\0'-terminated within tagsize.
 *  tagsize - number of valid characters in 'tag'.
 *
 * The comparison ignores case. A match requires that the name in 'tag' ends
 * right after the compared prefix, i.e. that the next character (still
 * inside tagsize) is white space, '>' or '/'.
 *
 * Return value: true on match, false otherwise.
 */
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
{
   int i;

   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
      /* <ctype.h> functions are only defined for unsigned char values (and
       * EOF), so plain chars must be converted before the call. */
      if (tolower((unsigned char)tagstr[i]) != tolower((unsigned char)tag[i]))
         return false;
   }

   /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
   if (i < tagsize &&
       (isspace((unsigned char)tag[i]) || tag[i] == '>' || tag[i] == '/'))
      return true;

   return false;
}
/*
 * Called after popping the stack, to handle nested textblock widgets.
 *
 * When the current widget (html->dw) is not the textblock of the new top
 * stack item, the current widget is flushed (after an optional
 * handOverBreak() with the top style, when 'hand_over_break' is set) and
 * html->dw is switched back to the top item's textblock.
 */
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
{
   if (html->dw == S_TOP(html)->textblock)
      return;   /* still in the same widget, nothing to do */

   if (hand_over_break)
      DW2TB(html->dw)->handOverBreak (S_TOP(html)->style);
   DW2TB(html->dw)->flush ();
   html->dw = S_TOP(html)->textblock;
}
/*
 * Push a tag onto the parser stack.
 * The new item starts as a copy of the former top item; only its tag index
 * is replaced by 'tag_idx'.
 */
static void Html_push_tag(DilloHtml *html, int tag_idx)
{
   const int new_top = html->stack->size ();

   html->stack->increase ();

   /* Copying the whole former item and then overwriting the tag index is
    * simpler than copying every field but that one. --Jcid */
   *html->stack->getRef(new_top) = *html->stack->getRef(new_top - 1);
   html->stack->getRef(new_top)->tag_idx = tag_idx;

   /* The copied style pointers are now shared by two stack items, so each
    * gets its own reference (may be unref'd later). */
   (S_TOP(html)->style)->ref ();
   if (S_TOP(html)->table_cell_style)
      (S_TOP(html)->table_cell_style)->ref ();

   html->dw = S_TOP(html)->textblock;
}
/*
 * Push the tag (used to force an element with an optional open tag into the
 * stack).
 * Note: for now this simply delegates to Html_push_tag(), but things may
 * change...
 */
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
{
   Html_push_tag (html, tag_idx);
}
/*
 * Pop the top tag in the stack.
 * Releases the style references held by the top item, shrinks the stack by
 * one and lets Html_eventually_pop_dw() switch widgets if necessary.
 */
static void Html_real_pop_tag(DilloHtml *html)
{
   /* must be read before the item is removed from the stack */
   const bool hand_over_break = S_TOP(html)->hand_over_break;

   (S_TOP(html)->style)->unref ();
   if (S_TOP(html)->table_cell_style)
      (S_TOP(html)->table_cell_style)->unref ();

   html->stack->setSize (html->stack->size() - 1);
   Html_eventually_pop_dw (html, hand_over_break);
}
/*
 * Default close function for tags.
 * (conditional cleanup of the stack)
 *
 * There are several ways of doing it. Considering the HTML 4.01 spec,
 * which defines optional close tags, and the will to deliver useful
 * diagnostic messages for badly-formed HTML, it goes as follows.
 *
 * If 'w3c_mode' is enabled (i.e. prefs.w3c_plus_heuristics is off):
 *   1.- Search the stack for the first tag that requires a close tag.
 *   2.- If it matches, clean all the optional-close tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 *
 * If 'w3c_mode' is NOT enabled:
 *   1.- Search the stack for a matching tag based on tag level.
 *   2.- If it exists, clean all the tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 *
 * When html->CloseOneTag is set, only the top tag is popped and the flag is
 * cleared; this is how the loop below closes tags one at a time through
 * their own close functions.
 */
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
{
   int w3c_mode = !prefs.w3c_plus_heuristics;
   int stack_idx, cmp = 1;

   if (html->CloseOneTag) {
      Html_real_pop_tag(html);
      html->CloseOneTag = false;
      return;
   }

   /* Look for the candidate tag to close.
    * Walk down from the top (never considering item 0) while the item is
    * not the tag being closed and may be skipped: optional-close tags can
    * always be skipped; lower-level tags only outside of w3c_mode. */
   stack_idx = html->stack->size() - 1;
   while (stack_idx) {
      const int cand_idx = html->stack->getRef(stack_idx)->tag_idx;
      bool optional_close, lower_level;

      cmp = (TagIdx != cand_idx);
      if (!cmp)
         break;   /* found the matching tag */

      optional_close = (Tags[cand_idx].EndTag == 'O');
      lower_level = (Tags[cand_idx].TagLevel < Tags[TagIdx].TagLevel);
      if (!(optional_close || (!w3c_mode && lower_level)))
         break;   /* this tag can't be closed implicitly */

      --stack_idx;
   }

   /* clean, up to the matching tag */
   if (cmp == 0 && stack_idx > 0) {
      /* There's a valid matching tag in the stack */
      while (html->stack->size() > stack_idx) {
         int toptag_idx = S_TOP(html)->tag_idx;

         /* Warn when we decide to close an open tag (for !w3c_mode) */
         if (html->stack->size() > stack_idx + 1 &&
             Tags[toptag_idx].EndTag != 'O') {
            BUG_MSG(" - forcing close of open tag: <%s>\n",
                    Tags[toptag_idx].name);
         }

         /* Close this and only this tag */
         html->CloseOneTag = true;
         Tags[toptag_idx].close (html, toptag_idx);
      }

   } else {
      if (stack_idx == 0) {
         BUG_MSG("unexpected closing tag: </%s>.\n", Tags[TagIdx].name);
      } else {
         BUG_MSG("unexpected closing tag: </%s>. -- expected </%s>\n",
                 Tags[TagIdx].name,
                 Tags[html->stack->getRef(stack_idx)->tag_idx].name);
      }
   }
}
/*
 * Cleanup (conditional), and pop the tag (if it matches).
 * All the work is done by Html_tag_cleanup_at_close().
 */
void a_Html_pop_tag(DilloHtml *html, int TagIdx)
{
   Html_tag_cleanup_at_close (html, TagIdx);
}
/*
* Some parsing routines.
*/
/*
 * Used by a_Html_parse_length
 *
 * Parses a leading number with strtod() and looks at the character that
 * follows it:
 *   '%'   -> percentage length (value / 100), the '%' is consumed
 *   '*'   -> relative length, the '*' is consumed
 *   other -> absolute length (value truncated to int), nothing is consumed
 *
 * When 'endptr' is not NULL, it receives the position after the consumed
 * characters.
 */
static Length Html_parse_length_or_multi_length (const char *attr,
                                                 char **endptr)
{
   Length l;
   char *end;
   const double v = strtod (attr, &end);

   /* Note: the "px" suffix seems not allowed by the HTML4.01 SPEC, so it
    * is deliberately not consumed here. */
   if (*end == '%') {
      end++;
      l = createPerLength (v / 100);
   } else if (*end == '*') {
      end++;
      l = createRelLength (v);
   } else {
      l = createAbsLength ((int)v);
   }

   if (endptr)
      *endptr = end;
   return l;
}
/*
 * Parse an HTML length attribute.
 *
 * Return value: an absolute length or a percentage, or LENGTH_AUTO when
 *  - attr is NULL,
 *  - the value is a relative ("n*") length, which is not allowed here, or
 *  - the number is followed by something that isn't white space (a bug
 *    message is emitted in this case).
 */
Length a_Html_parse_length (DilloHtml *html, const char *attr)
{
   Length l;
   char *end;

   /* strtod() must not be handed a NULL pointer */
   if (attr == NULL)
      return LENGTH_AUTO;

   l = Html_parse_length_or_multi_length (attr, &end);
   if (isRelLength (l)) {
      /* not allowed as &Length; */
      return LENGTH_AUTO;
   }

   /* allow only whitespaces; the cast keeps isspace() well defined for
    * bytes outside the ASCII range */
   if (*end && !isspace ((unsigned char)*end)) {
      BUG_MSG("Garbage after length: %s\n", attr);
      return LENGTH_AUTO;
   }

   _MSG("a_Html_parse_length: \"%s\" %d\n", attr, absLengthVal(l));
   return l;
}
/*
 * Parse a color attribute with a_Color_parse().
 * A bug message is emitted when the parser leaves its error flag set.
 * Return value: whatever a_Color_parse() returns for 'subtag' and
 * 'default_color'.
 */
int32_t a_Html_color_parse(DilloHtml *html,
                           const char *subtag, int32_t default_color)
{
   int parse_err = 1;
   const int32_t parsed = a_Color_parse(subtag, default_color, &parse_err);

   if (parse_err) {
      BUG_MSG("color is not in \"#RRGGBB\" format\n");
   }
   return parsed;
}
/*
 * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-]
 * Note: ID can't have entities, but this check is enough (no '&').
 *
 * A bug message is emitted when an invalid character is found, or when the
 * first character is not a letter (this includes the empty string).
 *
 * Return value: 1 if every character is in the allowed set, 0 otherwise.
 * Note that a value which only fails the "starts with a letter" rule is
 * reported, but still yields 1.
 */
static int
Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
{
   int i;

   /* Stop at the first character outside the allowed set. The casts keep
    * the <ctype.h> calls well defined for bytes outside the ASCII range. */
   for (i = 0; val[i]; ++i)
      if (!(isalnum((unsigned char)val[i]) || strchr(":_.-", val[i])))
         break;

   if (val[i] || !isalpha((unsigned char)val[0])) {
      BUG_MSG("'%s' value is not of the form "
              "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname);
   }

   return !(val[i]);
}
/*
* Handle DOCTYPE declaration
*
* Follows the convention that HTML 4.01
* doctypes which include a full w3c DTD url are treated as
* standards-compliant, but 4.01 without the url and HTML 4.0 and
* earlier are not. XHTML doctypes are always standards-compliant
* whether or not an url is present.
*
* Note: I'm not sure about this convention. The W3C validator
* recognizes the "HTML Level" with or without the URL. The convention
* comes from mozilla (see URLs below), but Dillo doesn't have the same
* rendering modes, so it may be better to choose another behaviour. --Jcid
*
* http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
* http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
*
* This is not a full DOCTYPE parser, just enough for what Dillo uses.
*/
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
{
static const char HTML_sig [] = "DocType = DT_HTML;
html->DocTypeVersion = 4.01f;
} else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
dStristr(p + strlen(XHTML1), XHTML1_url)) {
html->DocType = DT_XHTML;
html->DocTypeVersion = 1.0f;
} else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
dStristr(p + strlen(XHTML11), XHTML11_url)) {
html->DocType = DT_XHTML;
html->DocTypeVersion = 1.1f;
} else if (!strncmp(p, HTML40, strlen(HTML40))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 4.0f;
} else if (!strncmp(p, HTML32, strlen(HTML32))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 3.2f;
} else if (!strncmp(p, HTML20, strlen(HTML20))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 2.0f;
}
}
dFree(ntag);
}
/*
 * Handle open HTML element.
 * Flags the parser as being inside HTML and counts the element; a second
 * (or later) HTML element is reported as a bug.
 */
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
{
   html->InFlags |= IN_HTML;

   if (++html->Num_HTML > 1) {
      BUG_MSG("HTML element was already open\n");
   }
}
/*
 * Handle close HTML element.
 * The IN_HTML flag is only cleared when exactly one HTML element has been
 * opened; the tag is popped in every case.
 */
static void Html_tag_close_html(DilloHtml *html, int TagIdx)
{
   /* todo: may add some checks here */

   /* beware of pages with multiple HTML close tags... :-P */
   if (html->Num_HTML == 1)
      html->InFlags &= ~IN_HTML;

   a_Html_pop_tag (html, TagIdx);
}
/*
 * Handle open HEAD element.
 * A HEAD found inside or after BODY is reported and a close of the tag is
 * requested (html->ReqTagClose). Otherwise the parser is flagged as being
 * inside HEAD and the element is counted; a repeated HEAD is reported.
 */
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
{
   const bool body_seen = (html->InFlags & IN_BODY) || (html->Num_BODY > 0);

   if (body_seen) {
      BUG_MSG("HEAD element must go before the BODY section\n");
      html->ReqTagClose = true;
      return;
   }

   html->InFlags |= IN_HEAD;

   if (++html->Num_HEAD > 1) {
      BUG_MSG("HEAD element was already open\n");
   }
}
/*
 * Handle close HEAD element.
 * Reports a HEAD section without TITLE, leaves the HEAD state and pops the
 * tag.
 * Note: as a side effect of Html_test_section() this function is called
 * twice when the head element is closed implicitly.
 */
static void Html_tag_close_head(DilloHtml *html, int TagIdx)
{
   if ((html->InFlags & IN_HEAD) && html->Num_TITLE == 0) {
      BUG_MSG("HEAD section lacks the TITLE element\n");
   }
   /* clearing the flag is harmless when it was not set */
   html->InFlags &= ~IN_HEAD;

   a_Html_pop_tag (html, TagIdx);
}
/*
 * Handle open TITLE.
 * Counts the element and calls a_Html_stash_init(); the stash is where the
 * title string will be stored.
 */
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
{
   html->Num_TITLE += 1;
   a_Html_stash_init (html);
}
/*
 * Handle close TITLE.
 * Inside HEAD, the stashed string is set as page title in the browser
 * window and in the history; elsewhere the element is reported as a bug.
 * The tag is popped in both cases.
 */
static void Html_tag_close_title(DilloHtml *html, int TagIdx)
{
   if (!(html->InFlags & IN_HEAD)) {
      BUG_MSG("the TITLE element must be inside the HEAD section\n");
   } else {
      /* title is only valid inside HEAD */
      a_UIcmd_set_page_title (html->bw, html->Stash->str);
      a_History_set_title (NAV_TOP_UIDX(html->bw),html->Stash->str);
   }

   a_Html_pop_tag (html, TagIdx);
}
/*
 * Handle open SCRIPT.
 * Initializes the stash, where the embedded code will be stored, and
 * switches the top stack item to verbatim parsing.
 * MODE_VERBATIM is used because MODE_STASH catches entities.
 */
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_stash_init (html);
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}
/*
 * Handle close SCRIPT.
 * For now the tag is just popped.
 */
static void Html_tag_close_script(DilloHtml *html, int TagIdx)
{
   /* eventually the stash will be sent to an interpreter for parsing */
   a_Html_pop_tag (html, TagIdx);
}
/*
 * Handle open STYLE.
 * The contents are stored verbatim in the stash, where (in the future) the
 * style sheet interpreter can get them.
 */
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
{
   a_Html_stash_init (html);
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}
/*
 * Handle close STYLE.
 * For now the tag is just popped.
 */
static void Html_tag_close_style(DilloHtml *html, int TagIdx)
{
   /* eventually the stash will be sent to an interpreter for parsing */
   a_Html_pop_tag (html, TagIdx);
}
/*
 * Handle open BODY element.
 *
 * Flags the parser as being inside BODY and counts the element; a repeated
 * BODY is reported and otherwise ignored. Unless prefs.force_my_colors is
 * set, the BGCOLOR, TEXT, LINK and VLINK attributes are applied, and the
 * visited color is adjusted when prefs.contrast_visited_color is set.
 * Finally the top stack item is switched to DILLO_HTML_PARSE_MODE_BODY.
 */
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attr_val;
   Textblock *textblock;
   StyleAttrs style_attrs;
   Style *body_style;
   int32_t color;

   html->InFlags |= IN_BODY;

   if (++html->Num_BODY > 1) {
      BUG_MSG("BODY element was already open\n");
      return;
   }

   if (html->InFlags & IN_HEAD) {
      /* if we're here, it's bad XHTML, no need to recover */
      BUG_MSG("unclosed HEAD element\n");
   }

   /* NOTE(review): 'textblock' is assigned but not read in this function */
   textblock = DW2TB(html->dw);

   if (!prefs.force_my_colors) {
      /* BGCOLOR: applied to the widget itself and to the top style */
      attr_val = a_Html_get_attr(html, tag, tagsize, "bgcolor");
      if (attr_val) {
         color = a_Html_color_parse(html, attr_val, prefs.bg_color);
         if (color == 0xffffff && !prefs.allow_white_bg)
            color = prefs.bg_color;

         style_attrs = *html->dw->getStyle ();
         style_attrs.backgroundColor = Color::createShaded(HT2LT(html), color);
         body_style = Style::create (HT2LT(html), &style_attrs);
         html->dw->setStyle (body_style);
         body_style->unref ();
         HTML_SET_TOP_ATTR (html, backgroundColor,
                            Color::createShaded (HT2LT(html), color));
      }

      /* TEXT: foreground color of the top style */
      attr_val = a_Html_get_attr(html, tag, tagsize, "text");
      if (attr_val) {
         color = a_Html_color_parse(html, attr_val, prefs.text_color);
         HTML_SET_TOP_ATTR (html, color,
                            Color::createSimple (HT2LT(html),color));
      }

      /* LINK and VLINK: colors for unvisited and visited links */
      attr_val = a_Html_get_attr(html, tag, tagsize, "link");
      if (attr_val)
         html->link_color = a_Html_color_parse(html,attr_val,prefs.link_color);

      attr_val = a_Html_get_attr(html, tag, tagsize, "vlink");
      if (attr_val)
         html->visited_color = a_Html_color_parse(html, attr_val,
                                                  prefs.visited_color);

      if (prefs.contrast_visited_color) {
         /* get a color that has a "safe distance" from text, link and bg */
         html->visited_color =
            a_Color_vc(html->visited_color,
                       S_TOP(html)->style->color->getColor(),
                       html->link_color,
                       S_TOP(html)->style->backgroundColor->getColor());
      }
   }

   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
}
/*
 * Handle close BODY element.
 * The IN_BODY flag is only cleared when exactly one BODY element has been
 * opened; the tag is popped in every case.
 */
static void Html_tag_close_body(DilloHtml *html, int TagIdx)
{
   /* some tag soup pages use multiple BODY tags... */
   if (html->Num_BODY == 1)
      html->InFlags &= ~IN_BODY;

   a_Html_pop_tag (html, TagIdx);
}
/*
*