#include "bw.h" /* for BrowserWindow */
#include "msg.h"
#include "binaryconst.h"
#include "colors.h"
#include "html_charrefs.h"
#include "utf8.hh"
#include "misc.h"
#include "uicmd.hh"
#include "history.h"
#include "menu.hh"
#include "prefs.h"
#include "capi.h"
#include "html.hh"
#include "html_common.hh"
#include "form.hh"
#include "table.hh"
#include "dw/textblock.hh"
#include "dw/bullet.hh"
#include "dw/listitem.hh"
#include "dw/image.hh"
#include "dw/ruler.hh"
/*-----------------------------------------------------------------------------
* Defines
*---------------------------------------------------------------------------*/
/* Define to 1 to ignore white space immediately after an open tag,
 * and immediately before a close tag.
 * Tested in Html_process_space(): with the value 0 the normal whitespace
 * handling is used. */
#define SGML_SPCDEL 0
/* Distance between tab stops, in columns. Used by Html_process_space() to
 * expand a TAB into spaces while in preformatted mode. */
#define TAB_SIZE 8
/*-----------------------------------------------------------------------------
* Name spaces
*---------------------------------------------------------------------------*/
using namespace lout;
using namespace dw;
using namespace dw::core;
using namespace dw::core::ui;
using namespace dw::core::style;
/*-----------------------------------------------------------------------------
* Typedefs
*---------------------------------------------------------------------------*/
/* Forward declaration of the parser state class; it is passed to every
 * handler declared below. */
class DilloHtml;
/* Signature shared by the 'open' and 'content' handlers stored in TagInfo:
 * receives the parser, a pointer to the tag text and the tag's size. */
typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
/* Signature of the 'close' handler stored in TagInfo: receives only the
 * parser. */
typedef void (*TagCloseFunct) (DilloHtml *html);
/* States of a tag-attribute scanner.
 * NOTE(review): the state machine that consumes these values is not in the
 * visible part of the file; the meaning of each state is presumably what
 * its name says (seek attribute, match its name, find and read or skip the
 * value, done) -- confirm against the attribute lookup routine. */
typedef enum {
SEEK_ATTR_START,
MATCH_ATTR_NAME,
SEEK_TOKEN_START,
SEEK_VALUE_START,
SKIP_VALUE,
GET_VALUE,
FINISHED
} DilloHtmlTagParsingState;
/* Bit flags (each value is a distinct bit, so they can be OR-ed together).
 * NOTE(review): the consumer is not visible here; presumably they select
 * left/right whitespace trimming and character-reference decoding when an
 * attribute value is extracted -- confirm against the caller. */
typedef enum {
HTML_LeftTrim = 1 << 0,
HTML_RightTrim = 1 << 1,
HTML_ParseEntities = 1 << 2
} DilloHtmlTagParsingFlags;
/*
 * Exported function with C linkage.
 * a_Html_text() is defined further down in this file; it creates a DilloHtml
 * parser and hands back its callback, its data pointer and its widget.
 */
extern "C" {
void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
}
/*-----------------------------------------------------------------------------
* Forward declarations
*---------------------------------------------------------------------------*/
/* Parser core, called from DilloHtml::write() with the not yet consumed part
 * of the buffer; its return value is added to Start_Ofs by the caller. */
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
/* Called from DilloHtml::loadImages(); when it returns true the caller drops
 * its own reference to the image. */
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
const DilloUrl *requester, DilloImage *image);
/* Installed by a_Html_text() as the callback handed back to its caller. */
static void Html_callback(int Op, CacheClient_t *Client);
/* Closes an element by tag index; used by DilloHtml::finishParsing() to
 * force the close of elements still open at end of input. */
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
/*-----------------------------------------------------------------------------
* Local Data
*---------------------------------------------------------------------------*/
/* Parsing table structure: one entry per known element. The table itself
 * (Tags[]) is defined elsewhere and indexed by the tag_idx values kept on
 * the parser stack. */
typedef struct {
const char *name; /* element name */
unsigned char Flags; /* flags (explained near the table data) */
char EndTag; /* Is it Required, Optional or Forbidden
              * ('O' is the value tested for "optional") */
uchar_t TagLevel; /* Used to heuristically parse bad HTML */
TagOpenFunct open; /* Open function */
TagOpenFunct content; /* Content function */
TagCloseFunct close; /* Close function (may be NULL; checked before
                      * it is called) */
} TagInfo;
extern const TagInfo Tags[];
/*-----------------------------------------------------------------------------
*-----------------------------------------------------------------------------
* Main Code
*-----------------------------------------------------------------------------
*---------------------------------------------------------------------------*/
/*
 * Record one HTML warning for the current page.
 *
 * The printf-style message is appended to bw->page_bugs, prefixed with the
 * line number the parser is currently at; entries are separated by a
 * newline. The bug counter is then incremented and pushed to the UI.
 */
void DilloHtml::bugMessage(const char *format, ... )
{
   va_list args;

   /* Every entry but the first starts on a new line. */
   if (bw->num_page_bugs != 0) {
      dStr_append_c(bw->page_bugs, '\n');
   }
   dStr_sprintfa(bw->page_bugs, "HTML warning: line %d, ",
                 getCurrLineNumber());

   va_start(args, format);
   dStr_vsprintfa(bw->page_bugs, format, args);
   va_end(args);

   bw->num_page_bugs++;
   a_UIcmd_set_bug_prog(bw, bw->num_page_bugs);
}
/*
 * Build a DilloUrl through a_Url_new() and report illegal characters.
 *
 * The URL is resolved against 'base_url' when 'use_base_url' is non-zero,
 * and against html->base_url otherwise. If the resulting URL contains
 * illegal characters, a bug message is logged that tells spaces apart from
 * other illegal bytes. The URL is returned in every case.
 */
DilloUrl *a_Html_url_new(DilloHtml *html,
                         const char *url_str, const char *base_url,
                         int use_base_url)
{
   const char *base = use_base_url ? base_url : URL_STR_(html->base_url);
   DilloUrl *url = a_Url_new(url_str, base);
   const int n_ic = URL_ILLEGAL_CHARS(url);

   if (n_ic == 0)
      return url;

   const char *suffix = (n_ic > 1) ? "s" : "";
   const int n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);

   if (n_ic_spc == n_ic) {
      /* only spaces */
      BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
   } else if (n_ic_spc != 0) {
      /* a mix of spaces and other bytes */
      BUG_MSG("URL has %d illegal byte%s: "
              "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
              n_ic, suffix,
              n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
   } else {
      /* no spaces at all */
      BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
              n_ic, suffix, url_str);
   }
   return url;
}
/*
* Set callback function and callback data for the "html/text" MIME type.
*/
void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
{
DilloWeb *web = (DilloWeb*)P;
DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
*Data = (void*)html;
*Call = (CA_Callback_t)Html_callback;
return (void*)html->dw;
}
static void Html_free(void *data)
{
delete ((DilloHtml*)data);
}
/*
* Used by the "Load images" page menuitem.
*/
void a_Html_load_images(void *v_html, DilloUrl *pattern)
{
DilloHtml *html = (DilloHtml*)v_html;
html->loadImages(pattern);
}
/*
 * Tell whether 'v_form' is one of the forms owned by 'html'.
 * The comparison is by pointer identity; the form is never dereferenced.
 */
static bool Html_contains_form(DilloHtml *html, void *v_form)
{
   bool found = false;

   for (int idx = 0; !found && idx < html->forms->size(); idx++)
      found = (html->forms->get(idx) == v_form);

   return found;
}
/*
 * Used by the "Submit form" form menuitem.
 * The form is submitted only if it still belongs to this document.
 */
void a_Html_form_submit(void *v_html, void *v_form)
{
   DilloHtml *html = (DilloHtml*)v_html;

   if (!Html_contains_form(html, v_form))
      return;                     /* not (or no longer) one of our forms */

   a_Html_form_submit2(v_form);
}
/*
 * Used by the "Reset form" form menuitem.
 * The form is reset only if it still belongs to this document.
 */
void a_Html_form_reset(void *v_html, void *v_form)
{
   DilloHtml *html = (DilloHtml*)v_html;

   if (!Html_contains_form(html, v_form))
      return;                     /* not (or no longer) one of our forms */

   a_Html_form_reset2(v_form);
}
/*
 * Used by the "Show/Hide hiddens" form menuitem.
 * The request is forwarded only if the form still belongs to this document;
 * 'display' is normalized to a bool first.
 */
void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
{
   DilloHtml *html = (DilloHtml*)v_html;

   if (!Html_contains_form(html, v_form))
      return;                     /* not (or no longer) one of our forms */

   a_Html_form_display_hiddens2(v_form, (display != 0));
}
/*
 * Set the URL data for image maps.
 * When 'x' is not -1, the string "?x,y" is stored as the ismap coordinates
 * of the URL at index 'link'. Nothing happens when 'x' is -1.
 */
static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
{
   if (x == -1)
      return;

   char coords[64];

   snprintf(coords, 64, "?%d,%d", x, y);
   a_Url_set_ismap_coords(html->links->get(link), coords);
}
/*
 * Append '*url' to the document's link vector and return the index of the
 * new entry. A NULL '*url' is stored as NULL.
 */
static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
{
   const int idx = html->links->size();

   html->links->increase();
   html->links->set(idx, *url);

   return idx;
}
/*
 * Evaluate the ALIGN attribute (left|center|right|justify) of 'tag' and
 * register the result as a non-CSS text-align hint.
 *
 * Does nothing when the attribute is absent. Unrecognized values fall back
 * to left alignment. For HTML5 documents a bug message notes that the
 * attribute is obsolete.
 */
void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
{
   const char *align = a_Html_get_attr(html, tag, tagsize, "align");

   if (!align)
      return;

   TextAlignType hint = TEXT_ALIGN_LEFT;   /* default for unknown values */

   if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
      BUG_MSG("The align attribute is obsolete in HTML5.");

   if (dStrAsciiCasecmp (align, "left") == 0)
      hint = TEXT_ALIGN_LEFT;
   else if (dStrAsciiCasecmp (align, "right") == 0)
      hint = TEXT_ALIGN_RIGHT;
   else if (dStrAsciiCasecmp (align, "center") == 0)
      hint = TEXT_ALIGN_CENTER;
   else if (dStrAsciiCasecmp (align, "justify") == 0)
      hint = TEXT_ALIGN_JUSTIFY;
#if 0
   else if (dStrAsciiCasecmp (align, "char") == 0) {
      /* TODO: Actually not supported for etc. */
      v.textAlign = TEXT_ALIGN_STRING;
      if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
         if (charattr[0] == 0)
            /* TODO: ALIGN=" ", and even ALIGN="&32;" will reult in
             * an empty string (don't know whether the latter is
             * correct, has to be clarified with the specs), so
             * that for empty strings, " " is assumed. */
            style_attrs.textAlignChar = ' ';
         else
            style_attrs.textAlignChar = charattr[0];
      } else
         /* TODO: Examine LANG attr of . */
         style_attrs.textAlignChar = '.';
   }
#endif

   html->styleEngine->setNonCssHint(CSS_PROPERTY_TEXT_ALIGN, CSS_TYPE_ENUM,
                                    hint);
}
/*
 * Evaluate the VALIGN attribute (top|bottom|middle|baseline) of 'tag' and
 * register it as a non-CSS vertical-align hint.
 *
 * Returns true when the attribute is present (any value other than top,
 * bottom or baseline is treated as middle), false when it is absent.
 * For HTML5 documents a bug message notes that the attribute is obsolete.
 */
bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
{
   const char *value = a_Html_get_attr(html, tag, tagsize, "valign");

   if (value == NULL)
      return false;

   if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
      BUG_MSG("The valign attribute is obsolete in HTML5.");

   VAlignType valign = VALIGN_MIDDLE;      /* "middle" and anything unknown */

   if (dStrAsciiCasecmp (value, "top") == 0)
      valign = VALIGN_TOP;
   else if (dStrAsciiCasecmp (value, "bottom") == 0)
      valign = VALIGN_BOTTOM;
   else if (dStrAsciiCasecmp (value, "baseline") == 0)
      valign = VALIGN_BASELINE;

   html->styleEngine->setNonCssHint (CSS_PROPERTY_VERTICAL_ALIGN,
                                     CSS_TYPE_ENUM, valign);
   return true;
}
/*
 * Create a new Textblock, add it to the current Textblock and make it the
 * current one (both html->dw and the top stack item point to it afterwards).
 *
 * addBreaks:   surround the new widget with paragraph breaks of
 *              'breakSpace' and mark the stack item with hand_over_break.
 * addBreakOpt: add a break option right after the widget.
 * Typically only one of addBreaks and addBreakOpt is true.
 */
static void Html_add_textblock(DilloHtml *html, bool addBreaks, int breakSpace,
                               bool addBreakOpt)
{
   Textblock *child = new Textblock (prefs.limit_text_width);

   if (addBreaks) {
      HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
   }

   /* Works also for floats etc. */
   HT2TB(html)->addWidget (child, html->style ());

   if (addBreakOpt) {
      HT2TB(html)->addBreakOption (html->style (), false);
   }
   if (addBreaks) {
      HT2TB(html)->addParbreak (breakSpace, html->wordStyle ());
   }

   /* From here on, text goes into the new block. */
   html->dw = child;
   S_TOP(html)->textblock = child;

   if (addBreaks) {
      S_TOP(html)->hand_over_break = true;
   }
}
/*
 * Ask the current Textblock whether breaks must be added for the current
 * style.
 */
static bool Html_must_add_breaks(DilloHtml *html)
{
   const bool needed = HT2TB(html)->mustAddBreaks (html->style ());

   return needed;
}
/*
 * Create and initialize a new DilloHtml class.
 *
 * p_bw:         browser window the document belongs to; the new object is
 *               registered with it through a_Bw_add_doc().
 * url:          page URL; duplicated for both page_url and base_url.
 * content_type: content type string; duplicated, and its charset part is
 *               extracted into 'charset'.
 *
 * The parser stack is created with one initial item, the collections for
 * forms, inputs, links, images and CSS URLs are created empty, the main
 * widget is created by initDw(), and Html_free() is hooked to the widget's
 * delete callback so that this object goes away together with the widget.
 */
DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
const char *content_type)
{
/* Init main variables */
bw = p_bw;
page_url = a_Url_dup(url);
base_url = a_Url_dup(url);
dw = NULL;
/* Init event receiver */
linkReceiver.html = this;
HT2LT(this)->connectLink (&linkReceiver);
a_Bw_add_doc(p_bw, this);
/* Init for-parsing variables */
Start_Buf = NULL;
Start_Ofs = 0;
_MSG("DilloHtml(): content type: %s\n", content_type);
this->content_type = dStrdup(content_type);
/* get charset */
a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
stop_parser = false;
/* Line counting starts at offset 0, line 1 (see getCurrLineNumber()) */
CurrOfs = OldOfs = 0;
OldLine = 1;
DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
DocTypeVersion = 0.0f;
styleEngine = new StyleEngine (HT2LT (this), page_url, base_url);
/* NOTE(review): the SimpleVector instantiations in this function show no
 * template arguments here -- confirm the element types against the class
 * declaration. */
cssUrls = new misc::SimpleVector (1);
stack = new misc::SimpleVector (16);
/* Bottom stack item: never popped (finishParsing() stops at index 0) */
stack->increase();
stack->getRef(0)->parse_mode = DILLO_HTML_PARSE_MODE_INIT;
stack->getRef(0)->table_mode = DILLO_HTML_TABLE_MODE_NONE;
stack->getRef(0)->table_border_mode = DILLO_HTML_TABLE_BORDER_SEPARATE;
stack->getRef(0)->cell_text_align_set = false;
stack->getRef(0)->display_none = false;
stack->getRef(0)->list_type = HTML_LIST_NONE;
stack->getRef(0)->list_number = 0;
stack->getRef(0)->tag_idx = -1; /* MUST not be used */
stack->getRef(0)->textblock = NULL;
stack->getRef(0)->table = NULL;
stack->getRef(0)->ref_list_item = NULL;
stack->getRef(0)->hand_over_break = false;
InFlags = IN_NONE;
Stash = dStr_new("");
StashSpace = false;
pre_column = 0;
PreFirstChar = false;
PrevWasCR = false;
InVisitedLink = false;
ReqTagClose = false;
TagSoup = true;
loadCssFromStash = false;
Num_HTML = Num_HEAD = Num_BODY = Num_TITLE = 0;
attr_data = dStr_sized_new(1024);
non_css_link_color = -1;
non_css_visited_color = -1;
visited_color = -1;
/* Init page-handling variables */
forms = new misc::SimpleVector (1);
inputs_outside_form = new misc::SimpleVector (1);
links = new misc::SimpleVector (64);
images = new misc::SimpleVector (16);
/* Initialize the main widget */
initDw();
/* Hook destructor to the dw delete call */
dw->setDeleteCallback(Html_free, this);
}
/*
* Miscellaneous initializations for Dw
*/
void DilloHtml::initDw()
{
dReturn_if_fail (dw == NULL);
/* Create the main widget */
dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width);
bw->num_page_bugs = 0;
dStr_truncate(bw->page_bugs, 0);
}
/*
 * Free memory used by the DilloHtml class: the parse data, the document's
 * registration in the browser window, the page and base URLs, and every
 * collection together with the items it owns.
 */
DilloHtml::~DilloHtml()
{
   _MSG("::~DilloHtml(this=%p)\n", this);

   freeParseData();
   a_Bw_remove_doc(bw, this);

   a_Url_free(page_url);
   a_Url_free(base_url);

   /* stylesheet URLs */
   for (int n = 0; n < cssUrls->size(); n++) {
      a_Url_free(cssUrls->get(n));
   }
   delete cssUrls;

   /* forms */
   for (int n = 0; n < forms->size(); n++) {
      a_Html_form_delete (forms->get(n));
   }
   delete forms;

   /* inputs that live outside of any form */
   for (int n = 0; n < inputs_outside_form->size(); n++) {
      a_Html_input_delete(inputs_outside_form->get(n));
   }
   delete inputs_outside_form;

   /* link URLs */
   for (int n = 0; n < links->size(); n++) {
      a_Url_free(links->get(n));
   }
   delete links;

   /* image records: URL, image reference and the record itself */
   for (int n = 0; n < images->size(); n++) {
      DilloHtmlImage *entry = images->get(n);

      a_Url_free(entry->url);
      a_Image_unref(entry->image);
      dFree(entry);
   }
   delete images;

   delete styleEngine;
}
/*
 * Process the newly arrived html and put it into the page structure.
 * (This function is called by Html_callback whenever there's new data)
 *
 * 'Buf' holds the whole document received so far; only the part starting
 * at Start_Ofs is handed to the parser, and Start_Ofs is advanced by the
 * amount the parser reports back.
 */
void DilloHtml::write(char *Buf, int BufSize, int Eof)
{
   _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
#if 0
   char *aux = dStrndup(Buf, BufSize);
   MSG(" {%s}\n", aux);
   dFree(aux);
#endif

   /* Update Start_Buf. It may be used after the parser is stopped */
   Start_Buf = Buf;

   dReturn_if (dw == NULL);
   dReturn_if (stop_parser == true);

   /* Feed the parser with the unconsumed tail only */
   char *pending = Buf + Start_Ofs;
   const int pendingSize = BufSize - Start_Ofs;
   const int consumed = Html_write_raw(this, pending, pendingSize, Eof);

   Start_Ofs += consumed;
}
/*
 * Return the line number of the tag/word being processed by the parser.
 *
 * Counting resumes from the cached (OldOfs, OldLine) pair and runs up to
 * CurrOfs; the cache is then updated. "\n", a lone "\r" and "\r\n" each
 * count as one line end. Returns -1 when there is no buffer yet or while
 * the META hack is active.
 */
int DilloHtml::getCurrLineNumber()
{
   const char *p = Start_Buf;

   dReturn_val_if_fail(p != NULL, -1);
   /* Disable line counting for META hack. Buffers differ. */
   dReturn_val_if((InFlags & IN_META_HACK), -1);

   const int end = CurrOfs;
   int line = OldLine;
   int pos = OldOfs;

   while (pos < end) {
      if (p[pos] == '\n')
         line++;
      else if (p[pos] == '\r' && p[pos+1] != '\n')
         line++;                 /* a CR that is not the start of CRLF */
      pos++;
   }

   OldOfs = CurrOfs;
   OldLine = line;
   return line;
}
/*
 * Free parsing data: the element stack, the stash and attribute buffers
 * (including their character data), and the content type and charset
 * strings.
 */
void DilloHtml::freeParseData()
{
   delete stack;

   dStr_free(Stash, TRUE);
   dStr_free(attr_data, TRUE);

   dFree(content_type);
   dFree(charset);
}
/*
 * Finish parsing a HTML page. Close the parser and close the client.
 * The class is not deleted here, it remains until the widget is destroyed.
 *
 * Does nothing if the parser was already stopped. Elements still open are
 * closed from the top of the stack downwards; the bottom item (index 0)
 * is left in place.
 */
void DilloHtml::finishParsing(int ClientKey)
{
   dReturn_if (stop_parser == true);

   /* flag we've already parsed up to the last byte */
   InFlags |= IN_EOF;

   /* force the close of elements left open (TODO: not for XHTML) */
   for (int top = stack->size() - 1; top != 0; top = stack->size() - 1) {
      const int open_idx = stack->getRef(top)->tag_idx;

      if (open_idx != -1)
         Html_tag_cleanup_at_close(this, open_idx);
   }

   /* Nothing left to do with the parser. Clear all flags, except EOF. */
   InFlags = IN_EOF;

   /* Remove this client from our active list */
   a_Bw_close_client(bw, ClientKey);
}
/*
 * Allocate a new form and append it to the document's form vector.
 * The form is created enabled only when no style sheets are pending.
 * Returns the number of forms after the insertion.
 */
int DilloHtml::formNew(DilloHtmlMethod method, const DilloUrl *action,
                       DilloHtmlEnc enc, const char *charset)
{
   // avoid data loss on repush after CSS stylesheets have been loaded
   const bool enabled = (bw->NumPendingStyleSheets == 0);
   DilloHtmlForm *form = a_Html_form_new (this, method, action,
                                          enc, charset, enabled);
   const int slot = forms->size ();

   forms->increase ();
   forms->set (slot, form);
   _MSG("Html formNew: action=%s nform=%d\n", action, slot);

   return forms->size();
}
/*
 * Get the current form, i.e. the last one in the form vector.
 * The vector is indexed with size()-1 unconditionally, so there must be at
 * least one form.
 */
DilloHtmlForm *DilloHtml::getCurrentForm ()
{
   const int last = forms->size() - 1;

   return forms->get (last);
}
/*
 * Return TRUE if at least one image record still holds its image pointer
 * (loadImages() clears that pointer once loading has been started),
 * FALSE otherwise.
 */
bool_t DilloHtml::unloadedImages()
{
   int idx = 0;

   while (idx < images->size()) {
      if (images->get(idx)->image != NULL)
         return TRUE;
      idx++;
   }
   return FALSE;
}
/*
* Load images if they were disabled.
*/
void DilloHtml::loadImages (const DilloUrl *pattern)
{
dReturn_if (a_Bw_expecting(bw));
/* If the user asked for a specific image, the user (NULL) is the requester,
* and the domain mechanism will always permit the request. But if the user
* just asked for all images (clicking "Load images"), use the page URL as
* the requester so that the domain mechanism can act as a filter.
* If the possible patterns become more complex, it might be good to have
* the caller supply the requester instead.
*/
const DilloUrl *requester = pattern ? NULL : this->page_url;
for (int i = 0; i < images->size(); i++) {
DilloHtmlImage *hi = images->get(i);
if (hi->image) {
assert(hi->url);
if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
if (Html_load_image(bw, hi->url, requester, hi->image)) {
a_Image_unref (hi->image);
hi->image = NULL; // web owns it now
}
}
}
}
}
/*
 * Save a copy of 'url' at the end of the CSS URL vector (it may be loaded
 * later).
 */
void DilloHtml::addCssUrl(const DilloUrl *url)
{
   const int slot = cssUrls->size();

   cssUrls->increase();
   cssUrls->set(slot, a_Url_dup(url));
}
/*
 * Handle the "enter" signal.
 * link == -1 means the pointer left a link: the status message is cleared.
 * Otherwise the image-map coordinates are stored in the link's URL and the
 * URL string is shown as status message. Always returns true.
 */
bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
                                         int x, int y)
{
   BrowserWindow *bw = html->bw;

   _MSG(" ** ");
   if (link != -1) {
      _MSG(" Link ENTER notify...\n");
      Html_set_link_coordinates(html, link, x, y);
      a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
   } else {
      _MSG(" Link LEAVE notify...\n");
      a_UIcmd_set_msg(bw, "");
   }
   return true;
}
/*
 * Handle the "press" signal.
 * Only button 3 is handled: it opens the image, page or link popup menu,
 * depending on what is under the pointer, and true is returned. Any other
 * button returns false.
 */
bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
                                         int x, int y, EventButton *event)
{
   BrowserWindow *bw = html->bw;

   _MSG("pressed button %d\n", event->button);

   if (event->button != 3)
      return false;

   // popup menus
   if (img != -1) {
      // image menu (the image may sit inside a link)
      DilloUrl *linkurl = NULL;

      if (link != -1)
         linkurl = html->links->get(link);
      const bool_t loaded_img = (html->images->get(img)->image == NULL);
      a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
                          html->page_url, linkurl);
   } else if (link == -1) {
      // page menu
      a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
   } else {
      // link menu
      a_UIcmd_link_popup(bw, html->links->get(link));
   }
   return true;
}
/*
 * Handle the "click" signal.
 *
 * - Button 1 on an image that was not loaded yet loads every instance of
 *   that image and returns true.
 * - Otherwise, on a link: button 1 opens it, button 2 opens it in a new tab
 *   or a new window (depending on preferences; SHIFT inverts the tab focus
 *   preference), any other button returns false. After opening, the link is
 *   recolored as visited in the nearest enclosing Textblock.
 * - Anything else returns true.
 */
bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
                                         int x, int y, EventButton *event)
{
   BrowserWindow *bw = html->bw;

   if ((img != -1) && (html->images->get(img)->image) &&
       event->button == 1) {
      // clicked an image that has not already been loaded:
      // load all instances of this image
      DilloUrl *pattern = html->images->get(img)->url;

      html->loadImages(pattern);
      return true;
   }

   if (link == -1)
      return true;

   DilloUrl *url = html->links->get(link);

   _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
   Html_set_link_coordinates(html, link, x, y);

   switch (event->button) {
   case 1:
      a_UIcmd_open_url(bw, url);
      break;
   case 2:
      if (prefs.middle_click_opens_new_tab) {
         int focus = prefs.focus_new_tab ? 1 : 0;

         if (event->state == SHIFT_MASK)
            focus = !focus;
         a_UIcmd_open_url_nt(bw, url, focus);
      } else {
         a_UIcmd_open_url_nw(bw, url);
      }
      break;
   default:
      return false;
   }

   /* Change the link color to "visited" as visual feedback */
   for (Widget *w = widget; w; w = w->getParent()) {
      _MSG(" ->%s\n", w->getClassName());
      if (w->instanceOf(dw::Textblock::CLASS_ID)) {
         ((Textblock*)w)->changeLinkColor (link, html->visited_color);
         break;
      }
   }
   return true;
}
/*
 * Initialize the stash buffer: empty it, clear the pending-space flag and
 * switch the top stack item to STASH parse mode.
 */
void a_Html_stash_init(DilloHtml *html)
{
   dStr_truncate(html->Stash, 0);
   html->StashSpace = false;
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
}
/*
 * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them!
 *
 * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED.
 * TODO: Probably I should remove this hack. --Jcid
 *
 * Mapping: 145/146 -> apostrophe, 147/148 -> double quote, 149 -> 176,
 * 150/151 -> hyphen. Every other value is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int codepoint)
{
   if (codepoint == 145 || codepoint == 146)
      return '\'';
   if (codepoint == 147 || codepoint == 148)
      return '"';
   if (codepoint == 149)
      return 176;
   if (codepoint == 150 || codepoint == 151)
      return '-';

   return codepoint;
}
/*
 * Parse a numeric character reference (e.g., "&#47;" or "&#x2F;").
 * The "&#" has already been consumed.
 *
 * html:    parser state (document type, and target of bug messages).
 * tok:     writable, NUL-terminated text that follows "&#". It is modified
 *          temporarily while a message is built and restored afterwards.
 * is_attr: TRUE when parsing inside an attribute value; a missing ';' then
 *          makes the reference invalid.
 * entsize: on success receives the length of the whole reference, counting
 *          the "&#" prefix and the ';' if present.
 *
 * Returns the UTF-8 encoding of the code point, or NULL if the reference is
 * not accepted. The result lives in a static buffer that is overwritten by
 * the next call.
 */
static const char *Html_parse_numeric_charref(DilloHtml *html, char *tok,
                                              bool_t is_attr, int *entsize)
{
   static char buf[5];
   char *s = tok;
   int n, codepoint = -1;
   long value = -1;

   errno = 0;

   if (*s == 'x' || *s == 'X') {
      if (isxdigit((unsigned char)*++s)) {
         /* strtol with base 16 accepts a leading "0x" or "0X" - we don't */
         if (*s == '0' && (s[1] == 'x' || s[1] == 'X')) {
            s++;
            value = 0;
         } else {
            value = strtol(s, &s, 16);
         }
      }
   } else if (isdigit((unsigned char)*s)) {
      value = strtol(s, &s, 10);
   }

   /* Check the range while the value is still a long: narrowing first could
    * wrap an oversized number into a valid code point where long is wider
    * than int. Everything outside Unicode is treated like a parse error. */
   if (errno || value < 0 || value > 0x10ffff)
      codepoint = -1;
   else
      codepoint = (int) value;

   if (*s == ';')
      s++;
   else {
      if (prefs.show_extra_warnings && (html->DocType == DT_XHTML ||
          (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f))) {
         char c = *s;
         *s = '\0';
         BUG_MSG("Character reference '%s' lacks ';'.", tok);
         *s = c;
      }
      /* Don't require ';' for old HTML, except that our current heuristic
       * is to require it in attributes to avoid cases like "&copy=1" found
       * in URLs.
       */
      if (is_attr || html->DocType == DT_XHTML ||
          (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
         return NULL;
      }
   }

   if ((codepoint < 0x20 && codepoint != '\t' && codepoint != '\n' &&
        codepoint != '\f') ||
       (codepoint >= 0x7f && codepoint <= 0x9f) ||
       (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff ||
       ((codepoint & 0xfffe) == 0xfffe) ||
       (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
        codepoint > 0xffff)) {
      /* this catches null bytes, errors, codes out of range, disallowed
       * control chars, permanently undefined chars, and surrogates.
       */
      char c = *s;
      *s = '\0';
      BUG_MSG("Numeric character reference '%s' is not valid.", tok);
      *s = c;

      /* The w1252 "smart quotes" range is still mapped to something
       * printable; everything else is rejected. */
      codepoint = (codepoint >= 145 && codepoint <= 151) ?
                  Html_ms_stupid_quotes_2ucs(codepoint) : -1;
   }

   if (codepoint != -1) {
      if (codepoint >= 128) {
         n = a_Utf8_encode(codepoint, buf);
      } else {
         n = 1;
         buf[0] = (char) codepoint;
      }
      assert(n < 5);
      buf[n] = '\0';
      /* +2 accounts for the "&#" the caller already consumed */
      *entsize = s-tok+2;
      return buf;
   } else {
      return NULL;
   }
}
/*
* Comparison function for binary search
*/
static int Html_charref_comp(const void *a, const void *b)
{
return strcmp(((Charref_t *)a)->ref, ((Charref_t *)b)->ref);
}
/*
 * Binary search of 'key' in the Charrefs table (NumRef entries).
 * Returns the matching entry, or NULL when there is none.
 */
static Charref_t *Html_charref_search(char *key)
{
   Charref_t probe;

   probe.ref = key;

   void *hit = bsearch(&probe, Charrefs, NumRef,
                       sizeof(Charref_t), Html_charref_comp);
   return (Charref_t*) hit;
}
/*
 * Parse a named character reference (e.g., "&amp;" or "&hellip;").
 * The "&" has already been consumed.
 *
 * html:    parser state (document type, and target of bug messages).
 * tok:     writable, NUL-terminated text that follows the "&"; it is
 *          modified while the name is looked up.
 * is_attr: TRUE when parsing inside an attribute value; a missing ';' then
 *          makes the reference invalid.
 * entsize: receives the length of the reference, counting the "&" and the
 *          ';' if present. It is not written on the early "missing ';'"
 *          return.
 *
 * Returns the replacement string, or NULL if the reference is rejected or
 * unknown.
 */
static const char *Html_parse_named_charref(DilloHtml *html, char *tok,
                                            bool_t is_attr, int *entsize)
{
   const bool is_xhtml = (html->DocType == DT_XHTML);
   const bool is_html5 = (html->DocType == DT_HTML &&
                          html->DocTypeVersion >= 5.0f);
   const char *ret = NULL;
   char *end = tok;
   char saved;

   /* Find the end of the name; the first character is never examined. */
   do {
      ++end;
   } while (*end && (isalnum(*end) || strchr(":_.-", *end)));

   /* Terminate the name in place for the lookup below. */
   saved = *end;
   *end = '\0';

   if (saved != ';') {
      if (prefs.show_extra_warnings && (is_xhtml ||
          (html->DocType == DT_HTML && html->DocTypeVersion <= 4.01f)))
         BUG_MSG("Character reference '&%s' lacks ';'.", tok);

      /* Don't require ';' for old HTML, except that our current heuristic
       * is to require it in attributes to avoid cases like "&copy=1" found
       * in URLs.
       */
      if (is_attr || is_xhtml || is_html5)
         return ret;
   }

   Charref_t *match = Html_charref_search(tok);

   if (match)
      ret = is_html5 ? match->html5_str : match->html4_str;

   /* "apos" is additionally accepted for XHTML */
   if (!ret && is_xhtml && !strcmp(tok, "apos"))
      ret = "'";

   /* Undo the in-place termination and step over the ';' */
   *end = saved;
   if (saved == ';')
      end++;

   if (!ret) {
      saved = *end;
      *end = '\0';
      BUG_MSG("Undefined character reference '&%s'.", tok);
      *end = saved;
   }

   *entsize = end-tok+1;
   return ret;
}
/*
 * Given an entity, return the corresponding string.
 * Returns NULL if not a valid entity.
 *
 * The first character *token is assumed to be == '&'
 *
 * For valid entities, *entsize is set to the length of the parsed entity.
 *
 * The text after the '&' is copied, so 'token' itself is never modified.
 * A '#' right after the '&' selects numeric parsing, a letter selects named
 * parsing; anything else is a literal '&', which is only reported (with
 * extra warnings enabled, and not for HTML5).
 */
static const char *Html_parse_entity(DilloHtml *html, const char *token,
                                     int toksize, int *entsize, bool_t is_attr)
{
   const char *ret = NULL;
   char *tok = dStrndup(token + 1, (uint_t)toksize);

   if (*tok == '#') {
      ret = Html_parse_numeric_charref(html, tok+1, is_attr, entsize);
   } else if (isalpha(*tok)) {
      ret = Html_parse_named_charref(html, tok, is_attr, entsize);
   } else if (prefs.show_extra_warnings &&
              (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))) {
      // HTML5 doesn't mind literal '&'s.
      BUG_MSG("Literal '&'.");
   }

   dFree(tok);
   return ret;
}
/*
 * Parse all the entities in a token. Takes the token and its length, and
 * returns a newly allocated string.
 *
 * Each '&' that starts a valid entity is replaced by the entity's string;
 * every other character, including an '&' that does not, is copied as is.
 * Entities are parsed with is_attr == FALSE.
 */
char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
{
   const int first_amp = strcspn(token, "&");

   if (first_amp >= toksize) {
      /* no ampersands */
      return dStrndup(token, toksize);
   }

   Dstr *out = dStr_sized_new(toksize);

   /* Everything before the first '&' is copied in one go. */
   dStr_append_l(out, token, first_amp);

   int pos = first_amp;

   while (pos < toksize) {
      const char *entstr = NULL;
      int entsize;

      if (token[pos] == '&')
         entstr = Html_parse_entity(html, token+pos, toksize-pos, &entsize,
                                    FALSE);
      if (entstr) {
         dStr_append(out, entstr);
         pos += entsize;
      } else {
         dStr_append_c(out, token[pos]);
         pos++;
      }
   }

   /* Keep the character data, free only the Dstr wrapper. */
   char *str = out->str;

   dStr_free(out, 0);
   return str;
}
/*
 * For white-space: pre-line, we must break the line if encountering a newline.
 * Otherwise, collapse whitespace as usual.
 *
 * space, spacesize: the run of whitespace characters and its length.
 *
 * Every "\r", "\n" or "\r\n" in the run produces one line break. If the run
 * contains no line end at all, a single space is added instead.
 */
static void Html_process_space_pre_line(DilloHtml *html, const char *space,
                                        int spacesize)
{
   int i, breakCnt = 0;

   for (i = 0; i < spacesize; i++) {
      /* Support for "\r", "\n" and "\r\n" line breaks */
      if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
         breakCnt++;
         HT2TB(html)->addLinebreak (html->wordStyle ());
      }
      /* Track every character, as the PRE branch of Html_process_space()
       * does. Updating the flag only when a break is emitted would leave it
       * set after a "\r", and every later lone "\n" would be swallowed. */
      html->PrevWasCR = (space[i] == '\r');
   }
   if (breakCnt == 0) {
      HT2TB(html)->addSpace(html->wordStyle ());
   }
}
/*
 * Parse spaces: handle a run of whitespace according to the parse mode of
 * the top stack item.
 *
 *  - display_none:  the run is discarded.
 *  - STASH:         only records (StashSpace) that a separator is pending,
 *                   and only if the stash already holds some text.
 *  - VERBATIM:      the run is appended unchanged to the stash.
 *  - PRE:           line ends become line breaks, other characters become
 *                   literal spaces, TABs are expanded to the next multiple
 *                   of TAB_SIZE columns.
 *  - anything else: the run is collapsed into a single space, or handed to
 *                   Html_process_space_pre_line() when the word style asks
 *                   for white-space: pre-line.
 *
 * space, spacesize: the run and its length; the run is only accessed
 * through indices below 'spacesize'.
 */
static void Html_process_space(DilloHtml *html, const char *space,
int spacesize)
{
char *spc;
int i, offset;
DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
if (S_TOP(html)->display_none) {
/* do nothing */
} else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
html->StashSpace = (html->Stash->len > 0);
} else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
dStr_append_l(html->Stash, space, spacesize);
} else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
/* number of spaces collected but not yet handed to the textblock */
int spaceCnt = 0;
/* re-scan the string for characters that cause line breaks */
for (i = 0; i < spacesize; i++) {
/* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
if (!html->PreFirstChar &&
(space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
/* flush the pending spaces before breaking the line */
if (spaceCnt) {
spc = dStrnfill(spaceCnt, ' ');
HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
dFree(spc);
spaceCnt = 0;
}
HT2TB(html)->addLinebreak (html->wordStyle ());
html->pre_column = 0;
}
html->PreFirstChar = false;
/* cr and lf should not be rendered -- they appear as a break */
switch (space[i]) {
case '\r':
case '\n':
break;
case '\t':
/* NOTE(review): the message below reads "inside ." -- an element name
 * seems to be missing from it; confirm against the upstream source. */
if (prefs.show_extra_warnings)
BUG_MSG("TAB character inside .");
/* advance to the next tab stop */
offset = TAB_SIZE - html->pre_column % TAB_SIZE;
spaceCnt += offset;
html->pre_column += offset;
break;
default:
spaceCnt++;
html->pre_column++;
break;
}
/* remembered so that the "\n" of a "\r\n" pair is not counted twice */
html->PrevWasCR = (space[i] == '\r');
}
if (spaceCnt) {
// add break possibility for the white-space:pre-wrap case
HT2TB(html)->addBreakOption (html->wordStyle (), false);
spc = dStrnfill(spaceCnt, ' ');
HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
dFree(spc);
}
} else {
if (SGML_SPCDEL) {
/* SGML_SPCDEL ignores white space immediately after an open tag */
} else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
Html_process_space_pre_line(html, space, spacesize);
} else {
HT2TB(html)->addSpace(html->wordStyle ());
}
/* in STASH_AND_BODY mode the stash needs its separator as well */
if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
html->StashSpace = (html->Stash->len > 0);
}
}
/*
 * Handles putting the word into its proper place
 * > STASH and VERBATIM --> html->Stash
 * > STASH_AND_BODY --> html->Stash and the textblock
 * > otherwise it goes through addText()
 *
 * Entities are parsed (or not) according to parse_mode.
 * 'word' is a '\0'-terminated string; 'size' is its length.
 * Nothing is done while the top stack item has display_none set.
 */
static void Html_process_word(DilloHtml *html, const char *word, int size)
{
int i, j, start;
char *Pword;
DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
if (S_TOP(html)->display_none)
return;
/* First part: feed the stash, if the mode uses it. */
if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
/* emit the separator that Html_process_space() left pending */
if (html->StashSpace) {
dStr_append_c(html->Stash, ' ');
html->StashSpace = false;
}
Pword = a_Html_parse_entities(html, word, size);
dStr_append(html->Stash, Pword);
dFree(Pword);
} else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
/* word goes in untouched, it is not processed here. */
dStr_append_l(html->Stash, word, size);
}
/* Second part: feed the textblock, unless the mode is stash-only. */
if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
/* skip until the closing instructions */
} else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
/* all this overhead is to catch white-space entities */
Pword = a_Html_parse_entities(html, word, size);
/* split the decoded word into whitespace runs and text runs */
for (start = i = 0; Pword[i]; start = i)
if (isspace(Pword[i])) {
while (Pword[++i] && isspace(Pword[i])) ;
Html_process_space(html, Pword + start, i - start);
} else {
while (Pword[++i] && !isspace(Pword[i])) ;
HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
html->pre_column += i - start;
html->PreFirstChar = false;
}
dFree(Pword);
} else {
/* word2 is the text to render; beyond_word2 points just past its end */
const char *word2, *beyond_word2;
Pword = NULL;
if (!memchr(word,'&', size)) {
/* No entities */
word2 = word;
beyond_word2 = word + size;
} else {
/* Collapse white-space entities inside the word (except &nbsp;) */
Pword = a_Html_parse_entities(html, word, size);
/* Collapse adjacent " \t\f\n\r" characters into a single space.
 * Done in place: j reads, i writes, and the copy of the terminating
 * '\0' ends the loop. */
for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
if (strchr(" \t\f\n\r", Pword[i])) {
if (i == 0 || (i > 0 && Pword[i-1] != ' '))
Pword[i] = ' ';
else
for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
;
}
}
word2 = Pword;
beyond_word2 = word2 + strlen(word2);
}
/* Split into whitespace runs, zero-width spaces, ideographic characters
 * and ordinary text runs. */
for (start = i = 0; word2[i]; start = i) {
int len;
if (isspace(word2[i])) {
while (word2[++i] && isspace(word2[i])) ;
Html_process_space(html, word2 + start, i - start);
} else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
/* a zero-width space is not rendered, it only allows a break */
i += 3;
HT2TB(html)->addBreakOption(html->wordStyle (), false);
} else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
/* an ideographic character is added on its own, followed by a break
 * option */
i += len;
HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
HT2TB(html)->addBreakOption(html->wordStyle (), false);
} else {
/* 'len' comes from the a_Utf8_ideographic() call in the condition
 * above (first pass) or in the loop condition below.
 * NOTE(review): this relies on a_Utf8_ideographic() storing the
 * character length even when it returns false -- confirm. */
do {
i += len;
} while (word2[i] && !isspace(word2[i]) &&
strncmp(word2+i, utf8_zero_width_space, 3) &&
(!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
}
}
/* true only when the decoded copy was made above */
if (Pword == word2)
dFree(Pword);
}
}
/*
 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
 * structure, with the initial < skipped over (e.g. "P align=center>")?
 *
 * The comparison ignores ASCII case. The name must be followed, still
 * inside 'tagsize', by whitespace, '>' or '/'.
 */
static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
{
   int pos = 0;

   while (pos < tagsize && tagstr[pos] != '\0') {
      if (D_ASCII_TOLOWER(tagstr[pos]) != D_ASCII_TOLOWER(tag[pos]))
         return false;
      pos++;
   }

   if (pos >= tagsize)
      return false;               /* nothing left to delimit the name */

   /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
   return isspace(tag[pos]) || tag[pos] == '>' || tag[pos] == '/';
}
/*
 * This function is called after popping the stack, to
 * handle nested Textblock widgets.
 *
 * If the current widget is not the textblock of the new top item, it is
 * flushed (after handing over its break, when requested) and the top
 * item's textblock becomes the current widget again.
 */
static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
{
   if (html->dw == S_TOP(html)->textblock)
      return;                     /* still inside the same textblock */

   if (hand_over_break)
      HT2TB(html)->handOverBreak (html->style ());
   HT2TB(html)->flush ();

   html->dw = S_TOP(html)->textblock;
}
/*
 * Push the tag: the new top item starts as a copy of the previous top item
 * and only gets its own tag index. html->dw is set to the textblock of the
 * new top item.
 */
static void Html_push_tag(DilloHtml *html, int tag_idx)
{
   const int new_top = html->stack->size ();

   html->stack->increase ();

   /* We'll copy the former stack item and just change the tag and its index
    * instead of copying all fields except for tag. --Jcid */
   *html->stack->getRef(new_top) = *html->stack->getRef(new_top - 1);
   html->stack->getRef(new_top)->tag_idx = tag_idx;

   html->dw = S_TOP(html)->textblock;
}
/*
 * Push the tag (used to force an element with optional open into the
 * stack). Unlike a bare Html_push_tag(), the element is first announced
 * through html->startElement().
 */
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
{
   html->startElement (tag_idx);

   Html_push_tag(html, tag_idx);
}
/*
 * Pop the top tag in the stack: end the element in the style engine,
 * shrink the stack by one item, and switch back to the enclosing textblock
 * if the popped item had its own.
 */
static void Html_real_pop_tag(DilloHtml *html)
{
   html->styleEngine->endElement (S_TOP(html)->tag_idx);

   /* Read the flag before the item goes away. */
   const bool hand_over_break = S_TOP(html)->hand_over_break;

   html->stack->setSize (html->stack->size() - 1);
   Html_eventually_pop_dw(html, hand_over_break);
}
/*
 * Cleanup the stack to a given index.
 *
 * Elements are closed and popped from the top until the stack size equals
 * 'idx'. Every element closed this way, except the last one (the one that
 * sits at 'idx'), is reported as a forced close unless its end tag is
 * optional (EndTag == 'O').
 */
static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx)
{
   int s_sz;

   while ((s_sz = html->stack->size()) > idx) {
      /* Refer to the table entry directly; no need to copy the struct. */
      const TagInfo &toptag = Tags[S_TOP(html)->tag_idx];

      if (s_sz > idx + 1 && toptag.EndTag != 'O')
         BUG_MSG(" - forcing close of open tag: <%s>.", toptag.name);
      /* Debug trace; the field width is the current stack depth. */
      _MSG("Close: %*s%s\n", s_sz, " ", toptag.name);
      if (toptag.close)
         toptag.close(html);
      Html_real_pop_tag(html);
   }
}
/*
 * Default close function for tags.
 * (conditional cleanup of the stack)
 * There are several ways of doing it. Considering the HTML 4.01 spec
 * which defines optional close tags, and the will to deliver useful diagnose
 * messages for bad-formed HTML, it'll go as follows:
 *
 * If 'w3c_mode' is enabled (prefs.w3c_plus_heuristics is off):
 *   1.- Search the stack for the first tag that requires a close tag.
 *   2.- If it matches, clean all the optional-close tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 *
 * If 'w3c_mode' is NOT enabled:
 *   1.- Search the stack for a matching tag based on tag level.
 *   2.- If it exists, clean all the tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 *
 * In both modes a closing BUTTON, SELECT or TEXTAREA is allowed to close
 * the tags opened inside it while the corresponding IN_* flag is set.
 *
 * 'new_idx' is the index in Tags[] of the closing tag being processed.
 * When no element is closed, the stack is left untouched and only a
 * diagnostic is emitted.
 */
static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
{
   static int i_BUTTON = a_Html_tag_index("button"),
              i_SELECT = a_Html_tag_index("select"),
              i_TEXTAREA = a_Html_tag_index("textarea");
   int w3c_mode = !prefs.w3c_plus_heuristics;
   int stack_idx, tag_idx, matched = 0, expected = 0;
   TagInfo new_tag = Tags[new_idx];

   /* Look for the candidate tag to close.
    * The element at index 0 is never examined. */
   stack_idx = html->stack->size();
   while (--stack_idx) {
      tag_idx = html->stack->getRef(stack_idx)->tag_idx;
      if (tag_idx == new_idx) {
         /* matching tag found */
         matched = 1;
         break;
      } else if (Tags[tag_idx].EndTag == 'O') {
         /* skip an optional tag */
         continue;
      } else if ((new_idx == i_BUTTON && (html->InFlags & IN_BUTTON)) ||
                 (new_idx == i_SELECT && (html->InFlags & IN_SELECT)) ||
                 (new_idx == i_TEXTAREA && (html->InFlags & IN_TEXTAREA))) {
         /* let these elements close tags inside them */
         continue;
      } else if (w3c_mode || Tags[tag_idx].TagLevel >= new_tag.TagLevel) {
         /* this is the tag that should have been closed */
         expected = 1;
         break;
      }
   }

   if (matched) {
      Html_tag_cleanup_to_idx(html, stack_idx);
   } else if (expected) {
      /* tag_idx still holds the element that blocked the search */
      BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
              new_tag.name, Tags[tag_idx].name);
   } else {
      BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
   }
}
/*
 * Avoid nesting and inter-nesting of BUTTON, SELECT and TEXTAREA,
 * by closing them before opening another.
 * This is not an HTML SPEC restriction, but it avoids lots of trouble
 * inside dillo (concurrent inputs), and makes almost no sense to have.
 *
 * 'new_idx' is the Tags[] index of the element about to be opened. The
 * function returns without doing anything unless one of the IN_BUTTON,
 * IN_SELECT or IN_TEXTAREA flags is set and 'new_idx' is one of those
 * three elements. Otherwise the still-open element is looked up in the
 * stack and closed (with everything above it), and all three flags are
 * cleared.
 */
static void Html_tag_cleanup_nested_inputs(DilloHtml *html, int new_idx)
{
   static int i_BUTTON = a_Html_tag_index("button"),
              i_SELECT = a_Html_tag_index("select"),
              i_TEXTAREA = a_Html_tag_index("textarea");
   int stack_idx, u_idx, matched = 0;

   dReturn_if_fail(html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA));
   dReturn_if_fail(new_idx == i_BUTTON || new_idx == i_SELECT ||
                   new_idx == i_TEXTAREA);

   /* Get the unclosed tag index */
   u_idx = (html->InFlags & IN_BUTTON) ? i_BUTTON :
           (html->InFlags & IN_SELECT) ? i_SELECT : i_TEXTAREA;

   /* Look for it inside the stack (index 0 is never examined) */
   stack_idx = html->stack->size();
   while (--stack_idx) {
      if (html->stack->getRef(stack_idx)->tag_idx == u_idx) {
         /* matching tag found */
         matched = 1;
         break;
      }
   }

   if (matched) {
      BUG_MSG("Attempt to nest <%s> element inside <%s> -- closing <%s>.",
              Tags[new_idx].name, Tags[u_idx].name, Tags[u_idx].name);
      Html_tag_cleanup_to_idx(html, stack_idx);
   } else {
      /* The two literals are concatenated: keep the separating space. */
      MSG_WARN("Inconsistent parser state, flag is SET but no '%s' element "
               "was found in the stack\n", Tags[u_idx].name);
   }

   html->InFlags &= ~(IN_BUTTON | IN_SELECT | IN_TEXTAREA);
}
/*
* Some parsing routines.
*/
/*
 * Used by a_Html_parse_length
 *
 * Parses a leading number from 'attr' with strtod() and classifies it by
 * the character that follows:
 *   '%'       percentage (the number is divided by 100),
 *   '*'       relative length,
 *   otherwise pixels.
 * If 'endptr' is not NULL, it receives the position right after the number
 * and its '%' or '*' suffix, when there is one.
 */
static CssLength Html_parse_length_or_multi_length (const char *attr,
                                                    char **endptr)
{
   char *rest;
   const double value = strtod (attr, &rest);
   CssLength result;

   if (*rest == '%') {
      ++rest;
      result = CSS_CREATE_LENGTH (value / 100, CSS_LENGTH_TYPE_PERCENTAGE);
   } else if (*rest == '*') {
      ++rest;
      result = CSS_CREATE_LENGTH (value, CSS_LENGTH_TYPE_RELATIVE);
   } else {
      /*
       * The "px" suffix seems not allowed by HTML4.01 SPEC, so it is
       * deliberately not skipped here:
       *    if (rest[0] == 'p' && rest[1] == 'x')
       *       rest += 2;
       */
      result = CSS_CREATE_LENGTH (value, CSS_LENGTH_TYPE_PX);
   }

   if (endptr)
      *endptr = rest;
   return result;
}
/*
 * Parse an HTML length attribute.
 *
 * Returns a pixel length or a percentage. An "auto" length
 * (CSS_LENGTH_TYPE_AUTO) is returned instead when:
 *   - 'attr' is NULL,
 *   - the value is a relative ("*") length, which is not allowed here, or
 *   - the number is directly followed by something other than whitespace
 *     (a bug message is emitted in this case).
 */
CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
{
   CssLength l;
   char *end;

   /* Nothing to parse; strtod() must not be handed a NULL pointer. */
   if (!attr)
      return CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);

   l = Html_parse_length_or_multi_length (attr, &end);
   if (CSS_LENGTH_TYPE (l) == CSS_LENGTH_TYPE_RELATIVE) {
      /* not allowed as &Length; */
      l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
   } else if (*end && !isspace (static_cast<unsigned char>(*end))) {
      /* allow only whitespaces; the cast keeps isspace() defined for
       * non-ASCII bytes */
      BUG_MSG("Garbage after length: '%s'.", attr);
      l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
   }

   _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
   return l;
}
/*
 * Parse a color attribute.
 *
 * 'str' is handed to a_Color_parse() together with 'default_color'; when
 * that call leaves its error flag set, a bug message is emitted.
 * Return value: whatever a_Color_parse() returned.
 */
int32_t a_Html_color_parse(DilloHtml *html, const char *str,
                           int32_t default_color)
{
   int parse_failed = 1;
   const int32_t parsed = a_Color_parse(str, default_color, &parse_failed);

   if (parse_failed != 0)
      BUG_MSG("Color '%s' is not in \"#RRGGBB\" format.", str);
   return parsed;
}
/*
 * Validate the value of a name-like attribute ('attrname' is only used in
 * the diagnostics).
 *
 * HTML documents with DocTypeVersion >= 5.0: the value must be non-empty
 * and must not contain a space character.
 *
 * Everything else: the value should have the form
 * '[A-Za-z][A-Za-z0-9:_.-]*'. A bug message is emitted when it does not,
 * but only a character outside [A-Za-z0-9:_.-] makes the check fail; an
 * empty value or one that does not start with a letter is reported and
 * still accepted.
 * Note: ID can't have entities, but this check is enough (no '&').
 *
 * Return value: 1 if OK, 0 otherwise.
 */
static int
Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
{
   const bool html5_rules = html->DocType == DT_HTML &&
                            html->DocTypeVersion >= 5.0f;

   if (html5_rules) {
      if (*val && !strchr(val, ' '))
         return 1;
      BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
              "spaces.", attrname, val);
      return 0;
   }

   /* Walk up to the first character that is not allowed in a name. */
   const char *p = val;
   while (*p && isascii(*p) && (isalnum(*p) || strchr(":_.-", *p)))
      ++p;

   const bool starts_with_letter = isascii(val[0]) && isalpha(val[0]);
   if (*p || !starts_with_letter)
      BUG_MSG("%s attribute value \"%s\" is not of the form "
              "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);

   /* Only an illegal character (scan stopped early) is a failure. */
   return *p ? 0 : 1;
}
/*
* Handle DOCTYPE declaration
*
* Follows the convention that HTML 4.01
* doctypes which include a full w3c DTD url are treated as
* standards-compliant, but 4.01 without the url and HTML 4.0 and
* earlier are not. XHTML doctypes are always standards-compliant
* whether or not an url is present.
*
* Note: I'm not sure about this convention. The W3C validator
* recognizes the "HTML Level" with or without the URL. The convention
* comes from mozilla (see URLs below), but Dillo doesn't have the same
* rendering modes, so it may be better to choose another behaviour. --Jcid
*
* http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
* http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
*
* This is not a full DOCTYPE parser, just enough for what Dillo uses.
*/
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
{
/* NOTE(review): the next line looks truncated in this copy. The text
 * between the opening quote and "DocType" is missing, and with it
 * (presumably) the remaining signature constants (HTML20 ... XHTML11_url)
 * and the set-up of 'ntag', 'p' and 'i' used below. Confirm against the
 * upstream file before changing anything in this function. */
static const char HTML_SGML_sig [] = "DocType != DT_NONE)
BUG_MSG("Multiple DOCTYPE declarations.");
/* The default DT_NONE type is TagSoup */
/* First form: 'ntag' starts (ASCII case-insensitively) with the SGML
 * signature and is longer than it. */
if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
!dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
/* 'p' skips the signature plus one more character (presumably the
 * opening quote of the public identifier -- confirm). */
p = ntag + strlen(HTML_SGML_sig) + 1;
/* HTML 4.01 and the XHTML types are only recognized when their URL
 * string is also found after the identifier; HTML 4.0, 3.2 and 2.0 are
 * matched on the identifier prefix alone. The order matters: 4.01 is
 * tested before 4.0, and XHTML1 before XHTML11. */
if (!strncmp(p, HTML401, strlen(HTML401)) &&
dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
html->DocType = DT_HTML;
html->DocTypeVersion = 4.01f;
} else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
html->DocType = DT_XHTML;
html->DocTypeVersion = 1.0f;
} else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
html->DocType = DT_XHTML;
html->DocTypeVersion = 1.1f;
} else if (!strncmp(p, HTML40, strlen(HTML40))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 4.0f;
} else if (!strncmp(p, HTML32, strlen(HTML32))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 3.2f;
} else if (!strncmp(p, HTML20, strlen(HTML20))) {
html->DocType = DT_HTML;
html->DocTypeVersion = 2.0f;
}
/* Second form: an exact (case-insensitive) match that selects HTML 5.0.
 * NOTE(review): both literals compared below are empty in this copy;
 * they presumably held the short HTML5 doctype strings -- confirm. */
} else if (!dStrAsciiCasecmp(ntag, "") ||
!dStrAsciiCasecmp(ntag, "")) {
html->DocType = DT_HTML;
html->DocTypeVersion = 5.0f;
}
/* Nothing above matched: remember that a DOCTYPE was seen but is unknown */
if (html->DocType == DT_NONE) {
html->DocType = DT_UNRECOGNIZED;
BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
}
/* 'ntag' is released here; its allocation is not visible in this copy. */
dFree(ntag);
}
/*
 * Handle open HTML element
 *
 * Sets IN_HTML, counts the HTML open tags seen so far (saturating at
 * UCHAR_MAX) and, from the second one on, reports the duplicate and
 * requests that the tag be closed right away. 'tag' and 'tagsize' are
 * not used.
 */
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
{
   /* The IN_HTML flag will be kept set until at IN_EOF condition.
    * This allows to handle pages with multiple or uneven HTML tags */
   html->InFlags |= IN_HTML;

   if (html->Num_HTML < UCHAR_MAX)
      ++html->Num_HTML;

   if (html->Num_HTML > 1) {
      BUG_MSG("<html> was already open.");
      html->ReqTagClose = true;
   }
}
/*
 * Handle close HTML element
 *
 * Nothing is changed here; the function only emits a debug trace with the
 * current count of HTML open tags.
 */
static void Html_tag_close_html(DilloHtml *html)
{
_MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
}
/*
 * Handle open HEAD element
 *
 * A HEAD is accepted (IN_HEAD gets set) only when the BODY has not started,
 * no HEAD is currently open, and this is the first HEAD of the page. In
 * every other case a bug message is emitted and the tag is requested to be
 * closed right away. Num_HEAD counts the HEAD open tags seen before the
 * BODY (saturating at UCHAR_MAX). 'tag' and 'tagsize' are not used.
 */
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
{
   if (html->InFlags & IN_BODY) {
      BUG_MSG("<head> must go before the BODY section.");
      html->ReqTagClose = true;
      return;
   }

   if (html->Num_HEAD < UCHAR_MAX)
      ++html->Num_HEAD;

   if (html->InFlags & IN_HEAD) {
      BUG_MSG("<head> was already open.");
      html->ReqTagClose = true;
   } else if (html->Num_HEAD > 1) {
      BUG_MSG("<head> already finished -- ignoring.");
      html->ReqTagClose = true;
   } else {
      html->InFlags |= IN_HEAD;
   }
}
/*
 * Handle close HEAD element
 * Note: HEAD is parsed once completely got.
 *
 * When this closes the first (well formed) HEAD, a missing TITLE is
 * reported, IN_HEAD is cleared and the collected stylesheet URLs are
 * loaded. When it closes a redundant HEAD opened while the first one was
 * still open, only the HEAD counter is decremented.
 */
static void Html_tag_close_head(DilloHtml *html)
{
   if (html->InFlags & IN_HEAD) {
      if (html->Num_HEAD == 1) {
         /* match for the well formed start of HEAD section */
         if (html->Num_TITLE == 0)
            BUG_MSG("<head> lacks <title>.");

         html->InFlags &= ~IN_HEAD;

         /* charset is already set, load remote stylesheets now */
         for (int i = 0; i < html->cssUrls->size(); i++) {
            a_Html_load_stylesheet(html, html->cssUrls->get(i));
         }
      } else if (html->Num_HEAD > 1) {
         --html->Num_HEAD;
      }
   } else {
      /* not reached, see Html_tag_cleanup_at_close() */
   }
}
/*
 * Handle open TITLE
 * calls stash init, where the title string will be stored
 *
 * Inside HEAD the TITLE counter is incremented (saturating at UCHAR_MAX)
 * and any TITLE after the first is reported as redundant. Outside HEAD
 * the element is reported and not counted. 'tag' and 'tagsize' are not
 * used.
 */
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
{
   /* fill the stash buffer so TITLE content can be ignored
    * when not valid, redundant or outside HEAD section */
   a_Html_stash_init(html);

   if (html->InFlags & IN_HEAD) {
      if (html->Num_TITLE < UCHAR_MAX)
         ++html->Num_TITLE;
      if (html->Num_TITLE > 1)
         BUG_MSG("Redundant <title>.");
   } else {
      BUG_MSG("<title> must be inside <head> -- ignoring.");
   }
}
/*
 * Handle close TITLE
 *
 * The stashed text becomes the page title of the browser window and of the
 * history entry for the page URL, but only for the first TITLE found
 * inside HEAD; any other TITLE is ignored.
 */
static void Html_tag_close_title(DilloHtml *html)
{
   const bool inside_head = (html->InFlags & IN_HEAD) != 0;

   /* title is only valid inside HEAD, and only the first one counts */
   if (!inside_head || html->Num_TITLE != 1)
      return;

   a_UIcmd_set_page_title(html->bw, html->Stash->str);
   a_History_set_title_by_url(html->page_url, html->Stash->str);
}
/*
 * Handle open SCRIPT
 * Initializes the stash, where the embedded code will be stored, and then
 * puts the top stack element into verbatim parse mode.
 * MODE_VERBATIM is used because MODE_STASH catches entities.
 * 'tag' and 'tagsize' are not used.
 */
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
{
a_Html_stash_init(html);
/* Assigned last, so this is the parse mode left in effect. */
S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}
/*
 * Handle close SCRIPT
 * Currently a no-op: the stashed script text is not used here.
 */
static void Html_tag_close_script(DilloHtml *html)
{
/* eventually the stash will be sent to an interpreter for parsing */
}
/*
* Handle open STYLE
* Store contents in the stash where the style sheet interpreter can get it.
*/
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
{
const char *attrbuf;
html->loadCssFromStash = true;
if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
BUG_MSG("