From 93715c46a99c96d6c866968312691ec9ab0f6a03 Mon Sep 17 00:00:00 2001 From: jcid Date: Sun, 7 Oct 2007 00:36:34 +0200 Subject: Initial revision --- src/html.cc | 5123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 5123 insertions(+) create mode 100644 src/html.cc (limited to 'src/html.cc') diff --git a/src/html.cc b/src/html.cc new file mode 100644 index 00000000..4661525a --- /dev/null +++ b/src/html.cc @@ -0,0 +1,5123 @@ +/* + * File: html.cc + * + * Copyright (C) 2005 Jorge Arellano Cid + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + */ + +/* + * Dillo HTML parsing routines + */ + +/* Undefine if you want to unroll tables. For instance for PDAs */ +#define USE_TABLES + +/* Define to 1 to ignore white space immediately after an open tag, + * and immediately before a close tag. 
*/ +#define SGML_SPCDEL 0 + + +#include /* for isspace and tolower */ +#include /* for memcpy and memmove */ +#include +#include /* for sprintf */ +#include /* for rint */ +#include + +#include /* for utf8encode */ + +#define DEBUG_LEVEL 10 +#include "debug.h" + +#include "msg.h" +#include "binaryconst.h" +#include "colors.h" + +#include "uicmd.hh" + +#define dillo_dbg_rendering 0 + +#include "history.h" +#include "nav.h" +#include "menu.hh" +#include "prefs.h" +#include "misc.h" +#include "capi.h" + +#include "html.hh" +#include "dw/textblock.hh" +#include "dw/bullet.hh" +#include "dw/table.hh" +#include "dw/tablecell.hh" +#include "dw/listitem.hh" +#include "dw/image.hh" +#include "dw/ruler.hh" + + +using namespace dw; +using namespace dw::core; +using namespace dw::core::ui; +using namespace dw::core::style; + +typedef void (*TagOpenFunct) (DilloHtml *Html, char *Tag, int Tagsize); +typedef void (*TagCloseFunct) (DilloHtml *Html, int TagIdx); + +#define TAB_SIZE 8 + +// Dw to Textblock +#define DW2TB(dw) ((Textblock*)dw) +// "html struct" to "Layout" +#define HT2LT(html) ((Layout*)html->bw->render_layout) +// "Image" to "Dw Widget" +#define IM2DW(Image) ((Widget*)Image->dw) +// Top of the parsing stack +#define S_TOP(html) (html->stack->getRef(html->stack->size()-1)) + +/* + * Exported function with C linkage. 
+ */ +extern "C" { +void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data); +} + +/* + * Forward declarations + */ +static const char *Html_get_attr(DilloHtml *html, + const char *tag, + int tagsize, + const char *attrname); +static const char *Html_get_attr2(DilloHtml *html, + const char *tag, + int tagsize, + const char *attrname, + int tag_parsing_flags); +static char *Html_get_attr_wdef(DilloHtml *html, + const char *tag, + int tagsize, + const char *attrname, + const char *def); +static void Html_add_widget(DilloHtml *html, Widget *widget, + char *width_str, char *height_str, + StyleAttrs *style_attrs); +static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof); +static void Html_write(DilloHtml *html, char *Buf, int BufSize, int Eof); +static void Html_close(DilloHtml *html, int ClientKey); +static void Html_callback(int Op, CacheClient_t *Client); +static DilloHtml *Html_new(BrowserWindow *bw, const DilloUrl *url); +static void Html_tag_open_input(DilloHtml *html, char *tag, int tagsize); +static void Html_add_input(DilloHtmlForm *form, + DilloHtmlInputType type, + Widget *widget, + Embed *embed, + const char *name, + const char *init_str, + DilloHtmlSelect *select, + bool_t init_val); +//static void Html_reset_form(GtkWidget *reset, DilloHtmlLB *html_lb); +static int Html_tag_index(const char *tag); + + +/* + * Local Data + */ + +/* The following array of font sizes has to be _strictly_ crescent */ +static const int FontSizes[] = {8, 10, 12, 14, 18, 24}; +static const int FontSizesNum = 6; +static const int FontSizesBase = 2; + +/* Parsing table structure */ +typedef struct { + char *name; /* element name */ + unsigned char Flags; /* flags (explained near the table data) */ + char EndTag; /* Is it Required, Optional or Forbidden */ + uchar_t TagLevel; /* Used to heuristically parse bad HTML */ + TagOpenFunct open; /* Open function */ + TagCloseFunct close; /* Close function */ +} TagInfo; +extern const TagInfo 
Tags[]; + +/* + * Return the line number of the tag being processed by the parser. + */ +static int Html_get_line_number(DilloHtml *html) +{ + int i, ofs, line; + const char *p = html->Start_Buf; + + dReturn_val_if_fail(p != NULL, -1); + + ofs = html->CurrTagOfs; + line = html->OldTagLine; + for (i = html->OldTagOfs; i < ofs; ++i) + if (p[i] == '\n') + ++line; + html->OldTagOfs = html->CurrTagOfs; + html->OldTagLine = line; + return line; +} + +/* + * Collect HTML error strings inside the linkblock. + */ +static void Html_msg(DilloHtml *html, const char *format, ... ) +{ + va_list argp; + + dStr_sprintfa(html->bw->page_bugs, + "HTML warning: line %d, ", + Html_get_line_number(html)); + va_start(argp, format); + dStr_vsprintfa(html->bw->page_bugs, format, argp); + va_end(argp); + a_UIcmd_set_bug_prog(html->bw, ++html->bw->num_page_bugs); +} + +/* + * Wrapper for a_Url_new that adds an error detection message. + * (if use_base_url is TRUE, html->linkblock->base_url is used) + */ +static DilloUrl *Html_url_new(DilloHtml *html, + const char *url_str, const char *base_url, + int flags, int32_t posx, int32_t posy, + int use_base_url) +{ + DilloUrl *url; + int n_ic, n_ic_spc; + + url = a_Url_new( + url_str, + (use_base_url) ? base_url : URL_STR_(html->linkblock->base_url), + flags, posx, posy); + if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) { + const char *suffix = (n_ic) > 1 ? "s" : ""; + n_ic_spc = URL_ILLEGAL_CHARS_SPC(url); + if (n_ic == n_ic_spc) { + MSG_HTML("URL has %d illegal character%s [%d space%s]\n", + n_ic, suffix, n_ic_spc, suffix); + } else if (n_ic_spc == 0) { + MSG_HTML("URL has %d illegal character%s [%d in (00-1F or 7F)]\n", + n_ic, suffix, n_ic); + } else { + MSG_HTML("URL has %d illegal character%s " + "[%d space%s and %d in (00-1F or 7F)]\n", + n_ic, suffix, n_ic_spc, n_ic_spc ? "s" : "", n_ic-n_ic_spc); + } + } + return url; +} + +/* + * Set callback function and callback data for "html/text" MIME type. 
+ */ +void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data) +{ + DilloWeb *web = (DilloWeb*)P; + DilloHtml *html = Html_new(web->bw, web->url); + + *Data = (void *) html; + *Call = (CA_Callback_t) Html_callback; + + return (void*) html->dw; +} + +bool DilloHtmlLB::HtmlLinkReceiver::enter (Widget *widget, int link, + int x, int y) +{ + BrowserWindow *bw = this->lb->bw; + + MSG(" ** "); + if (link == -1 && x == -1 && y == -1) { + _MSG(" Link LEAVE notify...\n"); + a_UIcmd_set_msg(bw, ""); + } else { + _MSG(" Link ENTER notify...\n"); + a_UIcmd_set_msg(bw, "%s", URL_STR(this->lb->links->get(link))); + } + return true; +} + +bool DilloHtmlLB::HtmlLinkReceiver::press (Widget *widget, int link, + int x, int y, EventButton *event) +{ + int ret = false; + + _MSG("pressed button %d\n", event->button); + if (event->button == 3) { + a_UIcmd_page_popup(lb->bw, lb->links->get(link), + lb->bw->num_page_bugs ? lb->bw->page_bugs->str:NULL); + //a_UIcmd_link_popup(lb->bw, lb->links->get(link)); + //a_UIcmd_bugmeter_popup(lb->bw); + ret = true; + } + return ret; +} + +bool DilloHtmlLB::HtmlLinkReceiver::click (Widget *widget, int link, + int x, int y, EventButton *event) +{ + DilloUrl *url = lb->links->get(link); + _MSG("clicked on URL %d: %s\n", link, a_Url_str (url)); + + + if (x != -1) { + char data[64]; + snprintf(data, 64, "?%d,%d", x, y); + a_Url_set_ismap_coords(url, data); + } + + if (event->button == 1) { + a_Nav_push(lb->bw, url); + } else if (event->button == 2) { + a_Nav_push_nw(lb->bw, url); + } else { + return false; + } + + /* Change the link color to "visited" as visual feedback */ + for (Widget *w = widget; w; w = w->getParent()) { + _MSG(" ->%s\n", w->getClassName()); + if (w->instanceOf(dw::Textblock::CLASS_ID)) { + ((Textblock*)w)->changeLinkColor (link, lb->visited_color); + break; + } + } + + return true; +} + + +/* + * We'll make the linkblock first to get it out of the way. 
+ */ +static DilloHtmlLB *Html_lb_new(BrowserWindow *bw, const DilloUrl *url) +{ + DilloHtmlLB *html_lb = dNew(DilloHtmlLB, 1); + + html_lb->bw = bw; + html_lb->base_url = a_Url_dup(url); + html_lb->linkReceiver = new DilloHtmlLB::HtmlLinkReceiver (html_lb); + + html_lb->forms = new misc::SimpleVector (1); + + html_lb->links = new misc::SimpleVector (64); + + //a_Dw_image_map_list_init(&html_lb->maps); + + html_lb->link_color = prefs.link_color; + html_lb->visited_color = prefs.visited_color; + + return html_lb; +} + +/* + * Free the memory used by the linkblock + */ +static void Html_lb_free(void *lb) +{ + int i, j, k; + DilloHtmlForm *form; + DilloHtmlInput *input_j; + DilloHtmlLB *html_lb = (DilloHtmlLB*)lb; + + DEBUG_MSG(3, "Html_lb_free\n"); + + delete html_lb->linkReceiver; + a_Url_free(html_lb->base_url); + + for (i = 0; i < html_lb->forms->size(); i++) { + form = html_lb->forms->getRef(i); + a_Url_free(form->action); + for (j = 0; j < form->inputs->size(); j++) { + input_j = form->inputs->getRef(j); + dFree(input_j->name); + dFree(input_j->init_str); + + if (input_j->type == DILLO_HTML_INPUT_SELECT || + input_j->type == DILLO_HTML_INPUT_SEL_LIST) { + for (k = 0; k < input_j->select->num_options; k++) { + dFree(input_j->select->options[k].value); + } + dFree(input_j->select->options); + dFree(input_j->select); + } + } + delete(form->inputs); + delete(form->form_receiver); + } + delete(html_lb->forms); + + for (i = 0; i < html_lb->links->size(); i++) + if (html_lb->links->get(i)) + a_Url_free(html_lb->links->get(i)); + delete (html_lb->links); + + //a_Dw_image_map_list_free(&html_lb->maps); + + dFree(html_lb); +} + + +/* + * Set the URL data for image maps. 
 + */
+//static void Html_set_link_coordinates(DilloHtmlLB *lb,
+//                                      int link, int x, int y)
+//{
+//   char data[64];
+//
+//   if (x != -1) {
+//      snprintf(data, 64, "?%d,%d", x, y);
+//      a_Url_set_ismap_coords(lb->links->get(link), data);
+//   }
+//}
+
+///*
+// * Handle the status function generated by the dw scroller,
+// * and show the url in the browser status-bar.
+// */
+//static void Html_handle_status(Widget *widget, int link, int x, int y,
+//                               DilloHtmlLB *lb)
+//{
+//   DilloUrl *url;
+//
+//   url = (link == -1) ? NULL : lb->links->get(link);
+//   if (url) {
+//      Html_set_link_coordinates(lb, link, x, y);
+//      a_UIcmd_set_msg(lb->bw, "%s",
+//                         URL_ALT_(url) ? URL_ALT_(url) : URL_STR_(url));
+//      lb->bw->status_is_link = 1;
+//
+//   } else {
+//      if (lb->bw->status_is_link)
+//         a_UIcmd_set_msg(lb->bw, "");
+//   }
+//}
+
+///*
+// * Activate a link ("link_clicked" callback of the page)
+// */
+//static bool_t Html_link_clicked(Widget *widget, int link, int x, int y,
+//                                EventButton *event, DilloHtmlLB *lb)
+//{
+//   Html_set_link_coordinates(lb, link, x, y);
+//   if (event->button == 1)
+//      a_Nav_push(lb->bw, lb->links->get(link));
+//   else if (event->button == 2) {
+//      a_Nav_push_nw(lb->bw, lb->links->get(link));
+//   } else {
+//      return FALSE;
+//   }
+//
+//   if (widget->instanceOf (Textblock::CLASS_ID))
+//      ((Textblock*)widget)->changeLinkColor (link, lb->visited_color);
+//
+//   return TRUE;
+//}
+
+/*
+ * Popup the image menu ("button_press_event" callback of image)
+ *
+ * The whole popup logic in the body is commented out, so this function
+ * currently ignores all three arguments and always returns FALSE.
+ */
+static bool_t Html_image_menu(Widget *widget,
+                              EventButton *event,
+                              BrowserWindow *bw)
+{
+//   Image *image = (Image*)widget;
+//:AL
+//   if (event->button == 3 && image->url) {
+//      a_Menu_popup_set_url(bw, image->url);
+//      a_Menu_popup_clear_url2(bw->menu_popup.over_image);
+//
+//      gtk_menu_popup(GTK_MENU(bw->menu_popup.over_image), NULL, NULL,
+//                     NULL, NULL, event->button, ((DwMouseEvent*)event)->time);
+//      return TRUE;
+//   }
+
+   return FALSE;
+}
+
+/*
+ * Connect all signals of a textblock or an image.
+ */ +static void Html_connect_signals(DilloHtml *html, Widget *widget) +{ + widget->connectLink (html->linkblock->linkReceiver); +} + + +/* + * Create a new link in the linkblock, set it as the url's parent + * and return the index. + */ +static int Html_set_new_link(DilloHtml *html, DilloUrl **url) +{ + int nl = html->linkblock->links->size(); + html->linkblock->links->increase(); + html->linkblock->links->set(nl, (*url) ? *url : NULL); + return nl; +} + + +/* + * Allocate and insert form information into the Html linkblock + */ +static int Html_form_new(DilloHtmlLB *html_lb, + DilloHtmlMethod method, + const DilloUrl *action, + DilloHtmlEnc enc) +{ + DilloHtmlForm *form; + + html_lb->forms->increase(); + form = html_lb->forms->getRef (html_lb->forms->size() - 1); + form->method = method; + form->action = a_Url_dup(action); + form->enc = enc; + form->inputs = new misc::SimpleVector (4); + form->num_entry_fields = 0; + form->num_submit_buttons = 0; + form->form_receiver = new form::Form(html_lb); + + _MSG("Html_form_new: action=%s nform=%d\n", action, nf); + return html_lb->forms->size(); +} + + +/* + * Change one toplevel attribute. var should be an identifier. val is + * only evaluated once, so you can safely use a function call for it. + */ +#define HTML_SET_TOP_ATTR(html, var, val) \ + do { \ + StyleAttrs style_attrs; \ + Style *old_style; \ + \ + old_style = S_TOP(html)->style; \ + style_attrs = *old_style; \ + style_attrs.var = (val); \ + S_TOP(html)->style = \ + Style::create (HT2LT(html), &style_attrs); \ + old_style->unref (); \ + } while (FALSE) + + + +/* + * Set the font at the top of the stack. BImask specifies which + * attributes in BI should be changed. + */ +static void Html_set_top_font(DilloHtml *html, char *name, int size, + int BI, int BImask) +{ + FontAttrs font_attrs; + + font_attrs = *S_TOP(html)->style->font; + if (name) + font_attrs.name = name; + if (size) + font_attrs.size = size; + if (BImask & 1) + font_attrs.weight = (BI & 1) ? 
700 : 400; + if (BImask & 2) + font_attrs.style = (BI & 2) ? + (prefs.use_oblique ? + FONT_STYLE_OBLIQUE : FONT_STYLE_ITALIC) : + FONT_STYLE_NORMAL; + + HTML_SET_TOP_ATTR (html, font, + Font::create (HT2LT(html), &font_attrs)); +} + +/* + * Evaluates the ALIGN attribute (left|center|right|justify) and + * sets the style at the top of the stack. + */ +static void Html_tag_set_align_attr(DilloHtml *html, char *tag, int tagsize) +{ + const char *align, *charattr; + + if ((align = Html_get_attr(html, tag, tagsize, "align"))) { + if (dStrcasecmp (align, "left") == 0) + HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_LEFT); + else if (dStrcasecmp (align, "right") == 0) + HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_RIGHT); + else if (dStrcasecmp (align, "center") == 0) + HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_CENTER); + else if (dStrcasecmp (align, "justify") == 0) + HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_JUSTIFY); + else if (dStrcasecmp (align, "char") == 0) { + /* todo: Actually not supported for

etc. */ + HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_STRING); + if ((charattr = Html_get_attr(html, tag, tagsize, "char"))) { + if (charattr[0] == 0) + /* todo: ALIGN=" ", and even ALIGN="&32;" will reult in + * an empty string (don't know whether the latter is + * correct, has to be clarified with the specs), so + * that for empty strings, " " is assumed. */ + HTML_SET_TOP_ATTR (html, textAlignChar, ' '); + else + HTML_SET_TOP_ATTR (html, textAlignChar, charattr[0]); + } else + /* todo: Examine LANG attr of . */ + HTML_SET_TOP_ATTR (html, textAlignChar, '.'); + } + } +} + +/* + * Evaluates the VALIGN attribute (top|bottom|middle|baseline) and + * sets the style in style_attrs. Returns TRUE when set. + */ +static bool_t Html_tag_set_valign_attr(DilloHtml *html, char *tag, + int tagsize, StyleAttrs *style_attrs) +{ + const char *attr; + + if ((attr = Html_get_attr(html, tag, tagsize, "valign"))) { + if (dStrcasecmp (attr, "top") == 0) + style_attrs->valign = VALIGN_TOP; + else if (dStrcasecmp (attr, "bottom") == 0) + style_attrs->valign = VALIGN_BOTTOM; + else if (dStrcasecmp (attr, "baseline") == 0) + style_attrs->valign = VALIGN_BASELINE; + else + style_attrs->valign = VALIGN_MIDDLE; + return TRUE; + } else + return FALSE; +} + + +/* + * Add a new DwPage into the current DwPage, for indentation. + * left and right are the horizontal indentation amounts, space is the + * vertical space around the block. 
+ */ +static void Html_add_indented_widget(DilloHtml *html, Widget *textblock, + int left, int right, int space) +{ + StyleAttrs style_attrs; + Style *style; + + style_attrs = *S_TOP(html)->style; + + style_attrs.margin.setVal (0); + style_attrs.borderWidth.setVal (0); + style_attrs.padding.setVal(0); + + /* Activate this for debugging */ +#if 0 + style_attrs.borderWidth.setVal (1); + style_attrs.setBorderColor ( + Color::createShaded (HT2LT(html), style_attrs.color->getColor()); + style_attrs.setBorderStyle (BORDER_DASHED); +#endif + + style_attrs.margin.left = left; + style_attrs.margin.right = right; + style = Style::create (HT2LT(html), &style_attrs); + + DW2TB(html->dw)->addParbreak (space, style); + DW2TB(html->dw)->addWidget (textblock, style); + DW2TB(html->dw)->addParbreak (space, style); + S_TOP(html)->textblock = html->dw = textblock; + S_TOP(html)->hand_over_break = TRUE; + style->unref (); + + /* Handle it when the user clicks on a link */ + Html_connect_signals(html, textblock); +} + +/* + * Create and add a new indented DwPage to the current DwPage + */ +static void Html_add_indented(DilloHtml *html, int left, int right, int space) +{ + Textblock *textblock = new Textblock (false); + Html_add_indented_widget (html, textblock, left, right, space); +} + +/* + * Given a font_size, this will return the correct 'level'. + * (or the closest, if the exact level isn't found). + */ +static int Html_fontsize_to_level(int fontsize) +{ + int i, level; + double normalized_size = fontsize / prefs.font_factor, + approximation = FontSizes[FontSizesNum-1] + 1; + + for (i = level = 0; i < FontSizesNum; i++) + if (approximation >= fabs(normalized_size - FontSizes[i])) { + approximation = fabs(normalized_size - FontSizes[i]); + level = i; + } else { + break; + } + + return level; +} + +/* + * Given a level of a font, this will return the correct 'size'. 
+ */ +static int Html_level_to_fontsize(int level) +{ + level = MAX(0, level); + level = MIN(FontSizesNum - 1, level); + + return (int)rint(FontSizes[level]*prefs.font_factor); +} + +/* + * Miscelaneous initializations for a DwPage + */ +static void Html_set_dwpage(DilloHtml *html) +{ + Widget *widget; + Textblock *textblock; + StyleAttrs style_attrs; + FontAttrs font_attrs; + + dReturn_if_fail (html->dw == NULL); + + widget = textblock = new Textblock (false); + html->dw = html->stack->getRef(0)->textblock = widget; + + /* Create a dummy font, attribute, and tag for the bottom of the stack. */ + font_attrs.name = prefs.vw_fontname; /* Helvetica */ + font_attrs.size = Html_level_to_fontsize(FontSizesBase); + font_attrs.weight = 400; + font_attrs.style = FONT_STYLE_NORMAL; + + style_attrs.initValues (); + style_attrs.font = Font::create (HT2LT(html), &font_attrs); + style_attrs.color = Color::createSimple (HT2LT(html), prefs.text_color); + html->stack->getRef(0)->style = Style::create (HT2LT(html), &style_attrs); + + html->stack->getRef(0)->table_cell_style = NULL; + + /* Handle it when the user clicks on a link */ + Html_connect_signals(html, widget); + + html->bw->num_page_bugs = 0; + dStr_truncate(html->bw->page_bugs, 0); + +// gtk_signal_connect_while_alive ( +// GTK_OBJECT(GTK_BIN(html->bw->render_main_scroll)->child), +// "button_press_event", GTK_SIGNAL_FUNC(Html_page_menu), +// html->bw, GTK_OBJECT (page)); +// +// /* Destroy the linkblock when the DwPage is destroyed */ +// gtk_signal_connect_object(GTK_OBJECT(page), "destroy", +// GTK_SIGNAL_FUNC(Html_lb_free), +// html->linkblock); +} + +/* + * Create and initialize a new DilloHtml structure + */ +static DilloHtml *Html_new(BrowserWindow *bw, const DilloUrl *url) +{ + DilloHtml *html; + + html = dNew(DilloHtml, 1); + + html->Start_Buf = NULL; + html->Start_Ofs = 0; + html->CurrTagOfs = 0; + html->OldTagOfs = 0; + html->OldTagLine = 1; + + html->DocType = DT_NONE; /* assume Tag Soup 0.0! 
:-) */ + html->DocTypeVersion = 0.0f; + + html->dw = NULL; + html->bw = bw; + html->linkblock = Html_lb_new(bw, url); + + html->stack = new misc::SimpleVector (16); + html->stack->increase(); + html->stack->getRef(0)->tag_name = dStrdup("none"); + html->stack->getRef(0)->style = NULL; + html->stack->getRef(0)->table_cell_style = NULL; + html->stack->getRef(0)->parse_mode = DILLO_HTML_PARSE_MODE_INIT; + html->stack->getRef(0)->table_mode = DILLO_HTML_TABLE_MODE_NONE; + html->stack->getRef(0)->cell_text_align_set = FALSE; + html->stack->getRef(0)->list_type = HTML_LIST_NONE; + html->stack->getRef(0)->list_number = 0; + html->stack->getRef(0)->tag_idx = -1; /* MUST not be used */ + html->stack->getRef(0)->textblock = NULL; + html->stack->getRef(0)->table = NULL; + html->stack->getRef(0)->ref_list_item = NULL; + html->stack->getRef(0)->current_bg_color = prefs.bg_color; + html->stack->getRef(0)->hand_over_break = FALSE; + + html->Stash = dStr_new(""); + html->StashSpace = FALSE; + + html->SPCBuf = NULL; + + html->pre_column = 0; + html->PreFirstChar = FALSE; + html->PrevWasCR = FALSE; + html->PrevWasOpenTag = FALSE; + html->SPCPending = FALSE; + html->InVisitedLink = FALSE; + html->ReqTagClose = FALSE; + html->CloseOneTag = FALSE; + html->TagSoup = TRUE; + html->NameVal = NULL; + + html->Num_HTML = html->Num_HEAD = html->Num_BODY = html->Num_TITLE = 0; + + html->InFlags = IN_NONE; + + html->attr_data = dStr_sized_new(1024); + + Html_set_dwpage(html); + + return html; +} + +/* + * Initialize the stash buffer + */ +static void Html_stash_init(DilloHtml *html) +{ + S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH; + html->StashSpace = FALSE; + dStr_truncate(html->Stash, 0); +} + +/* Entities list from the HTML 4.01 DTD */ +typedef struct { + char *entity; + int isocode; +} Ent_t; + +#define NumEnt 252 +static const Ent_t Entities[NumEnt] = { + {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, + {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, 
{"Auml",0304}, + {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, + {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, + {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, + {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, + {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, + {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, + {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651}, + {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, + {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, + {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, + {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, + {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, + {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, + {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, + {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, + {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, + {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, + {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, + {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143}, + {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, + {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, + {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367}, + {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, + {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, + {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, + {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, + {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, + {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, + {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, + {"iexcl",0241}, {"igrave",0354}, 
{"image",020421},{"infin",021036}, + {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, + {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, + {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, + {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, + {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, + {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, + {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, + {"nbsp",32}, {"ndash",020023},{"ne",021140}, {"ni",021013}, + {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, + {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, + {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, + {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, + {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, + {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, + {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, + {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, + {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, + {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, + {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, + {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, + {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, + {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, + {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, + {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, + {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, + {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, + {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, + {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, + {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, + 
{"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, + {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014} +}; + + +/* + * Comparison function for binary search + */ +static int Html_entity_comp(const void *a, const void *b) +{ + return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity); +} + +/* + * Binary search of 'key' in entity list + */ +static int Html_entity_search(char *key) +{ + Ent_t *res, EntKey; + + EntKey.entity = key; + res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt, + sizeof(Ent_t), Html_entity_comp); + if (res) + return (res - Entities); + return -1; +} + +/* + * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them! + * + * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED. + * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid + */ +static int Html_ms_stupid_quotes_2ucs(int isocode) +{ + int ret; + switch (isocode) { + case 145: + case 146: ret = '\''; break; + case 147: + case 148: ret = '"'; break; + case 149: ret = 176; break; + case 150: + case 151: ret = '-'; break; + default: ret = isocode; break; + } + return ret; +} + +/* + * Given an entity, return the UCS character code. + * Returns a negative value (error code) if not a valid entity. + * + * The first character *token is assumed to be == '&' + * + * For valid entities, *entsize is set to the length of the parsed entity. + */ +static int Html_parse_entity(DilloHtml *html, const char *token, + int toksize, int *entsize) +{ + int isocode, i; + char *tok, *s, c; + + token++; + tok = s = toksize ? 
dStrndup(token, (uint_t)toksize) : dStrdup(token); + + isocode = -1; + + if (*s == '#') { + /* numeric character reference */ + errno = 0; + if (*++s == 'x' || *s == 'X') { + if (isxdigit(*++s)) { + /* strtol with base 16 accepts leading "0x" - we don't */ + if (*s == '0' && s[1] == 'x') { + s++; + isocode = 0; + } else { + isocode = strtol(s, &s, 16); + } + } + } else if (isdigit(*s)) { + isocode = strtol(s, &s, 10); + } + + if (!isocode || errno || isocode > 0xffff) { + /* this catches null bytes, errors and codes >= 0xFFFF */ + MSG_HTML("numeric character reference out of range\n"); + isocode = -2; + } + + if (isocode != -1) { + if (*s == ';') + s++; + else if (prefs.show_extra_warnings) + MSG_HTML("numeric character reference without trailing ';'\n"); + } + + } else if (isalpha(*s)) { + /* character entity reference */ + while (isalnum(*++s) || strchr(":_.-", *s)); + c = *s; + *s = 0; + + if ((i = Html_entity_search(tok)) == -1) { + if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) || + html->DocType == DT_XHTML) + MSG_HTML("undefined character entity '%s'\n", tok); + isocode = -3; + } else + isocode = Entities[i].isocode; + + if (c == ';') + s++; + else if (prefs.show_extra_warnings) + MSG_HTML("character entity reference without trailing ';'\n"); + } + + *entsize = s-tok+1; + dFree(tok); + + if (isocode >= 145 && isocode <= 151) { + /* TODO: remove this hack. */ + isocode = Html_ms_stupid_quotes_2ucs(isocode); + } else if (isocode == -1 && prefs.show_extra_warnings) + MSG_HTML("literal '&'\n"); + + return isocode; +} + +/* + * Convert all the entities in a token to utf8 encoding. Takes + * a token and its length, and returns a newly allocated string. 
+ */
+static char *
+ Html_parse_entities(DilloHtml *html, char *token, int toksize)
+{
+   /* When none of these bytes occurs, the plain copy is returned as is. */
+   const char *esc_set = "&\xE2\xC2";
+   char *new_str, buf[4];
+   int i, j, k, n, isocode, entsize;
+
+   new_str = dStrndup(token, toksize);
+   if (new_str[strcspn(new_str, esc_set)] == 0)
+      return new_str;
+
+   /* 'i' reads from token, 'j' writes into new_str */
+   for (i = j = 0; i < toksize; i++) {
+      if (token[i] == '&' &&
+          (isocode = Html_parse_entity(html, token+i,
+                                       toksize-i, &entsize)) >= 0) {
+         if (isocode >= 128) {
+            /* multibyte encoding */
+            n = utf8encode(isocode, buf);
+            for (k = 0; k < n; ++k)
+               new_str[j++] = buf[k];
+         } else {
+            new_str[j++] = (char) isocode;
+         }
+         /* skip the rest of the entity (the loop adds the last step) */
+         i += entsize-1;
+      } else {
+         new_str[j++] = token[i];
+      }
+   }
+   new_str[j] = '\0';
+   return new_str;
+}
+
+/*
+ * Parse spaces.
+ * What happens with a run of white space depends on the parse mode of
+ * the stack top:
+ *  > STASH    --> remember that a space must precede the next stashed word
+ *  > VERBATIM --> the run is appended untouched to html->Stash
+ *  > PRE      --> each character is rendered (line breaks, tabs, spaces)
+ *  > otherwise the run is saved in html->SPCBuf and flagged as pending
+ */
+static void Html_process_space(DilloHtml *html, char *space, int spacesize)
+{
+   int i, offset;
+   DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
+
+   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
+      html->StashSpace = (html->Stash->len > 0);
+      html->SPCPending = FALSE;
+
+   } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
+      char *Pword = dStrndup(space, spacesize);
+      dStr_append(html->Stash, Pword);
+      dFree(Pword);
+      html->SPCPending = FALSE;
+
+   } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
+      /* re-scan the string for characters that cause line breaks */
+      for (i = 0; i < spacesize; i++) {
+         /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
+         if (!html->PreFirstChar &&
+             (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
+            DW2TB(html->dw)->addLinebreak (S_TOP(html)->style);
+            html->pre_column = 0;
+         }
+         html->PreFirstChar = FALSE;
+
+         /* cr and lf should not be rendered -- they appear as a break */
+         switch (space[i]) {
+         case '\r':
+         case '\n':
+            break;
+         case '\t':
+            if (prefs.show_extra_warnings)
+               MSG_HTML("TAB character inside <PRE>\n");
+            /* pad with spaces up to the next multiple of TAB_SIZE */
+            offset = TAB_SIZE - html->pre_column % TAB_SIZE;
+            DW2TB(html->dw)->addText (dStrnfill(offset, ' '),
+                                      S_TOP(html)->style);
+            html->pre_column += offset;
+            break;
+         default:
+            DW2TB(html->dw)->addText (dStrndup(space + i, 1),
+                                      S_TOP(html)->style);
+            html->pre_column++;
+            break;
+         }
+
+         html->PrevWasCR = (space[i] == '\r');
+      }
+      html->SPCPending = FALSE;
+
+   } else {
+      if (SGML_SPCDEL && html->PrevWasOpenTag) {
+         /* SGML_SPCDEL ignores white space immediately after an open tag */
+         html->SPCPending = FALSE;
+      } else {
+         /* keep only the most recent run of white space */
+         dFree(html->SPCBuf);
+         html->SPCBuf = dStrndup(space, spacesize);
+         html->SPCPending = TRUE;
+      }
+
+      if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
+         html->StashSpace = (html->Stash->len > 0);
+   }
+}
+
+/*
+ * Handles putting the word into its proper place
+ *  > STASH and VERBATIM --> html->Stash
+ *  > STASH_AND_BODY     --> html->Stash and addText()
+ *  > otherwise it goes through addText()
+ *
+ * Entities are parsed (or not) according to parse_mode: VERBATIM words
+ * are stored untouched, every other mode runs Html_parse_entities().
+ */
+static void Html_process_word(DilloHtml *html, char *word, int size)
+{
+   int i, j, start;
+   char *Pword;
+   DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
+
+   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
+       parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
+      if (html->StashSpace) {
+         dStr_append_c(html->Stash, ' ');
+         html->StashSpace = FALSE;
+      }
+      Pword = Html_parse_entities(html, word, size);
+      dStr_append(html->Stash, Pword);
+      dFree(Pword);
+
+   } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
+      /* word goes in untouched, it is not processed here. */
+      Pword = dStrndup(word, size);
+      dStr_append(html->Stash, Pword);
+      dFree(Pword);
+   }
+
+   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||
+       parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
+      /* skip until the closing instructions */
+
+   } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
+      /* all this overhead is to catch white-space entities */
+      Pword = Html_parse_entities(html, word, size);
+      for (start = i = 0; Pword[i]; start = i) {
+         /* The casts matter: the <ctype.h> functions are undefined for
+          * negative values, and decoded entities yield bytes >= 0x80. */
+         if (isspace((unsigned char)Pword[i])) {
+            while (Pword[++i] && isspace((unsigned char)Pword[i]))
+               ;
+            Html_process_space(html, Pword + start, i - start);
+         } else {
+            while (Pword[++i] && !isspace((unsigned char)Pword[i]))
+               ;
+            DW2TB(html->dw)->addText(
+                               dStrndup(Pword + start, i - start),
+                               S_TOP(html)->style);
+            html->pre_column += i - start;
+            html->PreFirstChar = FALSE;
+         }
+      }
+      dFree(Pword);
+
+   } else {
+      /* add pending space if present */
+      if (html->SPCPending && (!SGML_SPCDEL || !html->PrevWasOpenTag))
+         /* SGML_SPCDEL ignores space after an open tag */
+         DW2TB(html->dw)->addSpace (S_TOP(html)->style);
+
+      /* Remove white-space control characters produced by entities
+       * (TAB, FF, LF, CR) from the word.  Single-pass compaction: the
+       * former shift-left loop skipped the character that followed each
+       * removed one, so consecutive control characters survived. */
+      Pword = Html_parse_entities(html, word, size);
+      for (i = j = 0; Pword[i]; ++i) {
+         if (!strchr("\t\f\n\r", Pword[i]))
+            Pword[j++] = Pword[i];
+      }
+      Pword[j] = '\0';
+
+      /* Pword is not freed here (the PRE branch likewise hands addText()
+       * a fresh copy that is never freed) */
+      DW2TB(html->dw)->addText(Pword, S_TOP(html)->style);
+   }
+
+   html->PrevWasOpenTag = FALSE;
+   html->SPCPending = FALSE;
+}
+
+/*
+ * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
+ * structure, with the initial < skipped over (e.g. "P align=center>")
+ *
+ * The comparison is case-insensitive.  The name in 'tag' must be followed
+ * by white space, '>' or '/' for the match to succeed.
+ * Return value: TRUE on match, FALSE otherwise.
+ */
+static bool_t Html_match_tag(const char *tagstr, char *tag, int tagsize)
+{
+   int i;
+
+   /* Cast to unsigned char: the <ctype.h> functions are undefined for
+    * negative arguments, which plain char yields for bytes >= 0x80. */
+   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
+      if (tolower((unsigned char)tagstr[i]) != tolower((unsigned char)tag[i]))
+         return FALSE;
+   }
+   /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
+   if (i < tagsize &&
+       (isspace((unsigned char)tag[i]) || tag[i] == '>' || tag[i] == '/'))
+      return TRUE;
+   return FALSE;
+}
+
+/*
+ * Called after popping the stack, to handle nested textblock widgets:
+ * when the current widget (html->dw) is not the textblock of the new
+ * stack top, it is flushed (after an optional handOverBreak()) and the
+ * stack top's textblock becomes the current widget.
+ */
+static void Html_eventually_pop_dw(DilloHtml *html, bool_t hand_over_break)
+{
+   /* still inside the same textblock: nothing to do */
+   if (html->dw == S_TOP(html)->textblock)
+      return;
+
+   Textblock *finished = DW2TB(html->dw);
+
+   if (hand_over_break)
+      finished->handOverBreak (S_TOP(html)->style);
+   finished->flush ();
+   html->dw = S_TOP(html)->textblock;
+}
+
+/*
+ * Push a new state for the tag 'tag_idx' onto the parsing stack.
+ * The new item starts as a copy of the former top; only the tag name
+ * and the tag index are replaced.
+ */
+static void Html_push_tag(DilloHtml *html, int tag_idx)
+{
+   /* index the new item will have once the stack has grown */
+   const int new_top = html->stack->size ();
+
+   html->stack->increase ();
+
+   /* Copying the whole former item and then overwriting the two fields
+    * that differ is simpler than copying field by field.  --Jcid */
+   *html->stack->getRef(new_top) = *html->stack->getRef(new_top - 1);
+   /* the element's name only (no parameters); freed when popped */
+   html->stack->getRef(new_top)->tag_name = dStrdup(Tags[tag_idx].name);
+   html->stack->getRef(new_top)->tag_idx = tag_idx;
+
+   /* the copied style pointers are now shared by two stack items:
+    * take one more reference for each (may be unref'd later) */
+   (S_TOP(html)->style)->ref ();
+   if (S_TOP(html)->table_cell_style)
+      (S_TOP(html)->table_cell_style)->ref ();
+
+   html->dw = S_TOP(html)->textblock;
+}
+
+/*
+ * Push the tag (used to force an element with optional open tag into the
+ * stack).
+ * Note: for now this simply forwards to Html_push_tag(); it is kept as
+ * a separate entry point because things may change...
+ */
+static void Html_force_push_tag(DilloHtml *html, int tag_idx)
+{
+   Html_push_tag(html, tag_idx);
+}
+
+/*
+ * Pop the top tag in the stack: release what the top item holds (style
+ * references and tag name), shrink the stack by one and let
+ * Html_eventually_pop_dw() switch back to the enclosing textblock.
+ */
+static void Html_real_pop_tag(DilloHtml *html)
+{
+   /* must be read before the item is removed from the stack */
+   const bool_t hand_over_break = S_TOP(html)->hand_over_break;
+
+   /* drop the references taken in Html_push_tag() */
+   (S_TOP(html)->style)->unref ();
+   if (S_TOP(html)->table_cell_style)
+      (S_TOP(html)->table_cell_style)->unref ();
+   dFree(S_TOP(html)->tag_name);
+
+   html->stack->setSize (html->stack->size() - 1);
+   Html_eventually_pop_dw(html, hand_over_break);
+}
+
+/*
+ * Default close function for tags.
+ * (conditional cleanup of the stack)
+ * There're several ways of doing it. Considering the HTML 4.01 spec
+ * which defines optional close tags, and the will to deliver useful diagnose
+ * messages for bad-formed HTML, it'll go as follows:
+ *
+ * If 'w3c_mode' is enabled (i.e. prefs.w3c_plus_heuristics is off):
+ *   1.- Search the stack for the first tag that requires a close tag.
+ *   2.- If it matches, clean all the optional-close tags in between.
+ *   3.- Cleanup the matching tag. (on error, give a warning message)
+ *
+ * If 'w3c_mode' is NOT enabled:
+ *   1.- Search the stack for a matching tag based on tag level.
+ *   2.- If it exists, clean all the tags in between.
+ *   3.- Cleanup the matching tag. (on error, give a warning message)
+ *
+ * When html->CloseOneTag is set, only the top tag is popped and the flag
+ * is cleared; this is how the per-tag close functions invoked from the
+ * loop below end up popping exactly one item each.
+ */
+static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
+{
+   int w3c_mode = !prefs.w3c_plus_heuristics;
+   int stack_idx, cmp = 1;
+   int new_idx = TagIdx;
+
+   if (html->CloseOneTag) {
+      Html_real_pop_tag(html);
+      html->CloseOneTag = FALSE;
+      return;
+   }
+
+   /* Look for the candidate tag to close.
+    * 'cmp' ends up 0 only when the scan stopped on a matching tag;
+    * index 0 (the bottom of the stack) is never examined. */
+   stack_idx = html->stack->size() - 1;
+   while (stack_idx &&
+          (cmp = (new_idx != html->stack->getRef(stack_idx)->tag_idx)) &&
+          ((w3c_mode &&
+            Tags[html->stack->getRef(stack_idx)->tag_idx].EndTag == 'O') ||
+           (!w3c_mode &&
+            Tags[html->stack->getRef(stack_idx)->tag_idx].TagLevel <
+            Tags[new_idx].TagLevel))) {
+      --stack_idx;
+   }
+
+   /* clean, up to the matching tag */
+   if (cmp == 0 && stack_idx > 0) {
+      /* There's a valid matching tag in the stack */
+      while (html->stack->size() > stack_idx) {
+         int toptag_idx = S_TOP(html)->tag_idx;
+         /* Warn when we decide to close an open tag (for !w3c_mode) */
+         if (html->stack->size() > stack_idx + 1 &&
+             Tags[toptag_idx].EndTag != 'O')
+            MSG_HTML("  - forcing close of open tag: <%s>\n",
+                     Tags[toptag_idx].name);
+
+         /* Close this and only this tag */
+         html->CloseOneTag = TRUE;
+         Tags[toptag_idx].close (html, toptag_idx);
+      }
+
+   } else {
+      /* one conversion specification per argument: the closing tag found
+       * and the one that was expected at this stack position */
+      MSG_HTML("unexpected closing tag: </%s>. -- expected </%s>\n",
+               Tags[new_idx].name, html->stack->getRef(stack_idx)->tag_name);
+   }
+}
+
+/*
+ * Conditional cleanup, then pop the tag (if it matches).
+ * All the work is delegated to Html_tag_cleanup_at_close().
+ */
+static void Html_pop_tag(DilloHtml *html, int TagIdx)
+{
+   Html_tag_cleanup_at_close(html, TagIdx);
+}
+
+/*
+ * Some parsing routines.
+ */
+
+/*
+ * Used by Html_parse_length
+ *
+ * Reads a number from the start of 'attr' and builds a Length from it
+ * according to the character that follows the number:
+ *   '%'   --> percentage length (value / 100)
+ *   '*'   --> relative length
+ *   other --> absolute length (value truncated to int)
+ * If 'endptr' is not NULL, it receives a pointer to the first character
+ * that was not consumed.
+ */
+static Length Html_parse_length_or_multi_length (const char *attr,
+                                                 char **endptr)
+{
+   char *tail;
+   Length result;
+   const double value = strtod (attr, &tail);
+
+   if (*tail == '%') {
+      ++tail;
+      result = createPerLength (value / 100);
+   } else if (*tail == '*') {
+      ++tail;
+      result = createRelLength (value);
+   } else {
+      /* The "px" suffix seems not allowed by HTML4.01 SPEC, so it is
+       * deliberately not consumed here. */
+      result = createAbsLength ((int)value);
+   }
+
+   if (endptr)
+      *endptr = tail;
+   return result;
+}
+
+
+/*
+ * Returns a length or a percentage, or LENGTH_AUTO in case
+ * of an error, or if attr is NULL.
+ *
+ * Errors are: a relative ("n*") length, which is not allowed here, and
+ * a value directly followed by something other than white space.
+ */
+static Length Html_parse_length (DilloHtml *html, const char *attr)
+{
+   Length l;
+   char *end;
+
+   /* strtod() must not be handed a NULL pointer */
+   if (!attr)
+      return LENGTH_AUTO;
+
+   l = Html_parse_length_or_multi_length (attr, &end);
+   if (isRelLength (l))
+      /* not allowed as &Length; */
+      return LENGTH_AUTO;
+
+   /* allow only whitespaces (cast: isspace() is undefined for negative
+    * values, which plain char yields for bytes >= 0x80) */
+   if (*end && !isspace ((unsigned char)*end)) {
+      MSG_HTML("Garbage after length: %s\n", attr);
+      return LENGTH_AUTO;
+   }
+
+   _MSG("Html_parse_length: \"%s\" %d\n", attr, absLengthVal(l));
+   return l;
+}
+
+/*
+ * Parse a color attribute.
+ * The work is done by a_Color_parse(); this wrapper only adds the
+ * warning message when it reports an error.
+ * Return value: parsed color, or default_color (+ error msg) on error.
+ */
+static int32_t
+ Html_color_parse(DilloHtml *html, const char *subtag, int32_t default_color)
+{
+   int parse_failed = 1;
+   const int32_t parsed = a_Color_parse(subtag, default_color, &parse_failed);
+
+   if (parse_failed)
+      MSG_HTML("color is not in \"#RRGGBB\" format\n");
+
+   return parsed;
+}
+
+/*
+ * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-]
+ * Note: ID can't have entities, but this check is enough (no '&').
+ *
+ * A warning is printed when 'val' contains a character outside that set
+ * or does not start with a letter.
+ * Return value: 1 if every character is inside the set, 0 otherwise.
+ * (a wrong first character only triggers the warning, not a 0 return)
+ */
+static int
+ Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
+{
+   int i;
+
+   /* Cast to unsigned char: the <ctype.h> functions are undefined for
+    * negative arguments, which plain char yields for bytes >= 0x80. */
+   for (i = 0; val[i]; ++i)
+      if (!(isalnum((unsigned char)val[i]) || strchr(":_.-", val[i])))
+         break;
+
+   if (val[i] || !isalpha((unsigned char)val[0]))
+      MSG_HTML("'%s' value is not of the form "
+               "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname);
+
+   return !(val[i]);
+}
+
+/*
+ * Handle DOCTYPE declaration
+ *
+ * Follows the convention that HTML 4.01
+ * doctypes which include a full w3c DTD url are treated as
+ * standards-compliant, but 4.01 without the url and HTML 4.0 and
+ * earlier are not. XHTML doctypes are always standards-compliant
+ * whether or not an url is present.
+ *
+ * Note: I'm not sure about this convention. The W3C validator
+ * recognizes the "HTML Level" with or without the URL. The convention
+ * comes from mozilla (see URLs below), but Dillo doesn't have the same
+ * rendering modes, so it may be better to chose another behaviour. --Jcid
+ *
+ * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
+ * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
+ *
+ * This is not a full DOCTYPE parser, just enough for what Dillo uses.
+ *
+ * The visible part stores its result in html->DocType and
+ * html->DocTypeVersion.
+ *
+ * NOTE(review): in this copy the beginning of the function body appears
+ * to have been lost: the first declaration is cut in the middle of its
+ * string literal, and the declarations of 'p', 'ntag' and of the
+ * signature strings used below are not visible. Restore the missing part
+ * from the original source before relying on this function.
+ */
+static void Html_parse_doctype(DilloHtml *html, char *tag, int tagsize)
+{
+   char *HTML_sig    = "DocType = DT_HTML;
+         html->DocTypeVersion = 4.01f;
+      /* XHTML signatures are tested together with their DTD url */
+      } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
+                 dStristr(p + strlen(XHTML1), XHTML1_url)) {
+         html->DocType = DT_XHTML;
+         html->DocTypeVersion = 1.0f;
+      } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
+                 dStristr(p + strlen(XHTML11), XHTML11_url)) {
+         html->DocType = DT_XHTML;
+         html->DocTypeVersion = 1.1f;
+      /* the older HTML signatures are matched by prefix only (no url) */
+      } else if (!strncmp(p, HTML40, strlen(HTML40))) {
+         html->DocType = DT_HTML;
+         html->DocTypeVersion = 4.0f;
+      } else if (!strncmp(p, HTML32, strlen(HTML32))) {
+         html->DocType = DT_HTML;
+         html->DocTypeVersion = 3.2f;
+      } else if (!strncmp(p, HTML20, strlen(HTML20))) {
+         html->DocType = DT_HTML;
+         html->DocTypeVersion = 2.0f;
+      }
+   }
+
+   dFree(ntag);
+}
+
+/*
+ * Handle open HTML element: flag that we're inside HTML, count the
+ * element and warn when it is not the first one.
+ */
+static void Html_tag_open_html(DilloHtml *html, char *tag, int tagsize)
+{
+   /* setting an already set flag is harmless */
+   html->InFlags |= IN_HTML;
+
+   if (++html->Num_HTML > 1)
+      MSG_HTML("HTML element was already open\n");
+}
+
+/*
+ * Handle close HTML element.
+ * The IN_HTML flag is cleared only when exactly one HTML element was
+ * opened; the tag is popped in any case.
+ */
+static void Html_tag_close_html(DilloHtml *html, int TagIdx)
+{
+   /* todo: may add some checks here */
+
+   /* beware of pages with multiple HTML close tags... :-P */
+   if (html->Num_HTML == 1)
+      html->InFlags &= ~IN_HTML;
+
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * Handle open HEAD element.
+ * A HEAD found while inside BODY is rejected: a warning is printed and
+ * html->ReqTagClose is set. Otherwise the IN_HEAD flag is set, the
+ * element is counted and a repeated HEAD gets a warning.
+ */
+static void Html_tag_open_head(DilloHtml *html, char *tag, int tagsize)
+{
+   const bool_t inside_body = (html->InFlags & IN_BODY) != 0;
+
+   if (inside_body) {
+      MSG_HTML("HEAD element must go before the BODY section\n");
+      html->ReqTagClose = TRUE;
+      return;
+   }
+
+   /* setting an already set flag is harmless */
+   html->InFlags |= IN_HEAD;
+
+   if (++html->Num_HEAD > 1)
+      MSG_HTML("HEAD element was already open\n");
+}
+
+/*
+ * Handle close HEAD element.
+ * While the IN_HEAD flag is set: warn about a missing TITLE and clear
+ * the flag. The tag is popped in any case.
+ * Note: as a side effect of Html_test_section() this function is called
+ *       twice when the head element is closed implicitly.
+ */
+static void Html_tag_close_head(DilloHtml *html, int TagIdx)
+{
+   const bool_t inside_head = (html->InFlags & IN_HEAD) != 0;
+
+   if (inside_head) {
+      if (!html->Num_TITLE)
+         MSG_HTML("HEAD section lacks the TITLE element\n");
+
+      html->InFlags &= ~IN_HEAD;
+   }
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * Handle open TITLE: count the element and call Html_stash_init(), so
+ * that the title string is collected in the stash.
+ */
+static void Html_tag_open_title(DilloHtml *html, char *tag, int tagsize)
+{
+   html->Num_TITLE += 1;
+   Html_stash_init(html);
+}
+
+/*
+ * Handle close TITLE
+ * Inside HEAD, the stashed string is set as page-title in the browser
+ * window and in the history; elsewhere only a warning is printed.
+ * The tag is popped in both cases.
+ */
+static void Html_tag_close_title(DilloHtml *html, int TagIdx)
+{
+   if (!(html->InFlags & IN_HEAD)) {
+      /* title is only valid inside HEAD */
+      MSG_HTML("the TITLE element must be inside the HEAD section\n");
+   } else {
+      a_UIcmd_set_page_title(html->linkblock->bw, html->Stash->str);
+      a_History_set_title(NAV_TOP(html->linkblock->bw), html->Stash->str);
+   }
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * Handle open SCRIPT
+ * Initializes the stash, where the embedded code will be stored, and
+ * switches the stack top to MODE_VERBATIM (MODE_STASH is not suitable
+ * because it catches entities).
+ */
+static void Html_tag_open_script(DilloHtml *html, char *tag, int tagsize)
+{
+   Html_stash_init(html);
+   /* set after the stash init, so that this mode is the one in effect */
+   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
+}
+
+/*
+ * Handle close SCRIPT
+ * For now the stashed code is not used: the tag is just popped.
+ */
+static void Html_tag_close_script(DilloHtml *html, int TagIdx)
+{
+   /* eventually the stash will be sent to an interpreter for parsing */
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * Handle open STYLE
+ * The contents are stored verbatim in the stash, where (in the future)
+ * the style sheet interpreter can get them.
+ */
+static void Html_tag_open_style(DilloHtml *html, char *tag, int tagsize)
+{
+   Html_stash_init(html);
+   /* set after the stash init, so that this mode is the one in effect */
+   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
+}
+
+/*
+ * Handle close STYLE
+ * For now the stashed style sheet is not used: the tag is just popped.
+ */
+static void Html_tag_close_style(DilloHtml *html, int TagIdx)
+{
+   /* eventually the stash will be sent to an interpreter for parsing */
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * Handle open BODY element
+ *
+ * Flags that we're inside BODY and counts the element; a repeated BODY
+ * only gets a warning. For the first one, and unless the user forces
+ * his own colors, the "bgcolor", "text", "link" and "vlink" attributes
+ * are applied. Finally the parse mode of the stack top becomes
+ * MODE_BODY.
+ */
+static void Html_tag_open_body(DilloHtml *html, char *tag, int tagsize)
+{
+   const char *attrbuf;
+   StyleAttrs style_attrs;
+   Style *style;
+   int32_t color;
+
+   html->InFlags |= IN_BODY;
+   ++html->Num_BODY;
+
+   if (html->Num_BODY > 1) {
+      MSG_HTML("BODY element was already open\n");
+      return;
+   }
+   if (html->InFlags & IN_HEAD) {
+      /* if we're here, it's bad XHTML, no need to recover */
+      MSG_HTML("unclosed HEAD element\n");
+   }
+
+   if (!prefs.force_my_colors) {
+      if ((attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) {
+         color = Html_color_parse(html, attrbuf, prefs.bg_color);
+         /* a white background is replaced unless the user allows it */
+         if (color == 0xffffff && !prefs.allow_white_bg)
+            color = prefs.bg_color;
+
+         /* the background is a property of the widget's own style */
+         style_attrs = *html->dw->getStyle ();
+         style_attrs.backgroundColor =
+            Color::createSimple (HT2LT(html), color);
+         style = Style::create (HT2LT(html), &style_attrs);
+         html->dw->setStyle (style);
+         style->unref ();
+         S_TOP(html)->current_bg_color = color;
+      }
+
+      if ((attrbuf = Html_get_attr(html, tag, tagsize, "text"))) {
+         color = Html_color_parse(html, attrbuf, prefs.text_color);
+         HTML_SET_TOP_ATTR (html, color,
+                            Color::createSimple (HT2LT(html),color));
+      }
+
+      if ((attrbuf = Html_get_attr(html, tag, tagsize, "link")))
+         html->linkblock->link_color = Html_color_parse(html, attrbuf,
+                                                        prefs.link_color);
+
+      if ((attrbuf = Html_get_attr(html, tag, tagsize, "vlink")))
+         html->linkblock->visited_color =
+            Html_color_parse(html, attrbuf, prefs.visited_color);
+
+      if (prefs.contrast_visited_color) {
+         /* get a color that has a "safe distance" from text, link and bg */
+         html->linkblock->visited_color =
+            a_Color_vc(html->linkblock->visited_color,
+                       S_TOP(html)->style->color->getColor(),
+                       html->linkblock->link_color,
+                       S_TOP(html)->current_bg_color);
+      }
+   }
+
+   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
+}
+
+/*
+ * Handle close BODY element.
+ * The IN_BODY flag is cleared only when exactly one BODY element was
+ * opened; the tag is popped in any case.
+ */
+static void Html_tag_close_body(DilloHtml *html, int TagIdx)
+{
+   /* some tag soup pages use multiple BODY tags... */
+   if (html->Num_BODY == 1)
+      html->InFlags &= ~IN_BODY;
+
+   Html_pop_tag(html, TagIdx);
+}
+
+/*
+ * 

+ * todo: what's the point between adding the parbreak before and + * after the push? + */ +static void Html_tag_open_p(DilloHtml *html, char *tag, int tagsize) +{ + DW2TB(html->dw)->addParbreak (9, S_TOP(html)->style); + Html_tag_set_align_attr (html, tag, tagsize); +} + +/* + * + */ +static void Html_tag_open_table(DilloHtml *html, char *tag, int tagsize) +{ +#ifdef USE_TABLES + Widget *table; + StyleAttrs style_attrs; + Style *tstyle, *old_style; + const char *attrbuf; + int32_t border = 0, cellspacing = 1, cellpadding = 2, bgcolor; +#endif + + DW2TB(html->dw)->addParbreak (0, S_TOP(html)->style); + +#ifdef USE_TABLES + if ((attrbuf = Html_get_attr(html, tag, tagsize, "border"))) + border = isdigit(attrbuf[0]) ? strtol (attrbuf, NULL, 10) : 1; + if ((attrbuf = Html_get_attr(html, tag, tagsize, "cellspacing"))) + cellspacing = strtol (attrbuf, NULL, 10); + if ((attrbuf = Html_get_attr(html, tag, tagsize, "cellpadding"))) + cellpadding = strtol (attrbuf, NULL, 10); + + /* The style for the table */ + style_attrs = *S_TOP(html)->style; + + /* When dillo was started with the --debug-rendering option, there + * is always a border around the table. 
*/ + if (dillo_dbg_rendering) + style_attrs.borderWidth.setVal (MIN (border, 1)); + else + style_attrs.borderWidth.setVal (border); + + style_attrs.setBorderColor ( + Color::createShaded (HT2LT(html), + S_TOP(html)->current_bg_color)); + style_attrs.setBorderStyle (BORDER_OUTSET); + style_attrs.hBorderSpacing = cellspacing; + + if ((attrbuf = Html_get_attr(html, tag, tagsize, "width"))) + style_attrs.width = Html_parse_length (html, attrbuf); + + if ((attrbuf = Html_get_attr(html, tag, tagsize, "align"))) { + if (dStrcasecmp (attrbuf, "left") == 0) + style_attrs.textAlign = TEXT_ALIGN_LEFT; + else if (dStrcasecmp (attrbuf, "right") == 0) + style_attrs.textAlign = TEXT_ALIGN_RIGHT; + else if (dStrcasecmp (attrbuf, "center") == 0) + style_attrs.textAlign = TEXT_ALIGN_CENTER; + } + + if (!prefs.force_my_colors && + (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { + bgcolor = Html_color_parse(html, attrbuf, -1); + if (bgcolor != -1) { + if (bgcolor == 0xffffff && !prefs.allow_white_bg) + bgcolor = prefs.bg_color; + S_TOP(html)->current_bg_color = bgcolor; + style_attrs.backgroundColor = + Color::createSimple (HT2LT(html), bgcolor); + } + } + + tstyle = Style::create (HT2LT(html), &style_attrs); + + /* The style for the cells */ + style_attrs = *S_TOP(html)->style; + /* When dillo was started with the --debug-rendering option, there + * is always a border around the cells. */ + if (dillo_dbg_rendering) + style_attrs.borderWidth.setVal (1); + else + style_attrs.borderWidth.setVal (border ? 
1 : 0); + + style_attrs.padding.setVal(cellpadding); + style_attrs.setBorderColor (tstyle->borderColor.top); + style_attrs.setBorderStyle (BORDER_INSET); + + old_style = S_TOP(html)->table_cell_style; + S_TOP(html)->table_cell_style = + Style::create (HT2LT(html), &style_attrs); + if (old_style) + old_style->unref (); + + table = new Table(false); + DW2TB(html->dw)->addWidget (table, tstyle); + tstyle->unref (); + + S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TOP; + S_TOP(html)->cell_text_align_set = FALSE; + S_TOP(html)->table = table; +#endif +} + + +/* + * used by
and + */ +static void Html_tag_open_table_cell(DilloHtml *html, char *tag, int tagsize, + TextAlignType text_align) +{ +#ifdef USE_TABLES + Widget *col_tb; + int colspan = 1, rowspan = 1; + const char *attrbuf; + StyleAttrs style_attrs; + Style *style, *old_style; + int32_t bgcolor; + bool_t new_style; + + switch (S_TOP(html)->table_mode) { + case DILLO_HTML_TABLE_MODE_NONE: + MSG_HTML(" or outside \n"); + return; + + case DILLO_HTML_TABLE_MODE_TOP: + MSG_HTML("\n"); + /* a_Dw_table_add_cell takes care that dillo does not crash. */ + /* continues */ + case DILLO_HTML_TABLE_MODE_TR: + case DILLO_HTML_TABLE_MODE_TD: + /* todo: check errors? */ + if ((attrbuf = Html_get_attr(html, tag, tagsize, "colspan"))) + colspan = strtol (attrbuf, NULL, 10); + if ((attrbuf = Html_get_attr(html, tag, tagsize, "rowspan"))) + rowspan = strtol (attrbuf, NULL, 10); + + /* text style */ + old_style = S_TOP(html)->style; + style_attrs = *old_style; + if (!S_TOP(html)->cell_text_align_set) + style_attrs.textAlign = text_align; + if (Html_get_attr(html, tag, tagsize, "nowrap")) + style_attrs.whiteSpace = WHITE_SPACE_NOWRAP; + else + style_attrs.whiteSpace = WHITE_SPACE_NORMAL; + + S_TOP(html)->style = + Style::create (HT2LT(html), &style_attrs); + old_style->unref (); + Html_tag_set_align_attr (html, tag, tagsize); + + /* cell style */ + style_attrs = *S_TOP(html)->table_cell_style; + new_style = FALSE; + + if ((attrbuf = Html_get_attr(html, tag, tagsize, "width"))) { + style_attrs.width = Html_parse_length (html, attrbuf); + new_style = TRUE; + } + + if (Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs)) + new_style = TRUE; + + if (!prefs.force_my_colors && + (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { + bgcolor = Html_color_parse(html, attrbuf, -1); + if (bgcolor != -1) { + if (bgcolor == 0xffffff && !prefs.allow_white_bg) + bgcolor = prefs.bg_color; + + new_style = TRUE; + style_attrs.backgroundColor = + Color::createSimple (HT2LT(html), bgcolor); + 
S_TOP(html)->current_bg_color = bgcolor; + } + } + + if (S_TOP(html)->style->textAlign + == TEXT_ALIGN_STRING) + col_tb = new TableCell ( + ((Table*)S_TOP(html)->table)->getCellRef (), + false); + else + col_tb = new Textblock (false); + + if (new_style) { + style = Style::create (HT2LT(html), &style_attrs); + col_tb->setStyle (style); + style->unref (); + } else + col_tb->setStyle (S_TOP(html)->table_cell_style); + + ((Table*)S_TOP(html)->table)->addCell ( + col_tb, colspan, rowspan); + S_TOP(html)->textblock = html->dw = col_tb; + + /* Handle it when the user clicks on a link */ + Html_connect_signals(html, col_tb); + break; + + default: + /* compiler happiness */ + break; + } + + S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TD; +#endif +} + + +/* + * + */ +static void Html_tag_open_tr(DilloHtml *html, char *tag, int tagsize) +{ + const char *attrbuf; + StyleAttrs style_attrs; + Style *style, *old_style; + int32_t bgcolor; + +#ifdef USE_TABLES + switch (S_TOP(html)->table_mode) { + case DILLO_HTML_TABLE_MODE_NONE: + _MSG("Invalid HTML syntax: outside
or outside
+ */ +static void Html_tag_open_td(DilloHtml *html, char *tag, int tagsize) +{ + Html_tag_open_table_cell (html, tag, tagsize, TEXT_ALIGN_LEFT); +} + + +/* + * + */ +static void Html_tag_open_th(DilloHtml *html, char *tag, int tagsize) +{ + Html_set_top_font(html, NULL, 0, 1, 1); + Html_tag_open_table_cell (html, tag, tagsize, TEXT_ALIGN_CENTER); +} + + +/* + *
\n"); + return; + + case DILLO_HTML_TABLE_MODE_TOP: + case DILLO_HTML_TABLE_MODE_TR: + case DILLO_HTML_TABLE_MODE_TD: + style = NULL; + + if (!prefs.force_my_colors && + (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { + bgcolor = Html_color_parse(html, attrbuf, -1); + if (bgcolor != -1) { + if (bgcolor == 0xffffff && !prefs.allow_white_bg) + bgcolor = prefs.bg_color; + + style_attrs = *S_TOP(html)->style; + style_attrs.backgroundColor = + Color::createSimple (HT2LT(html), bgcolor); + style = Style::create (HT2LT(html), &style_attrs); + S_TOP(html)->current_bg_color = bgcolor; + } + } + + ((Table*)S_TOP(html)->table)->addRow (style); + if (style) + style->unref (); + + if (Html_get_attr (html, tag, tagsize, "align")) { + S_TOP(html)->cell_text_align_set = TRUE; + Html_tag_set_align_attr (html, tag, tagsize); + } + + style_attrs = *S_TOP(html)->table_cell_style; + if (Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs)) { + old_style = S_TOP(html)->table_cell_style; + S_TOP(html)->table_cell_style = + Style::create (HT2LT(html), &style_attrs); + old_style->unref (); + } else + + break; + + default: + break; + } + + S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TR; +#else + DW2TB(html->dw)->addParbreak (0, S_TOP(html)->style); +#endif +} + +/* + * ,