/* * File: html.cc * * Copyright (C) 2005-2007 Jorge Arellano Cid * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. */ /* * Dillo HTML parsing routines */ /*----------------------------------------------------------------------------- * Includes *---------------------------------------------------------------------------*/ #include /* for isspace and tolower */ #include /* for memcpy and memmove */ #include #include /* for sprintf */ #include /* for rint */ #include #include /* for utf8encode */ #include "bw.h" /* for BrowserWindow */ #include "msg.h" #include "binaryconst.h" #include "colors.h" #include "misc.h" #include "uicmd.hh" #include "history.h" #include "nav.h" #include "menu.hh" #include "prefs.h" #include "capi.h" #include "html.hh" #include "html_common.hh" #include "dw/textblock.hh" #include "dw/bullet.hh" #include "dw/table.hh" #include "dw/tablecell.hh" #include "dw/listitem.hh" #include "dw/image.hh" #include "dw/ruler.hh" /*----------------------------------------------------------------------------- * Defines *---------------------------------------------------------------------------*/ /* Undefine if you want to unroll tables. For instance for PDAs */ #define USE_TABLES /* Define to 1 to ignore white space immediately after an open tag, * and immediately before a close tag. 
*/ #define SGML_SPCDEL 0 #define TAB_SIZE 8 #define dillo_dbg_rendering 0 /*----------------------------------------------------------------------------- * Name spaces *---------------------------------------------------------------------------*/ using namespace dw; using namespace dw::core; using namespace dw::core::ui; using namespace dw::core::style; /*----------------------------------------------------------------------------- * Typedefs *---------------------------------------------------------------------------*/ class DilloHtml; typedef void (*TagOpenFunct) (DilloHtml *Html, const char *Tag, int Tagsize); typedef void (*TagCloseFunct) (DilloHtml *Html, int TagIdx); typedef struct _DilloHtmlClass DilloHtmlClass; typedef enum { SEEK_ATTR_START, MATCH_ATTR_NAME, SEEK_TOKEN_START, SEEK_VALUE_START, SKIP_VALUE, GET_VALUE, FINISHED } DilloHtmlTagParsingState; typedef enum { HTML_LeftTrim = 1 << 0, HTML_RightTrim = 1 << 1, HTML_ParseEntities = 1 << 2 } DilloHtmlTagParsingFlags; /* * Exported function with C linkage. 
*/ extern "C" { void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data); } /*----------------------------------------------------------------------------- * Forward declarations *---------------------------------------------------------------------------*/ static const char *Html_get_attr2(DilloHtml *html, const char *tag, int tagsize, const char *attrname, int tag_parsing_flags); static void Html_add_widget(DilloHtml *html, Widget *widget, char *width_str, char *height_str, StyleAttrs *style_attrs); static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof); static void Html_load_image(BrowserWindow *bw, DilloUrl *url, DilloImage *image); static void Html_callback(int Op, CacheClient_t *Client); static int Html_tag_index(const char *tag); static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx); /*----------------------------------------------------------------------------- * Local Data *---------------------------------------------------------------------------*/ /* The following array of font sizes has to be _strictly_ crescent */ static const int FontSizes[] = {8, 10, 12, 14, 18, 24}; static const int FontSizesNum = 6; static const int FontSizesBase = 2; /* Parsing table structure */ typedef struct { const char *name; /* element name */ unsigned char Flags; /* flags (explained near the table data) */ char EndTag; /* Is it Required, Optional or Forbidden */ uchar_t TagLevel; /* Used to heuristically parse bad HTML */ TagOpenFunct open; /* Open function */ TagCloseFunct close; /* Close function */ } TagInfo; extern const TagInfo Tags[]; /*----------------------------------------------------------------------------- *----------------------------------------------------------------------------- * Main Code *----------------------------------------------------------------------------- *---------------------------------------------------------------------------*/ /* * Collect HTML error strings. 
*/ void DilloHtml::bugMessage(const char *format, ... ) { va_list argp; dStr_sprintfa(bw->page_bugs, "HTML warning: line %d, ", getCurTagLineNumber()); va_start(argp, format); dStr_vsprintfa(bw->page_bugs, format, argp); va_end(argp); a_UIcmd_set_bug_prog(bw, ++bw->num_page_bugs); } /* * Wrapper for a_Url_new that adds an error detection message. * If use_base_url is TRUE, it uses base_url. Otherwise it uses html->base_url. */ DilloUrl *a_Html_url_new(DilloHtml *html, const char *url_str, const char *base_url, int flags, int32_t posx, int32_t posy, int use_base_url) { DilloUrl *url; int n_ic, n_ic_spc; url = a_Url_new( url_str, (use_base_url) ? base_url : URL_STR_(html->base_url), flags, posx, posy); if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) { const char *suffix = (n_ic) > 1 ? "s" : ""; n_ic_spc = URL_ILLEGAL_CHARS_SPC(url); if (n_ic == n_ic_spc) { BUG_MSG("URL has %d illegal character%s (%d space%s)\n", n_ic, suffix, n_ic_spc, suffix); } else if (n_ic_spc == 0) { BUG_MSG("URL has %d illegal character%s (%d in {00-1F, 7F} range)\n", n_ic, suffix, n_ic); } else { BUG_MSG("URL has %d illegal character%s: " "%d space%s, and %d in {00-1F, 7F} range\n", n_ic, suffix, n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc); } } return url; } /* * Set callback function and callback data for the "html/text" MIME type. */ void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data) { DilloWeb *web = (DilloWeb*)P; DilloHtml *html = new DilloHtml(web->bw, web->url, Type); *Data = (void*)html; *Call = (CA_Callback_t)Html_callback; return (void*)html->dw; } static void Html_free(void *data) { delete ((DilloHtml*)data); } /* * Used bye the "Load images" page menuitem. */ void a_Html_load_images(void *v_html, DilloUrl *pattern) { DilloHtml *html = (DilloHtml*)v_html; html->loadImages(pattern); } /* * Set the URL data for image maps. 
*/ static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y) { char data[64]; if (x != -1) { snprintf(data, 64, "?%d,%d", x, y); a_Url_set_ismap_coords(html->links->get(link), data); } } /* * Create a new link, set it as the url's parent * and return the index. */ static int Html_set_new_link(DilloHtml *html, DilloUrl **url) { int nl = html->links->size(); html->links->increase(); html->links->set(nl, (*url) ? *url : NULL); return nl; } /* * Add a new image. */ static int Html_add_new_linkimage(DilloHtml *html, DilloUrl **url, DilloImage *image) { DilloLinkImage *li = dNew(DilloLinkImage, 1); li->url = *url; li->image = image; int ni = html->images->size(); html->images->increase(); html->images->set(ni, li); return ni; } /* * Set the font at the top of the stack. BImask specifies which * attributes in BI should be changed. */ static void Html_set_top_font(DilloHtml *html, const char *name, int size, int BI, int BImask) { FontAttrs font_attrs; font_attrs = *S_TOP(html)->style->font; if (name) font_attrs.name = name; if (size) font_attrs.size = size; if (BImask & 1) font_attrs.weight = (BI & 1) ? 700 : 400; if (BImask & 2) font_attrs.style = (BI & 2) ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL; HTML_SET_TOP_ATTR (html, font, Font::create (HT2LT(html), &font_attrs)); } /* * Evaluates the ALIGN attribute (left|center|right|justify) and * sets the style at the top of the stack. 
*/
/* Unknown ALIGN values, and tags without ALIGN, leave the style untouched.
 * ALIGN=char also sets the alignment character from the CHAR attribute. */
static void Html_tag_set_align_attr(DilloHtml *html, const char *tag,
                                    int tagsize)
{
   const char *align = a_Html_get_attr(html, tag, tagsize, "align");

   if (!align)
      return;

   if (dStrcasecmp (align, "left") == 0) {
      HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_LEFT);
   } else if (dStrcasecmp (align, "right") == 0) {
      HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_RIGHT);
   } else if (dStrcasecmp (align, "center") == 0) {
      HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_CENTER);
   } else if (dStrcasecmp (align, "justify") == 0) {
      HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_JUSTIFY);
   } else if (dStrcasecmp (align, "char") == 0) {
      /* todo: not actually supported for every element. */
      HTML_SET_TOP_ATTR (html, textAlign, TEXT_ALIGN_STRING);

      const char *charattr = a_Html_get_attr(html, tag, tagsize, "char");
      char align_char;

      if (!charattr) {
         /* todo: the LANG attribute should be examined here. */
         align_char = '.';
      } else if (charattr[0] == 0) {
         /* todo: CHAR=" " (and maybe an entity for a space) ends up as an
          * empty string -- whether that is correct has to be clarified
          * with the specs -- so " " is assumed for empty strings. */
         align_char = ' ';
      } else {
         align_char = charattr[0];
      }
      HTML_SET_TOP_ATTR (html, textAlignChar, align_char);
   }
}

/*
 * Evaluates the VALIGN attribute (top|bottom|middle|baseline) and
 * sets the style in style_attrs. Returns TRUE when set.
 * Any value other than top, bottom or baseline is treated as middle.
 */
static bool_t Html_tag_set_valign_attr(DilloHtml *html, const char *tag,
                                       int tagsize, StyleAttrs *style_attrs)
{
   const char *valign = a_Html_get_attr(html, tag, tagsize, "valign");

   if (!valign)
      return FALSE;

   if (dStrcasecmp (valign, "top") == 0) {
      style_attrs->valign = VALIGN_TOP;
   } else if (dStrcasecmp (valign, "bottom") == 0) {
      style_attrs->valign = VALIGN_BOTTOM;
   } else if (dStrcasecmp (valign, "baseline") == 0) {
      style_attrs->valign = VALIGN_BASELINE;
   } else {
      style_attrs->valign = VALIGN_MIDDLE;
   }
   return TRUE;
}

/*
 * Add a new DwPage into the current DwPage, for indentation.
 * left and right are the horizontal indentation amounts, space is the
 * vertical space around the block.
*/ static void Html_add_indented_widget(DilloHtml *html, Widget *textblock, int left, int right, int space) { StyleAttrs style_attrs; Style *style; style_attrs = *S_TOP(html)->style; style_attrs.margin.setVal (0); style_attrs.borderWidth.setVal (0); style_attrs.padding.setVal(0); /* Activate this for debugging */ #if 0 style_attrs.borderWidth.setVal (1); style_attrs.setBorderColor ( Color::createShaded (HT2LT(html), style_attrs.color->getColor()); style_attrs.setBorderStyle (BORDER_DASHED); #endif style_attrs.margin.left = left; style_attrs.margin.right = right; style = Style::create (HT2LT(html), &style_attrs); DW2TB(html->dw)->addParbreak (space, style); DW2TB(html->dw)->addWidget (textblock, style); DW2TB(html->dw)->addParbreak (space, style); S_TOP(html)->textblock = html->dw = textblock; S_TOP(html)->hand_over_break = TRUE; style->unref (); /* Handle it when the user clicks on a link */ html->connectSignals(textblock); } /* * Create and add a new indented DwPage to the current DwPage */ static void Html_add_indented(DilloHtml *html, int left, int right, int space) { Textblock *textblock = new Textblock (prefs.limit_text_width); Html_add_indented_widget (html, textblock, left, right, space); } /* * Given a font_size, this will return the correct 'level'. * (or the closest, if the exact level isn't found). */ static int Html_fontsize_to_level(int fontsize) { int i, level; double normalized_size = fontsize / prefs.font_factor, approximation = FontSizes[FontSizesNum-1] + 1; for (i = level = 0; i < FontSizesNum; i++) if (approximation >= fabs(normalized_size - FontSizes[i])) { approximation = fabs(normalized_size - FontSizes[i]); level = i; } else { break; } return level; } /* * Given a level of a font, this will return the correct 'size'. 
*/ static int Html_level_to_fontsize(int level) { level = MAX(0, level); level = MIN(FontSizesNum - 1, level); return (int)rint(FontSizes[level]*prefs.font_factor); } /* * Create and initialize a new DilloHtml class */ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, const char *content_type) { /* Init event receiver */ linkReceiver.html = this; /* Init main variables */ bw = p_bw; page_url = a_Url_dup(url); base_url = a_Url_dup(url); dw = NULL; a_Bw_add_doc(p_bw, this); /* Init for-parsing variables */ Buf_Consumed = 0; Start_Buf = NULL; Start_Ofs = 0; MSG("DilloHtml(): content type: %s\n", content_type); this->content_type = dStrdup(content_type); /* get charset */ a_Misc_parse_content_type(content_type, NULL, NULL, &charset); stop_parser = false; CurrTagOfs = 0; OldTagOfs = 0; OldTagLine = 1; DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */ DocTypeVersion = 0.0f; stack = new misc::SimpleVector (16); stack->increase(); stack->getRef(0)->style = NULL; stack->getRef(0)->table_cell_style = NULL; stack->getRef(0)->parse_mode = DILLO_HTML_PARSE_MODE_INIT; stack->getRef(0)->table_mode = DILLO_HTML_TABLE_MODE_NONE; stack->getRef(0)->cell_text_align_set = FALSE; stack->getRef(0)->list_type = HTML_LIST_NONE; stack->getRef(0)->list_number = 0; stack->getRef(0)->tag_idx = -1; /* MUST not be used */ stack->getRef(0)->textblock = NULL; stack->getRef(0)->table = NULL; stack->getRef(0)->ref_list_item = NULL; stack->getRef(0)->hand_over_break = FALSE; InFlags = IN_NONE; Stash = dStr_new(""); StashSpace = FALSE; pre_column = 0; PreFirstChar = FALSE; PrevWasCR = FALSE; PrevWasOpenTag = FALSE; PrevWasSPC = FALSE; SPCPending = FALSE; InVisitedLink = FALSE; ReqTagClose = FALSE; CloseOneTag = FALSE; TagSoup = TRUE; NameVal = NULL; Num_HTML = Num_HEAD = Num_BODY = Num_TITLE = 0; attr_data = dStr_sized_new(1024); parse_finished = FALSE; /* Init page-handling variables */ forms = new misc::SimpleVector (1); links = new misc::SimpleVector (64); images = new 
misc::SimpleVector (16); //a_Dw_image_map_list_init(&maps); link_color = prefs.link_color; visited_color = prefs.visited_color; /* Initialize the main widget */ initDw(); /* Hook destructor to the dw delete call */ dw->setDeleteCallback(Html_free, this); } /* * Miscelaneous initializations for Dw */ void DilloHtml::initDw() { StyleAttrs style_attrs; FontAttrs font_attrs; dReturn_if_fail (dw == NULL); /* Create the main widget */ dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width); /* Create a dummy font, attribute, and tag for the bottom of the stack. */ font_attrs.name = prefs.vw_fontname; font_attrs.size = Html_level_to_fontsize(FontSizesBase); font_attrs.weight = 400; font_attrs.style = FONT_STYLE_NORMAL; style_attrs.initValues (); style_attrs.font = Font::create (HT2LT(this), &font_attrs); style_attrs.color = Color::createSimple (HT2LT(this), prefs.text_color); style_attrs.backgroundColor = Color::createShaded (HT2LT(this), prefs.bg_color); stack->getRef(0)->style = Style::create (HT2LT(this), &style_attrs); stack->getRef(0)->table_cell_style = NULL; /* Handle it when the user clicks on a link */ connectSignals(dw); bw->num_page_bugs = 0; dStr_truncate(bw->page_bugs, 0); } /* * Free memory used by the DilloHtml class. */ DilloHtml::~DilloHtml() { _MSG("::~DilloHtml(this=%p)\n", this); if (!parse_finished) freeParseData(); a_Bw_remove_doc(bw, this); a_Url_free(page_url); a_Url_free(base_url); for (int i = 0; i < forms->size(); i++) a_Html_form_delete (forms->get(i)); delete(forms); for (int i = 0; i < links->size(); i++) if (links->get(i)) a_Url_free(links->get(i)); delete (links); for (int i = 0; i < images->size(); i++) { DilloLinkImage *li = images->get(i); a_Url_free(li->url); if (li->image) a_Image_unref(li->image); dFree(li); } delete (images); //a_Dw_image_map_list_free(&maps); } /* * Connect all signals of a textblock or an image. 
*/ void DilloHtml::connectSignals(Widget *dw) { dw->connectLink (&linkReceiver); } /* * Process the newly arrived html and put it into the page structure. * (This function is called by Html_callback whenever there's new data) */ void DilloHtml::write(char *Buf, int BufSize, int Eof) { int token_start; char *buf = Buf + Start_Ofs; int bufsize = BufSize - Start_Ofs; dReturn_if_fail (dw != NULL); Start_Buf = Buf; token_start = Html_write_raw(this, buf, bufsize, Eof); Start_Ofs += token_start; if (bw) a_UIcmd_set_page_prog(bw, Start_Ofs, 1); } /* * Return the line number of the tag being processed by the parser. * Also update the offsets. */ int DilloHtml::getCurTagLineNumber() { int i, ofs, line; const char *p = Start_Buf; dReturn_val_if_fail(p != NULL, -1); ofs = CurrTagOfs; line = OldTagLine; for (i = OldTagOfs; i < ofs; ++i) if (p[i] == '\n') ++line; OldTagOfs = CurrTagOfs; OldTagLine = line; return line; } /* * Free parsing data. */ void DilloHtml::freeParseData() { (stack->getRef(0)->style)->unref (); /* template style */ delete(stack); dStr_free(Stash, TRUE); dStr_free(attr_data, TRUE); dFree(content_type); dFree(charset); } /* * Finish parsing a HTML page. Close the parser and close the client. * The class is not deleted here, it remains until the widget is destroyed. */ void DilloHtml::finishParsing(int ClientKey) { int si; /* force the close of elements left open (todo: not for XHTML) */ while ((si = stack->size() - 1)) { if (stack->getRef(si)->tag_idx != -1) { Html_tag_cleanup_at_close(this, stack->getRef(si)->tag_idx); } } /* Remove this client from our active list */ a_Bw_close_client(bw, ClientKey); /* Set progress bar insensitive */ a_UIcmd_set_page_prog(bw, 0, 0); freeParseData(); parse_finished = TRUE; } /* * Allocate and insert form information. 
*/ int DilloHtml::formNew(DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc, const char *charset) { DilloHtmlForm *form = a_Html_form_new (this,method,action,enc,charset); int nf = forms->size (); forms->increase (); forms->set (nf, form); _MSG("Html formNew: action=%s nform=%d\n", action, nf); return forms->size(); } /* * Get the current form. */ DilloHtmlForm *DilloHtml::getCurrentForm () { return forms->get (forms->size() - 1); } /* * Load images if they were disabled. */ void DilloHtml::loadImages (const DilloUrl *pattern) { for (int i = 0; i < images->size(); i++) { if (images->get(i)->image) { if ((!pattern) || (!a_Url_cmp(images->get(i)->url, pattern))) { Html_load_image(bw, images->get(i)->url, images->get(i)->image); images->get(i)->image = NULL; // web owns it now } } } } bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img, int x, int y) { BrowserWindow *bw = html->bw; _MSG(" ** "); if (link == -1) { _MSG(" Link LEAVE notify...\n"); a_UIcmd_set_msg(bw, ""); } else { _MSG(" Link ENTER notify...\n"); Html_set_link_coordinates(html, link, x, y); a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link))); } return true; } /* * Handle the "press" signal. */ bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img, int x, int y, EventButton *event) { BrowserWindow *bw = html->bw; int ret = false; DilloUrl *linkurl = NULL; _MSG("pressed button %d\n", event->button); if (event->button == 3) { // popup menus if (img != -1) { // image menu if (link != -1) linkurl = html->links->get(link); a_UIcmd_image_popup(bw, html->images->get(img)->url, linkurl); ret = true; } else { if (link == -1) { a_UIcmd_page_popup(bw, a_History_get_url(NAV_TOP_UIDX(bw)), bw->num_page_bugs ? bw->page_bugs->str:NULL, prefs.load_images); ret = true; } else { a_UIcmd_link_popup(bw, html->links->get(link)); ret = true; } } } return ret; } /* * Handle the "click" signal. 
*/ bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img, int x, int y, EventButton *event) { BrowserWindow *bw = html->bw; if ((img != -1) && (html->images->get(img)->image)) { // clicked an image that has not already been loaded DilloUrl *pattern; if (event->button == 1){ // load all instances of this image pattern = html->images->get(img)->url; } else { if (event->button == 2){ // load all images pattern = NULL; } else { return false; } } html->loadImages(pattern); return true; } if (link != -1) { DilloUrl *url = html->links->get(link); _MSG("clicked on URL %d: %s\n", link, a_Url_str (url)); Html_set_link_coordinates(html, link, x, y); if (event->button == 1) { a_Nav_push(bw, url); } else if (event->button == 2) { a_Nav_push_nw(bw, url); } else { return false; } /* Change the link color to "visited" as visual feedback */ for (Widget *w = widget; w; w = w->getParent()) { _MSG(" ->%s\n", w->getClassName()); if (w->instanceOf(dw::Textblock::CLASS_ID)) { ((Textblock*)w)->changeLinkColor (link, html->visited_color); break; } } } return true; } /* * Initialize the stash buffer */ void a_Html_stash_init(DilloHtml *html) { S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH; html->StashSpace = FALSE; dStr_truncate(html->Stash, 0); } /* Entities list from the HTML 4.01 DTD */ typedef struct { const char *entity; int isocode; } Ent_t; #define NumEnt 252 static const Ent_t Entities[NumEnt] = { {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304}, {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, 
{"Omega",01651}, {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143}, {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367}, {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036}, {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, {"nbsp",32}, 
{"ndash",020023},{"ne",021140}, {"ni",021013}, {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014} }; /* * Comparison function for binary search */ static int Html_entity_comp(const void *a, const void *b) { return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity); } /* * Binary search of 'key' in entity list */ static int Html_entity_search(char *key) { Ent_t *res, EntKey; EntKey.entity = key; res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp); if (res) return (res - Entities); return -1; } /* * This is M$ 
non-standard "smart quotes" (w1252). Now even deprecated by them! * * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED. * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid */ static int Html_ms_stupid_quotes_2ucs(int isocode) { int ret; switch (isocode) { case 145: case 146: ret = '\''; break; case 147: case 148: ret = '"'; break; case 149: ret = 176; break; case 150: case 151: ret = '-'; break; default: ret = isocode; break; } return ret; } /* * Given an entity, return the UCS character code. * Returns a negative value (error code) if not a valid entity. * * The first character *token is assumed to be == '&' * * For valid entities, *entsize is set to the length of the parsed entity. */ static int Html_parse_entity(DilloHtml *html, const char *token, int toksize, int *entsize) { int isocode, i; char *tok, *s, c; token++; tok = s = toksize ? dStrndup(token, (uint_t)toksize) : dStrdup(token); isocode = -1; if (*s == '#') { /* numeric character reference */ errno = 0; if (*++s == 'x' || *s == 'X') { if (isxdigit(*++s)) { /* strtol with base 16 accepts leading "0x" - we don't */ if (*s == '0' && s[1] == 'x') { s++; isocode = 0; } else { isocode = strtol(s, &s, 16); } } } else if (isdigit(*s)) { isocode = strtol(s, &s, 10); } if (!isocode || errno || isocode > 0xffff) { /* this catches null bytes, errors and codes >= 0xFFFF */ BUG_MSG("numeric character reference out of range\n"); isocode = -2; } if (isocode != -1) { if (*s == ';') s++; else if (prefs.show_extra_warnings) BUG_MSG("numeric character reference without trailing ';'\n"); } } else if (isalpha(*s)) { /* character entity reference */ while (*++s && (isalnum(*s) || strchr(":_.-", *s))); c = *s; *s = 0; if ((i = Html_entity_search(tok)) == -1) { if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) || html->DocType == DT_XHTML) BUG_MSG("undefined character entity '%s'\n", tok); isocode = -3; } else isocode = Entities[i].isocode; if (c == ';') s++; else if 
(prefs.show_extra_warnings) BUG_MSG("character entity reference without trailing ';'\n"); } *entsize = s-tok+1; dFree(tok); if (isocode >= 145 && isocode <= 151) { /* TODO: remove this hack. */ isocode = Html_ms_stupid_quotes_2ucs(isocode); } else if (isocode == -1 && prefs.show_extra_warnings) BUG_MSG("literal '&'\n"); return isocode; } /* * Convert all the entities in a token to utf8 encoding. Takes * a token and its length, and returns a newly allocated string. */ char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize) { const char *esc_set = "&\xE2\xC2"; char *new_str, buf[4]; int i, j, k, n, s, isocode, entsize; new_str = dStrndup(token, toksize); s = strcspn(new_str, esc_set); if (new_str[s] == 0) return new_str; for (i = j = s; i < toksize; i++) { if (token[i] == '&' && (isocode = Html_parse_entity(html, token+i, toksize-i, &entsize)) >= 0) { if (isocode >= 128) { /* multibyte encoding */ n = utf8encode(isocode, buf); for (k = 0; k < n; ++k) new_str[j++] = buf[k]; } else { new_str[j++] = (char) isocode; } i += entsize-1; } else { new_str[j++] = token[i]; } } new_str[j] = '\0'; return new_str; } /* * Parse spaces */ static void Html_process_space(DilloHtml *html, const char *space, int spacesize) { int i, offset; DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode; if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) { html->StashSpace = (html->Stash->len > 0); html->SPCPending = FALSE; } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) { dStr_append_l(html->Stash, space, spacesize); html->SPCPending = FALSE; } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) { int spaceCnt = 0; /* re-scan the string for characters that cause line breaks */ for (i = 0; i < spacesize; i++) { /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */ if (!html->PreFirstChar && (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) { if (spaceCnt) { DW2TB(html->dw)->addText (dStrnfill(spaceCnt, ' '), S_TOP(html)->style); spaceCnt = 0; 
} DW2TB(html->dw)->addLinebreak (S_TOP(html)->style); html->pre_column = 0; } html->PreFirstChar = FALSE; /* cr and lf should not be rendered -- they appear as a break */ switch (space[i]) { case '\r': case '\n': break; case '\t': if (prefs.show_extra_warnings) BUG_MSG("TAB character inside

\n");
            offset = TAB_SIZE - html->pre_column % TAB_SIZE;
            spaceCnt += offset;
            html->pre_column += offset;
            break;
         default:
            spaceCnt++;
            html->pre_column++;
            break;
         }

         html->PrevWasCR = (space[i] == '\r');
      }

      if (spaceCnt) {
         DW2TB(html->dw)->addText (dStrnfill(spaceCnt, ' '),
               S_TOP(html)->style);
      }
      html->SPCPending = FALSE;

   } else {
      if (SGML_SPCDEL) {
         /* SGML_SPCDEL ignores white space inmediately after an open tag */
         if (html->PrevWasOpenTag)
            html->SPCPending = FALSE;
      } else if (!html->PrevWasSPC) {
         DW2TB(html->dw)->addSpace(S_TOP(html)->style);
         html->SPCPending = FALSE;
         html->PrevWasSPC = TRUE;
      }

      if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
         html->StashSpace = (html->Stash->len > 0);
   }
}

/*
 * Handles putting the word into its proper place
 *  > STASH and VERBATIM --> html->Stash
 *  > STASH_AND_BODY     --> html->Stash, and also addText()
 *  > otherwise it goes through addText()
 *
 * Entities are parsed (or not) according to parse_mode:
 * VERBATIM words are stored untouched, every other mode goes through
 * a_Html_parse_entities().
 *
 * html: parser state.
 * word: start of the word (not necessarily NUL-terminated at 'size').
 * size: length of the word in bytes.
 */
static void Html_process_word(DilloHtml *html, const char *word, int size)
{
   int i, j, start;
   char *Pword;
   DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;

   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
       parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
      /* a pending space separates this word from the previous one */
      if (html->StashSpace) {
         dStr_append_c(html->Stash, ' ');
         html->StashSpace = FALSE;
      }
      Pword = a_Html_parse_entities(html, word, size);
      dStr_append(html->Stash, Pword);
      dFree(Pword);

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* word goes in untouched, it is not processed here. */
      dStr_append_l(html->Stash, word, size);
   }

   if (parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||
       parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
      /* skip until the closing instructions */

   } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
      /* all this overhead is to catch white-space entities */
      /* (the casts keep isspace() defined for bytes >= 0x80) */
      Pword = a_Html_parse_entities(html, word, size);
      for (start = i = 0; Pword[i]; start = i)
         if (isspace((unsigned char)Pword[i])) {
            while (Pword[++i] && isspace((unsigned char)Pword[i]));
            Html_process_space(html, Pword + start, i - start);
         } else {
            while (Pword[++i] && !isspace((unsigned char)Pword[i]));
            DW2TB(html->dw)->addText(
                               dStrndup(Pword + start, i - start),
                               S_TOP(html)->style);
            html->pre_column += i - start;
            html->PreFirstChar = FALSE;
         }
      dFree(Pword);

   } else {
      /* Collapse white-space entities inside the word (except &nbsp;) */
      Pword = a_Html_parse_entities(html, word, size);
      /* Single-pass, in-place compaction: every TAB, FF, LF and CR is
       * dropped, including each member of a run of them. */
      for (i = j = 0; Pword[i]; ++i)
         if (!strchr("\t\f\n\r", Pword[i]))
            Pword[j++] = Pword[i];
      Pword[j] = '\0';

      /* Pword is not freed here, unlike in the other branches */
      DW2TB(html->dw)->addText(Pword, S_TOP(html)->style);
   }

   html->PrevWasOpenTag = FALSE;
   html->PrevWasSPC = FALSE;
   html->SPCPending = FALSE;
   if (html->InFlags & IN_LI)
      html->WordAfterLI = TRUE;
}

/*
 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
 * structure, with the initial < skipped over (e.g. "P align=center>")
 */
static bool_t Html_match_tag(const char *tagstr, char *tag, int tagsize)
{
   int i;

   /* Case-insensitive comparison of the element name.
    * The <ctype.h> functions are only defined for values representable as
    * unsigned char (or EOF), so every byte is converted first; passing a
    * plain (possibly negative) char for bytes >= 0x80 is undefined. */
   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
      if (tolower((unsigned char)tagstr[i]) != tolower((unsigned char)tag[i]))
         return FALSE;
   }

   /* The name only matches when 'tag' has a delimiter right after it, and
    * that delimiter must still lie inside the tagsize bytes.
    * The test for '/' is for xml compatibility: "empty/>" will be matched. */
   if (i < tagsize &&
       (isspace((unsigned char)tag[i]) || tag[i] == '>' || tag[i] == '/'))
      return TRUE;
   return FALSE;
}

/*
 * This function is called after popping the stack, to
 * handle nested DwPage widgets.
 */
static void Html_eventually_pop_dw(DilloHtml *html, bool_t hand_over_break)
{
   /* Nothing to do while the current widget is still the textblock
    * recorded in the top stack item. */
   if (html->dw == S_TOP(html)->textblock)
      return;

   /* Leaving a nested textblock: optionally hand the pending break over,
    * flush it, and make the top item's textblock current again. */
   if (hand_over_break)
      DW2TB(html->dw)->handOverBreak (S_TOP(html)->style);
   DW2TB(html->dw)->flush (false);
   html->dw = S_TOP(html)->textblock;
}

/*
 * Push the tag (copying attributes from the top of the stack)
 */
static void Html_push_tag(DilloHtml *html, int tag_idx)
{
   /* Index the new item will have once the stack has grown by one. */
   const int new_top = html->stack->size ();

   html->stack->increase ();

   /* The new item starts as a copy of the former top item; only the tag
    * index differs (cheaper than copying every field but the tag). */
   *html->stack->getRef(new_top) = *html->stack->getRef(new_top - 1);
   html->stack->getRef(new_top)->tag_idx = tag_idx;

   /* The copied style pointers are now held twice, so take one more
    * reference on each; they may be unref'd later when the item is popped. */
   S_TOP(html)->style->ref ();
   if (S_TOP(html)->table_cell_style != NULL)
      S_TOP(html)->table_cell_style->ref ();

   html->dw = S_TOP(html)->textblock;
}

/*
 * Push the tag (used to force an element with optional open into the stack)
 * Note: now it's the same as Html_push_tag(), but things may change...
 */
static void Html_force_push_tag(DilloHtml *html, int tag_idx)
{
   /* Currently a plain forward to Html_push_tag(); kept as a separate
    * entry point so forced pushes can diverge later. */
   Html_push_tag(html, tag_idx);
}

/*
 * Pop the top tag in the stack
 */
static void Html_real_pop_tag(DilloHtml *html)
{
   /* Read the flag first: the stack item holding it is dropped below. */
   const bool_t hand_over_break = S_TOP(html)->hand_over_break;

   /* Give back the references held by the item being removed. */
   S_TOP(html)->style->unref ();
   if (S_TOP(html)->table_cell_style != NULL)
      S_TOP(html)->table_cell_style->unref ();

   /* Shrink the stack by one, then switch widgets if the new top item
    * belongs to a different textblock. */
   html->stack->setSize (html->stack->size() - 1);
   Html_eventually_pop_dw(html, hand_over_break);
}

/*
 * Default close function for tags.
 * (conditional cleanup of the stack)
 * There are several ways of doing it. Considering the HTML 4.01 spec,
 * which defines optional close tags, and the wish to deliver useful
 * diagnostic messages for malformed HTML, it goes as follows:
 *   1.- Search the stack for the first tag that requires a close tag.
 *   2.- If it matches, clean all the optional-close tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 *
 * If 'w3c_mode' is NOT enabled:
 *   1.- Search the stack for a matching tag based on tag level.
 *   2.- If it exists, clean all the tags in between.
 *   3.- Cleanup the matching tag. (on error, give a warning message)
 */
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx)
{
   int w3c_mode = !prefs.w3c_plus_heuristics;
   int stack_idx, cmp = 1;
   int new_idx = TagIdx;

   if (html->CloseOneTag) {
      /* Set by the cleanup loop below right before it invokes a tag's
       * close handler: pop exactly one item and do no searching. */
      Html_real_pop_tag(html);
      html->CloseOneTag = FALSE;
      return;
   }

   /* Look for the candidate tag to close.
    * Walk down from the top of the stack (item 0 is never a candidate).
    * 'cmp' ends up 0 only when an item with the same tag index is found.
    * A non-matching item may be skipped over when:
    *   - its close tag is optional (EndTag == 'O'), in either mode, or
    *   - w3c_mode is off and its TagLevel is below the closing tag's.
    * Note: the tag-level test used to escape the "!w3c_mode &&" guard
    * because of a misplaced parenthesis, so it was applied in w3c mode too.
    */
   stack_idx = html->stack->size() - 1;
   while (stack_idx) {
      int cand_idx = html->stack->getRef(stack_idx)->tag_idx;
      int skippable;

      cmp = (new_idx != cand_idx);
      if (!cmp)
         break;   /* matching open tag found */

      skippable = (Tags[cand_idx].EndTag == 'O') ||
                  (!w3c_mode &&
                   Tags[cand_idx].TagLevel < Tags[new_idx].TagLevel);
      if (!skippable)
         break;   /* this tag blocks the search */

      --stack_idx;
   }

   /* clean, up to the matching tag */
   if (cmp == 0 && stack_idx > 0) {
      /* There's a valid matching tag in the stack */
      while (html->stack->size() > stack_idx) {
         int toptag_idx = S_TOP(html)->tag_idx;
         /* Warn when we decide to close an open tag (for !w3c_mode) */
         if (html->stack->size() > stack_idx + 1 &&
             Tags[toptag_idx].EndTag != 'O')
            BUG_MSG("  - forcing close of open tag: <%s>\n",
                    Tags[toptag_idx].name);

         /* Close this and only this tag */
         html->CloseOneTag = TRUE;
         Tags[toptag_idx].close (html, toptag_idx);
      }

   } else {
      /* No usable match: report it.  The format strings now consume the
       * tag names that are passed as arguments. */
      if (stack_idx == 0) {
         BUG_MSG("unexpected closing tag: </%s>.\n", Tags[new_idx].name);
      } else {
         BUG_MSG("unexpected closing tag: </%s>. -- expected </%s>\n",
                 Tags[new_idx].name,
                 Tags[html->stack->getRef(stack_idx)->tag_idx].name);
      }
   }
}

/*
 * Cleanup (conditional), and Pop the tag (if it matches)
 */
void a_Html_pop_tag(DilloHtml *html, int TagIdx)
{
   /* All the work (searching the stack, closing the tags in between and
    * reporting mismatches) is done by the cleanup routine. */
   Html_tag_cleanup_at_close(html, TagIdx);
}

/*
 * Some parsing routines.
 */

/*
 * Used by Html_parse_length
 */
static Length Html_parse_length_or_multi_length (const char *attr,
                                                 char **endptr)
{
   char *tail;
   const double value = strtod (attr, &tail);
   Length result;

   /* The character right after the number selects the kind of length. */
   if (*tail == '%') {
      /* percentage, stored as a fraction */
      ++tail;
      result = createPerLength (value / 100);
   } else if (*tail == '*') {
      /* relative ("multi") length */
      ++tail;
      result = createRelLength (value);
   } else {
      /* Anything else is an absolute length.  A "px" suffix is not
       * consumed here: it seems not allowed by the HTML4.01 SPEC. */
      result = createAbsLength ((int)value);
   }

   /* Optionally report where parsing stopped. */
   if (endptr)
      *endptr = tail;
   return result;
}


/*
 * Returns a length or a percentage, or LENGTH_AUTO in case
 * of an error (a relative "*" length, or garbage after the number).
 */
static Length Html_parse_length (DilloHtml *html, const char *attr)
{
   Length l;
   char *end;

   /* A missing attribute value yields no usable length; do not hand a
    * NULL pointer to the number parser. */
   if (attr == NULL)
      return LENGTH_AUTO;

   l = Html_parse_length_or_multi_length (attr, &end);
   if (isRelLength (l)) {
      /* not allowed as &Length; */
      return LENGTH_AUTO;
   }

   /* The character right after the number must be the end of the string
    * or whitespace.  The cast keeps bytes >= 0x80 inside the domain of
    * isspace(), which is undefined for negative char values. */
   if (*end && !isspace ((unsigned char)*end)) {
      BUG_MSG("Garbage after length: %s\n", attr);
      return LENGTH_AUTO;
   }

   _MSG("Html_parse_length: \"%s\" %d\n", attr, absLengthVal(l));
   return l;
}

/*
 * Parse a color attribute.
 * Return value: parsed color, or default_color (+ error msg) on error.
 */
static int32_t
 Html_color_parse(DilloHtml *html, const char *subtag, int32_t default_color)
{
   /* Starts out as "error"; a_Color_parse() receives its address and
    * reports the outcome through it. */
   int parse_error = 1;
   const int32_t parsed = a_Color_parse(subtag, default_color, &parse_error);

   if (parse_error != 0)
      BUG_MSG("color is not in \"#RRGGBB\" format\n");

   return parsed;
}

/*
 * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-]
 * Note: ID can't have entities, but this check is enough (no '&').
 * Return value: 1 if OK, 0 otherwise.
 */
static int
 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
{
   int i;

   /* Stop at the first character outside [A-Za-z0-9:_.-].
    * Bytes >= 0x80 are rejected explicitly: isalnum() may accept them
    * depending on the locale, and passing them as a negative plain char
    * is undefined behaviour. */
   for (i = 0; val[i]; ++i) {
      const unsigned char c = (unsigned char)val[i];

      if (c >= 0x80 || !(isalnum(c) || strchr(":_.-", c)))
         break;
   }

   /* Complain when a bad character was found, or when the value does not
    * start with a letter (this also covers the empty string).  A non-ASCII
    * first byte already stopped the scan above, so val[i] is set then. */
   if (val[i] || !isalpha((unsigned char)val[0]))
      BUG_MSG("'%s' value is not of the form "
              "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname);

   /* Only a bad character makes the value unusable; a wrong first
    * character is reported above but still returns 1. */
   return !(val[i]);
}

/*
 * Handle DOCTYPE declaration
 *
 * Follows the convention that HTML 4.01
 * doctypes which include a full w3c DTD url are treated as
 * standards-compliant, but 4.01 without the url and HTML 4.0 and
 * earlier are not. XHTML doctypes are always standards-compliant
 * whether or not an url is present.
 *
 * Note: I'm not sure about this convention. The W3C validator
 * recognizes the "HTML Level" with or without the URL. The convention
 * comes from mozilla (see URLs below), but Dillo doesn't have the same
 * rendering modes, so it may be better to chose another behaviour. --Jcid
 * 
 * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
 * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
 *
 * This is not a full DOCTYPE parser, just enough for what Dillo uses.
 */
static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
{
   static const char HTML_sig   [] = "DocType = DT_HTML;
         html->DocTypeVersion = 4.01f;
      } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
                 dStristr(p + strlen(XHTML1), XHTML1_url)) {
         html->DocType = DT_XHTML;
         html->DocTypeVersion = 1.0f;
      } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
                 dStristr(p + strlen(XHTML11), XHTML11_url)) {
         html->DocType = DT_XHTML;
         html->DocTypeVersion = 1.1f;
      } else if (!strncmp(p, HTML40, strlen(HTML40))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 4.0f;
      } else if (!strncmp(p, HTML32, strlen(HTML32))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 3.2f;
      } else if (!strncmp(p, HTML20, strlen(HTML20))) {
         html->DocType = DT_HTML;
         html->DocTypeVersion = 2.0f;
      }
   }

   dFree(ntag);
}

/*
 * Handle open HTML element
 */
static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
{
   /* Mark the HTML section as open; OR-ing an already set bit changes
    * nothing, so no prior test is needed. */
   html->InFlags |= IN_HTML;

   /* Count every HTML open tag and complain about repeated ones. */
   if (++html->Num_HTML > 1)
      BUG_MSG("HTML element was already open\n");
}

/*
 * Handle close HTML element
 */
static void Html_tag_close_html(DilloHtml *html, int TagIdx)
{
   /* todo: may add some checks here */

   /* beware of pages with multiple HTML close tags... :-P
    * The section flag is only cleared when exactly one HTML was opened. */
   const bool single_html = (html->Num_HTML == 1);

   if (single_html)
      html->InFlags &= ~IN_HTML;

   a_Html_pop_tag(html, TagIdx);
}

/*
 * Handle open HEAD element
 */
static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
{
   /* A HEAD is misplaced once a BODY is open or has ever been opened. */
   const bool body_seen = (html->InFlags & IN_BODY) || (html->Num_BODY > 0);

   if (body_seen) {
      BUG_MSG("HEAD element must go before the BODY section\n");
      html->ReqTagClose = TRUE;
      return;
   }

   /* Enter the HEAD section (setting an already set bit is harmless). */
   html->InFlags |= IN_HEAD;

   /* Count every accepted HEAD open tag and complain about repeats. */
   if (++html->Num_HEAD > 1)
      BUG_MSG("HEAD element was already open\n");
}

/*
 * Handle close HEAD element
 * Note: as a side effect of Html_test_section() this function is called
 *       twice when the head element is closed implicitly.
 */
static void Html_tag_close_head(DilloHtml *html, int TagIdx)
{
   /* Only a HEAD section that is still open can be missing its TITLE. */
   if ((html->InFlags & IN_HEAD) && html->Num_TITLE == 0)
      BUG_MSG("HEAD section lacks the TITLE element\n");

   /* Leave the HEAD section; clearing the bit is a no-op when it was not
    * set, e.g. on the second of the two calls mentioned above. */
   html->InFlags &= ~IN_HEAD;

   a_Html_pop_tag(html, TagIdx);
}

/*
 * Handle open TITLE
 * calls stash init, where the title string will be stored
 */
static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
{
   /* Count TITLE elements; Html_tag_close_head() warns when none was seen.
    * 'tag' and 'tagsize' are not used here. */
   ++html->Num_TITLE;
   /* Set up the stash; Html_tag_close_title() reads the title from it. */
   a_Html_stash_init(html);
}

/*
 * Handle close TITLE
 * set page-title in the browser window and in the history.
 */
static void Html_tag_close_title(DilloHtml *html, int TagIdx)
{
   if (!(html->InFlags & IN_HEAD)) {
      /* A TITLE outside HEAD is reported and its text is not used. */
      BUG_MSG("the TITLE element must be inside the HEAD section\n");
   } else {
      /* title is only valid inside HEAD: hand the stashed text to the
       * browser window and to the history entry. */
      a_UIcmd_set_page_title(html->bw, html->Stash->str);
      a_History_set_title(NAV_TOP_UIDX(html->bw),html->Stash->str);
   }

   a_Html_pop_tag(html, TagIdx);
}

/*
 * Handle open SCRIPT
 * initializes stash, where the embedded code will be stored.
 * MODE_VERBATIM is used because MODE_STASH catches entities.
 */
static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
{
   /* Prepare the stash that will receive the script text.
    * 'tag' and 'tagsize' are not used here. */
   a_Html_stash_init(html);
   /* Verbatim mode: the content is appended to the stash untouched,
    * without entity parsing. */
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}

/*
 * Handle close SCRIPT
 */
static void Html_tag_close_script(DilloHtml *html, int TagIdx)
{
   /* eventually the stash will be sent to an interpreter for parsing */
   /* For now the stashed script is not used here; just pop the tag. */
   a_Html_pop_tag(html, TagIdx);
}

/*
 * Handle open STYLE
 * store the contents to the stash where (in the future) the style
 * sheet interpreter can get it.
 */
static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
{
   /* Prepare the stash that will receive the style sheet text.
    * 'tag' and 'tagsize' are not used here. */
   a_Html_stash_init(html);
   /* Verbatim mode: the content is appended to the stash untouched,
    * without entity parsing (same handling as SCRIPT). */
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
}

/*
 * Handle close STYLE
 */
static void Html_tag_close_style(DilloHtml *html, int TagIdx)
{
   /* eventually the stash will be sent to an interpreter for parsing */
   /* For now the stashed style sheet is not used here; just pop the tag. */
   a_Html_pop_tag(html, TagIdx);
}

/*
 * Handle open BODY element
 */
/*
 * Open handler for BODY: records that the body section started and, unless
 * the user forces his own colors, applies the bgcolor, text, link and vlink
 * attributes of the tag.  Only the first BODY tag is processed; repeated
 * ones are reported and otherwise ignored.
 */
static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
{
   const char *attrbuf;
   Textblock *textblock;
   StyleAttrs style_attrs;
   Style *style;
   int32_t color;

   /* Enter the BODY section and count the open tag. */
   if (!(html->InFlags & IN_BODY))
      html->InFlags |= IN_BODY;
   ++html->Num_BODY;

   /* A second (third, ...) BODY must not change colors or parse mode. */
   if (html->Num_BODY > 1) {
      BUG_MSG("BODY element was already open\n");
      return;
   }
   if (html->InFlags & IN_HEAD) {
      /* if we're here, it's bad XHTML, no need to recover */
      BUG_MSG("unclosed HEAD element\n");
   }

   /* Note: 'textblock' is assigned here but not used in the rest of this
    * function. */
   textblock = DW2TB(html->dw);

   /* Page-supplied colors are honoured only when the user did not ask to
    * force his own. */
   if (!prefs.force_my_colors) {
      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
         /* Falls back to the preferred background on a parse error. */
         color = Html_color_parse(html, attrbuf, prefs.bg_color);
         /* Pure white is replaced by the preferred background unless the
          * user allows white backgrounds. */
         if (color == 0xffffff && !prefs.allow_white_bg)
            color = prefs.bg_color;

         /* Give the current widget a copy of its style with the new
          * background; the local reference is dropped after setStyle(). */
         style_attrs = *html->dw->getStyle ();
         style_attrs.backgroundColor = Color::createShaded(HT2LT(html), color);
         style = Style::create (HT2LT(html), &style_attrs);
         html->dw->setStyle (style);
         style->unref ();
         /* Also set the background in the style of the top stack item. */
         HTML_SET_TOP_ATTR (html, backgroundColor,
                            Color::createShaded (HT2LT(html), color));
      }

      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
         /* Text color: goes into the style of the top stack item. */
         color = Html_color_parse(html, attrbuf, prefs.text_color);
         HTML_SET_TOP_ATTR (html, color,
                            Color::createSimple (HT2LT(html),color));
      }

      /* Link colors are kept in the parser state, not in a style. */
      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link")))
         html->link_color = Html_color_parse(html, attrbuf, prefs.link_color);

      if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink")))
         html->visited_color = Html_color_parse(html, attrbuf,
                                                prefs.visited_color);

      if (prefs.contrast_visited_color) {
         /* get a color that has a "safe distance" from text, link and bg */
         html->visited_color =
            a_Color_vc(html->visited_color,
                       S_TOP(html)->style->color->getColor(),
                       html->link_color,
                       S_TOP(html)->style->backgroundColor->getColor());
      }
   }

   /* From here on the content is handled in body parse mode. */
   S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
}

/*
 * Handle close BODY element
 */
static void Html_tag_close_body(DilloHtml *html, int TagIdx)
{
   /* some tag soup pages use multiple BODY tags...
    * The section flag is only cleared when exactly one BODY was opened. */
   const bool single_body = (html->Num_BODY == 1);

   if (single_body)
      html->InFlags &= ~IN_BODY;

   a_Html_pop_tag(html, TagIdx);
}

/*
 * 

* todo: what's the point between adding the parbreak before and * after the push? */ static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize) { if ((html->InFlags & IN_LI) && !html->WordAfterLI) { /* ignore first parbreak after an empty

  • */ html->WordAfterLI = TRUE; } else { DW2TB(html->dw)->addParbreak (9, S_TOP(html)->style); } Html_tag_set_align_attr (html, tag, tagsize); } /* * */ static void Html_tag_open_table(DilloHtml *html, const char *tag, int tagsize) { #ifdef USE_TABLES Widget *table; StyleAttrs style_attrs; Style *cell_style, *old_style; const char *attrbuf; int32_t border = 0, cellspacing = 1, cellpadding = 2, bgcolor; #endif DW2TB(html->dw)->addParbreak (0, S_TOP(html)->style); #ifdef USE_TABLES if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) border = isdigit(attrbuf[0]) ? strtol (attrbuf, NULL, 10) : 1; if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "cellspacing"))) cellspacing = strtol (attrbuf, NULL, 10); if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "cellpadding"))) cellpadding = strtol (attrbuf, NULL, 10); /* The style for the table */ style_attrs = *S_TOP(html)->style; /* When dillo was started with the --debug-rendering option, there * is always a border around the table. 
*/ if (dillo_dbg_rendering) style_attrs.borderWidth.setVal (MIN (border, 1)); else style_attrs.borderWidth.setVal (border); style_attrs.setBorderColor ( Color::createShaded(HT2LT(html), style_attrs.backgroundColor->getColor())); style_attrs.setBorderStyle (BORDER_OUTSET); style_attrs.hBorderSpacing = cellspacing; style_attrs.vBorderSpacing = cellspacing; if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "width"))) style_attrs.width = Html_parse_length (html, attrbuf); if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "align"))) { if (dStrcasecmp (attrbuf, "left") == 0) style_attrs.textAlign = TEXT_ALIGN_LEFT; else if (dStrcasecmp (attrbuf, "right") == 0) style_attrs.textAlign = TEXT_ALIGN_RIGHT; else if (dStrcasecmp (attrbuf, "center") == 0) style_attrs.textAlign = TEXT_ALIGN_CENTER; } if (!prefs.force_my_colors && (attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; style_attrs.backgroundColor = Color::createShaded (HT2LT(html), bgcolor); HTML_SET_TOP_ATTR (html, backgroundColor, Color::createShaded (HT2LT(html), bgcolor)); } } /* The style for the cells */ cell_style = Style::create (HT2LT(html), &style_attrs); style_attrs = *S_TOP(html)->style; /* When dillo was started with the --debug-rendering option, there * is always a border around the cells. */ if (dillo_dbg_rendering) style_attrs.borderWidth.setVal (1); else style_attrs.borderWidth.setVal (border ? 
1 : 0); style_attrs.padding.setVal(cellpadding); style_attrs.setBorderColor (cell_style->borderColor.top); style_attrs.setBorderStyle (BORDER_INSET); old_style = S_TOP(html)->table_cell_style; S_TOP(html)->table_cell_style = Style::create (HT2LT(html), &style_attrs); if (old_style) old_style->unref (); table = new Table(prefs.limit_text_width); DW2TB(html->dw)->addWidget (table, cell_style); cell_style->unref (); S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TOP; S_TOP(html)->cell_text_align_set = FALSE; S_TOP(html)->table = table; #endif } /* * used by
    and */ static void Html_tag_open_table_cell(DilloHtml *html, const char *tag, int tagsize, TextAlignType text_align) { #ifdef USE_TABLES Widget *col_tb; int colspan = 1, rowspan = 1; const char *attrbuf; StyleAttrs style_attrs; Style *style, *old_style; int32_t bgcolor; bool_t new_style; switch (S_TOP(html)->table_mode) { case DILLO_HTML_TABLE_MODE_NONE: BUG_MSG(" or outside \n"); return; case DILLO_HTML_TABLE_MODE_TOP: BUG_MSG("\n"); /* a_Dw_table_add_cell takes care that dillo does not crash. */ /* continues */ case DILLO_HTML_TABLE_MODE_TR: case DILLO_HTML_TABLE_MODE_TD: if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "colspan"))) { char *invalid; colspan = strtol(attrbuf, &invalid, 10); if ((colspan < 0) || (attrbuf == invalid)) colspan = 1; } /* todo: check errors? */ if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "rowspan"))) rowspan = MAX(1, strtol (attrbuf, NULL, 10)); /* text style */ old_style = S_TOP(html)->style; style_attrs = *old_style; if (!S_TOP(html)->cell_text_align_set) style_attrs.textAlign = text_align; if (a_Html_get_attr(html, tag, tagsize, "nowrap")) style_attrs.whiteSpace = WHITE_SPACE_NOWRAP; else style_attrs.whiteSpace = WHITE_SPACE_NORMAL; S_TOP(html)->style = Style::create (HT2LT(html), &style_attrs); old_style->unref (); Html_tag_set_align_attr (html, tag, tagsize); /* cell style */ style_attrs = *S_TOP(html)->table_cell_style; new_style = FALSE; if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "width"))) { style_attrs.width = Html_parse_length (html, attrbuf); new_style = TRUE; } if (Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs)) new_style = TRUE; if (!prefs.force_my_colors && (attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; new_style = TRUE; style_attrs.backgroundColor = Color::createShaded (HT2LT(html), bgcolor); HTML_SET_TOP_ATTR (html, 
backgroundColor, Color::createShaded (HT2LT(html), bgcolor)); } } if (S_TOP(html)->style->textAlign == TEXT_ALIGN_STRING) col_tb = new TableCell ( ((Table*)S_TOP(html)->table)->getCellRef (), prefs.limit_text_width); else col_tb = new Textblock (prefs.limit_text_width); if (new_style) { style = Style::create (HT2LT(html), &style_attrs); col_tb->setStyle (style); style->unref (); } else col_tb->setStyle (S_TOP(html)->table_cell_style); ((Table*)S_TOP(html)->table)->addCell (col_tb, colspan, rowspan); S_TOP(html)->textblock = html->dw = col_tb; /* Handle it when the user clicks on a link */ html->connectSignals(col_tb); break; default: /* compiler happiness */ break; } S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TD; #endif } /* * */ static void Html_tag_open_tr(DilloHtml *html, const char *tag, int tagsize) { const char *attrbuf; StyleAttrs style_attrs; Style *style, *old_style; int32_t bgcolor; #ifdef USE_TABLES switch (S_TOP(html)->table_mode) { case DILLO_HTML_TABLE_MODE_NONE: _MSG("Invalid HTML syntax: outside
    or outside
    */ static void Html_tag_open_td(DilloHtml *html, const char *tag, int tagsize) { Html_tag_open_table_cell (html, tag, tagsize, TEXT_ALIGN_LEFT); } /* * */ static void Html_tag_open_th(DilloHtml *html, const char *tag, int tagsize) { Html_set_top_font(html, NULL, 0, 1, 1); Html_tag_open_table_cell (html, tag, tagsize, TEXT_ALIGN_CENTER); } /* *
    \n"); return; case DILLO_HTML_TABLE_MODE_TOP: case DILLO_HTML_TABLE_MODE_TR: case DILLO_HTML_TABLE_MODE_TD: style = NULL; if (!prefs.force_my_colors && (attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; style_attrs = *S_TOP(html)->style; style_attrs.backgroundColor = Color::createShaded (HT2LT(html), bgcolor); style = Style::create (HT2LT(html), &style_attrs); HTML_SET_TOP_ATTR (html, backgroundColor, Color::createShaded (HT2LT(html), bgcolor)); } } ((Table*)S_TOP(html)->table)->addRow (style); if (style) style->unref (); if (a_Html_get_attr (html, tag, tagsize, "align")) { S_TOP(html)->cell_text_align_set = TRUE; Html_tag_set_align_attr (html, tag, tagsize); } style_attrs = *S_TOP(html)->table_cell_style; Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs); style_attrs.backgroundColor = Color::createShaded (HT2LT(html), S_TOP(html)->style->backgroundColor->getColor()); old_style = S_TOP(html)->table_cell_style; S_TOP(html)->table_cell_style = Style::create (HT2LT(html), &style_attrs); old_style->unref (); break; default: break; } S_TOP(html)->table_mode = DILLO_HTML_TABLE_MODE_TR; #else DW2TB(html->dw)->addParbreak (0, S_TOP(html)->style); #endif } /* * ,