diff options
author | corvid <corvid@lavabit.com> | 2010-04-06 02:27:54 +0000 |
---|---|---|
committer | corvid <corvid@lavabit.com> | 2010-04-06 02:27:54 +0000 |
commit | 6f3fa9135b34e14c5330cb563b136e915d91ecf9 (patch) | |
tree | c27e6bd8327c305e062e225ea1e5200c86425db6 | |
parent | d632e3c9d3a9fb6e3fd2dd7b28edca99012d7782 (diff) |
filter automatic requests
Add a filter_auto_requests preference, with a same_domain option, so that spying on users isn't so easy.
-rw-r--r-- | dillorc | 7 | ||||
-rw-r--r-- | src/cache.c | 2 | ||||
-rw-r--r-- | src/capi.c | 58 | ||||
-rw-r--r-- | src/html.cc | 33 | ||||
-rw-r--r-- | src/nav.c | 26 | ||||
-rw-r--r-- | src/nav.h | 3 | ||||
-rw-r--r-- | src/prefs.c | 1 | ||||
-rw-r--r-- | src/prefs.h | 4 | ||||
-rw-r--r-- | src/prefsparser.cc | 11 | ||||
-rw-r--r-- | src/uicmd.cc | 4 | ||||
-rw-r--r-- | src/url.c | 118 | ||||
-rw-r--r-- | src/url.h | 2 | ||||
-rw-r--r-- | src/web.cc | 4 | ||||
-rw-r--r-- | src/web.hh | 4 |
14 files changed, 247 insertions, 30 deletions
@@ -26,6 +26,13 @@ # (While browsing, this can be changed from the tools/settings menu.) #parse_embedded_css=YES +# How should Dillo restrict automatic requests (e.g., redirections, +# pages containing images or stylesheets)? +# allow_all +# same_domain : Permit www.example.org to load an image from img.example.org, +# but not from the unrelated ad.doubleclick.net. +#filter_auto_requests=same_domain + # Change the buffering scheme for drawing # 0 no double buffering - useful for debugging # 1 light buffering using a single back buffer for all windows diff --git a/src/cache.c b/src/cache.c index cdcdeeaf..fcd27a05 100644 --- a/src/cache.c +++ b/src/cache.c @@ -941,7 +941,7 @@ static int Cache_redirect(CacheEntry_t *entry, int Flags, BrowserWindow *bw) NewUrl = a_Url_new(URL_STR_(entry->Location), URL_STR_(entry->Url)); if (entry->Flags & CA_TempRedirect) a_Url_set_flags(NewUrl, URL_FLAGS(NewUrl) | URL_E2EQuery); - a_Nav_push(bw, NewUrl); + a_Nav_push(bw, NewUrl, entry->Url); a_Url_free(NewUrl); } else { /* Sub entity redirection (most probably an image) */ @@ -354,6 +354,61 @@ static void Capi_dpi_send_source(BrowserWindow *bw, DilloUrl *url) } /* + * When dillo wants to open an URL, this can be either due to user action + * (e.g., typing in an URL, clicking a link), or automatic (HTTP header + * indicates redirection, META HTML tag with refresh attribute and 0 delay, + * and images and stylesheets on an HTML page when autoloading is enabled). + * + * For a user request, the action will be permitted. + * For an automatic request, permission to load depends on the filter set + * by the user. 
+ */ +static bool_t Capi_filters_allow(const DilloUrl *wanted, + const DilloUrl *requester) +{ + bool_t ret; + + if (requester == NULL) { + /* request made by user */ + ret = TRUE; + } else { + switch (prefs.filter_auto_requests) { + case PREFS_FILTER_SAME_DOMAIN: + { + const char *req_host = URL_HOST(requester), + *want_host = URL_HOST(wanted), + *req_suffix, + *want_suffix; + if (!req_host && !want_host) { + ret = TRUE; + } else if (!req_host || !want_host) { + ret = FALSE; + } else { + /* This will regard "www.dillo.org" and "www.dillo.org." as + * different, but it doesn't seem worth caring about. + */ + req_suffix = a_Url_host_find_public_suffix(req_host); + want_suffix = a_Url_host_find_public_suffix(want_host); + + ret = dStrcasecmp(req_suffix, want_suffix) == 0; + } + + if (ret) + MSG("ALLOW\n"); + else + MSG("DENY\n"); + break; + } + case PREFS_FILTER_ALLOW_ALL: + default: + ret = TRUE; + break; + } + } + return ret; +} + +/* * Most used function for requesting a URL. * TODO: clean up the ad-hoc bindings with an API that allows dynamic * addition of new plugins. 
@@ -369,6 +424,9 @@ int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData) const char *scheme = URL_SCHEME(web->url); int safe = 0, ret = 0, use_cache = 0; + dReturn_val_if_fail((a_Capi_get_flags(web->url) & CAPI_IsCached) || + Capi_filters_allow(web->url, web->requester), 0); + /* reload test */ reload = (!(a_Capi_get_flags(web->url) & CAPI_IsCached) || (URL_FLAGS(web->url) & URL_E2EQuery)); diff --git a/src/html.cc b/src/html.cc index b325c80a..9e72c5f2 100644 --- a/src/html.cc +++ b/src/html.cc @@ -105,8 +105,8 @@ static const char *Html_get_attr2(DilloHtml *html, const char *attrname, int tag_parsing_flags); static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof); -static void Html_load_image(BrowserWindow *bw, DilloUrl *url, - DilloImage *image); +static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, + const DilloUrl *requester, DilloImage *image); static void Html_callback(int Op, CacheClient_t *Client); static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx); @@ -654,12 +654,21 @@ void DilloHtml::loadImages (const DilloUrl *pattern) { dReturn_if_fail (bw->nav_expecting == FALSE); + /* If the user asked for a specific URL, the user (NULL) is the requester, + * but if the user just asked for all URLs, use the page URL as the + * requester. If the possible patterns become more complex, it might be + * good to have the caller supply the requester instead. + */ + const DilloUrl *requester = pattern ? 
NULL : this->page_url; + for (int i = 0; i < images->size(); i++) { if (images->get(i)->image) { if ((!pattern) || (!a_Url_cmp(images->get(i)->url, pattern))) { - Html_load_image(bw, images->get(i)->url, images->get(i)->image); - a_Image_unref (images->get(i)->image); - images->get(i)->image = NULL; // web owns it now + if (Html_load_image(bw, images->get(i)->url, requester, + images->get(i)->image)) { + a_Image_unref (images->get(i)->image); + images->get(i)->image = NULL; // web owns it now + } } } } @@ -2089,9 +2098,10 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, load_now = prefs.load_images || (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached); - Html_add_new_htmlimage(html, &url, load_now ? NULL : Image); + bool loading = false; if (load_now) - Html_load_image(html->bw, url, Image); + loading = Html_load_image(html->bw, url, html->page_url, Image); + Html_add_new_htmlimage(html, &url, loading ? NULL : Image); dFree(tooltip_str); dFree(width_ptr); @@ -2103,13 +2113,13 @@ DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, /* * Tell cache to retrieve image */ -static void Html_load_image(BrowserWindow *bw, DilloUrl *url, - DilloImage *Image) +static bool Html_load_image(BrowserWindow *bw, DilloUrl *url, + const DilloUrl *requester, DilloImage *Image) { DilloWeb *Web; int ClientKey; /* Fill a Web structure for the cache query */ - Web = a_Web_new(url); + Web = a_Web_new(url, requester); Web->bw = bw; Web->Image = Image; a_Image_ref(Image); @@ -2119,6 +2129,7 @@ static void Html_load_image(BrowserWindow *bw, DilloUrl *url, a_Bw_add_client(bw, ClientKey, 0); a_Bw_add_url(bw, url); } + return ClientKey != 0; } /* @@ -2938,7 +2949,7 @@ void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url) } else { /* Fill a Web structure for the cache query */ int ClientKey; - DilloWeb *Web = a_Web_new(url); + DilloWeb *Web = a_Web_new(url, html->page_url); Web->bw = html->bw; if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, 
NULL))) { ++html->bw->NumPendingStyleSheets; @@ -191,7 +191,8 @@ static void Nav_stack_clean(BrowserWindow *bw) * This function requests the page's root-URL; images and related stuff * are fetched directly by the HTML module. */ -static void Nav_open_url(BrowserWindow *bw, const DilloUrl *url, int offset) +static void Nav_open_url(BrowserWindow *bw, const DilloUrl *url, + const DilloUrl *requester, int offset) { DilloUrl *old_url; bool_t MustLoad, ForceReload, Repush, IgnoreScroll; @@ -232,7 +233,7 @@ static void Nav_open_url(BrowserWindow *bw, const DilloUrl *url, int offset) // a_Menu_pagemarks_new(bw); - Web = a_Web_new(url); + Web = a_Web_new(url, requester); Web->bw = bw; Web->flags |= WEB_RootUrl; if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) { @@ -341,7 +342,8 @@ void a_Nav_expect_done(BrowserWindow *bw) * - Set bw to expect the URL data * - Ask the cache to feed back the requested URL (via Nav_open_url) */ -void a_Nav_push(BrowserWindow *bw, const DilloUrl *url) +void a_Nav_push(BrowserWindow *bw, const DilloUrl *url, + const DilloUrl *requester) { dReturn_if_fail (bw != NULL); @@ -353,7 +355,7 @@ void a_Nav_push(BrowserWindow *bw, const DilloUrl *url) a_Nav_cancel_expect(bw); bw->nav_expect_url = a_Url_dup(url); bw->nav_expecting = TRUE; - Nav_open_url(bw, url, 0); + Nav_open_url(bw, url, requester, 0); } /* @@ -370,7 +372,7 @@ static void Nav_repush(BrowserWindow *bw) a_Url_set_flags(url, URL_FLAGS(url) | URL_ReloadFromCache); bw->nav_expect_url = a_Url_dup(url); bw->nav_expecting = TRUE; - Nav_open_url(bw, url, 0); + Nav_open_url(bw, url, NULL, 0); a_Url_free(url); } } @@ -407,7 +409,7 @@ static void Nav_redirection0_callback(void *data) if (bw->meta_refresh_status == 2) { Nav_stack_move_ptr(bw, -1); - a_Nav_push(bw, bw->meta_refresh_url); + a_Nav_push(bw, bw->meta_refresh_url,a_History_get_url(NAV_TOP_UIDX(bw))); } a_Url_free(bw->meta_refresh_url); bw->meta_refresh_url = NULL; @@ -441,7 +443,7 @@ void a_Nav_back(BrowserWindow *bw) 
a_Nav_cancel_expect(bw); if (--idx >= 0){ a_UIcmd_set_msg(bw, ""); - Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), -1); + Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, -1); } } @@ -455,7 +457,7 @@ void a_Nav_forw(BrowserWindow *bw) a_Nav_cancel_expect(bw); if (++idx < a_Nav_stack_size(bw)) { a_UIcmd_set_msg(bw, ""); - Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), +1); + Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, +1); } } @@ -464,7 +466,7 @@ void a_Nav_forw(BrowserWindow *bw) */ void a_Nav_home(BrowserWindow *bw) { - a_Nav_push(bw, prefs.home); + a_Nav_push(bw, prefs.home, NULL); } /* @@ -499,7 +501,7 @@ static void Nav_reload_callback(void *data) a_Url_set_flags(r_url, URL_FLAGS(r_url) & ~URL_SpamSafe); bw->nav_expect_url = r_url; bw->nav_expecting = TRUE; - Nav_open_url(bw, r_url, 0); + Nav_open_url(bw, r_url, NULL, 0); } } } @@ -526,7 +528,7 @@ void a_Nav_jump(BrowserWindow *bw, int offset, int new_bw) a_UIcmd_open_url_nw(bw, a_History_get_url(NAV_UIDX(bw,idx))); } else { a_Nav_cancel_expect(bw); - Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), offset); + Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, offset); a_UIcmd_set_buttons_sens(bw); } } @@ -563,7 +565,7 @@ static void Nav_save_cb(int Op, CacheClient_t *Client) void a_Nav_save_url(BrowserWindow *bw, const DilloUrl *url, const char *filename) { - DilloWeb *Web = a_Web_new(url); + DilloWeb *Web = a_Web_new(url, NULL); Web->bw = bw; Web->filename = dStrdup(filename); Web->flags |= WEB_Download; @@ -14,7 +14,8 @@ extern "C" { #endif /* __cplusplus */ void a_Nav_redirection0(BrowserWindow *bw, const DilloUrl *new_url); -void a_Nav_push(BrowserWindow *bw, const DilloUrl *url); +void a_Nav_push(BrowserWindow *bw, const DilloUrl *url, + const DilloUrl *requester); void a_Nav_repush(BrowserWindow *bw); void a_Nav_back(BrowserWindow *bw); void a_Nav_forw(BrowserWindow *bw); diff --git a/src/prefs.c b/src/prefs.c index 464c496b..ca0b9c15 100644 --- 
a/src/prefs.c +++ b/src/prefs.c @@ -39,6 +39,7 @@ void a_Prefs_init(void) prefs.buffered_drawing = 1; prefs.contrast_visited_color = TRUE; prefs.enterpress_forces_submit = FALSE; + prefs.filter_auto_requests = PREFS_FILTER_SAME_DOMAIN; prefs.focus_new_tab = TRUE; prefs.font_cursive = dStrdup(PREFS_FONT_CURSIVE); prefs.font_factor = 1.0; diff --git a/src/prefs.h b/src/prefs.h index 6015f2fe..684262ed 100644 --- a/src/prefs.h +++ b/src/prefs.h @@ -26,6 +26,9 @@ extern "C" { /* Panel sizes */ enum { P_tiny = 0, P_small, P_medium, P_large }; +enum {PREFS_FILTER_ALLOW_ALL, + PREFS_FILTER_SAME_DOMAIN}; + typedef struct _DilloPrefs DilloPrefs; struct _DilloPrefs { @@ -71,6 +74,7 @@ struct _DilloPrefs { bool_t load_images; bool_t load_stylesheets; bool_t parse_embedded_css; + int filter_auto_requests; int32_t buffered_drawing; char *font_serif; char *font_sans_serif; diff --git a/src/prefsparser.cc b/src/prefsparser.cc index d31c835b..78cade0b 100644 --- a/src/prefsparser.cc +++ b/src/prefsparser.cc @@ -26,6 +26,7 @@ typedef enum { PREFS_INT32, PREFS_DOUBLE, PREFS_GEOMETRY, + PREFS_FILTER, PREFS_PANEL_SIZE } PrefType_t; @@ -50,6 +51,7 @@ int PrefsParser::parseOption(char *name, char *value) { "contrast_visited_color", &prefs.contrast_visited_color, PREFS_BOOL }, { "enterpress_forces_submit", &prefs.enterpress_forces_submit, PREFS_BOOL }, + { "filter_auto_requests", &prefs.filter_auto_requests, PREFS_FILTER }, { "focus_new_tab", &prefs.focus_new_tab, PREFS_BOOL }, { "font_cursive", &prefs.font_cursive, PREFS_STRING }, { "font_factor", &prefs.font_factor, PREFS_DOUBLE }, @@ -138,6 +140,15 @@ int PrefsParser::parseOption(char *name, char *value) a_Misc_parse_geometry(value, &prefs.xpos, &prefs.ypos, &prefs.width, &prefs.height); break; + case PREFS_FILTER: + if (!dStrcasecmp(value, "same_domain")) + prefs.filter_auto_requests = PREFS_FILTER_SAME_DOMAIN; + else { + if (dStrcasecmp(value, "allow_all")) + MSG_WARN("prefs: unrecognized value for filter_auto_requests\n"); + 
prefs.filter_auto_requests = PREFS_FILTER_ALLOW_ALL; + } + break; case PREFS_PANEL_SIZE: if (!dStrcasecmp(value, "tiny")) prefs.panel_size = P_tiny; diff --git a/src/uicmd.cc b/src/uicmd.cc index dd5478cc..df57e855 100644 --- a/src/uicmd.cc +++ b/src/uicmd.cc @@ -612,7 +612,7 @@ void a_UIcmd_open_urlstr(void *vbw, const char *urlstr) */ void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url) { - a_Nav_push(bw, url); + a_Nav_push(bw, url, NULL); if (BW2UI(bw)->get_panelmode() == UI_TEMPORARILY_SHOW_PANELS) BW2UI(bw)->set_panelmode(UI_HIDDEN); a_UIcmd_focus_main_area(bw); @@ -624,7 +624,7 @@ static void UIcmd_open_url_nbw(BrowserWindow *new_bw, const DilloUrl *url) * Location if we don't yet have an URL, main otherwise. */ if (url) { - a_Nav_push(new_bw, url); + a_Nav_push(new_bw, url, NULL); BW2UI(new_bw)->focus_main(); } else { BW2UI(new_bw)->focus_location(); @@ -634,3 +634,121 @@ char *a_Url_string_strip_delimiters(const char *str) } return new_str; } + +/* + * Is the provided hostname an IP address? + */ +static bool_t Url_host_is_ip(const char *host) +{ + uint_t len; + + if (!host || !*host) + return FALSE; + + len = strlen(host); + + if (len == strspn(host, "0123456789.")) { + _MSG("an IPv4 address\n"); + return TRUE; + } + if (*host == '[' && + (len == strspn(host, "0123456789abcdefABCDEF:.[]"))) { + /* The precise format is shown in section 3.2.2 of rfc 3986 */ + _MSG("an IPv6 address\n"); + return TRUE; + } + return FALSE; +} + +/* + * How many internal dots are in the public portion of this hostname? + * e.g., for "www.dillo.org", it is one because everything under "dillo.org", + * as a .org domain, is part of one organization. + * + * Of course this is only a simple and imperfect approximation of + * organizational boundaries. + */ +static uint_t Url_host_public_internal_dots(const char *host) +{ + uint_t ret = 1; + + if (host) { + int start, after, tld_len; + + /* We may be able to trust the format of the host string more than + * I am here. 
Trailing dots and no dots are real possibilities, though. + */ + after = strlen(host); + if (after > 0 && host[after - 1] == '.') + after--; + start = after; + while (start > 0 && host[start - 1] != '.') + start--; + tld_len = after - start; + + if (tld_len > 0) { + /* These TLDs were chosen by examining the current publicsuffix list + * in January 2010 and picking out those where it was simplest for + * them to describe the situation by beginning with a "*.[tld]" rule. + */ + const char *const tlds[] = {"ar","au","bd","bn","bt","ck","cy","do", + "eg","er","et","fj","fk","gt","gu","id", + "il","jm","ke","kh","kw","ml","mm","mt", + "mz","ni","np","nz","om","pg","py","qa", + "sv","tr","uk","uy","ve","ye","yu","za", + "zm","zw"}; + uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]); + + for (i = 0; i < tld_num; i++) { + if (strlen(tlds[i]) == (uint_t) tld_len && + !dStrncasecmp(tlds[i], host + start, tld_len)) { + MSG("TLD code matched %s\n", tlds[i]); + ret++; + break; + } + } + } + } + return ret; +} + +/* + * Given a URL host string, return the portion that is public, i.e., the + * domain that is in a registry outside the organization. + * For 'www.dillo.org', that would be 'dillo.org'. + */ +const char *a_Url_host_find_public_suffix(const char *host) +{ + const char *s; + uint_t dots; + + if (!host || !*host || Url_host_is_ip(host)) + return host; + + s = host; + + while (s[1]) + s++; + + if (s > host && *s == '.') { + /* don't want to deal with trailing dot */ + s--; + } + + dots = Url_host_public_internal_dots(host); + + /* With a proper host string, we should not be pointing to a dot now. 
*/ + + while (s > host) { + if (s[-1] == '.') { + if (dots == 0) + break; + else + dots--; + } + s--; + } + + MSG("public suffix of %s is %s\n", host, s); + return s; +} @@ -123,7 +123,7 @@ void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str); char *a_Url_decode_hex_str(const char *str); char *a_Url_encode_hex_str(const char *str); char *a_Url_string_strip_delimiters(const char *str); - +const char *a_Url_host_find_public_suffix(const char *host); #ifdef __cplusplus } #endif /* __cplusplus */ @@ -103,12 +103,13 @@ int a_Web_dispatch_by_type (const char *Type, DilloWeb *Web, /* * Allocate and set safe values for a DilloWeb structure */ -DilloWeb* a_Web_new(const DilloUrl *url) +DilloWeb* a_Web_new(const DilloUrl *url, const DilloUrl *requester) { DilloWeb *web= dNew(DilloWeb, 1); _MSG(" a_Web_new: ValidWebs ==> %d\n", dList_length(ValidWebs)); web->url = a_Url_dup(url); + web->requester = a_Url_dup(requester); web->bw = NULL; web->flags = 0; web->Image = NULL; @@ -136,6 +137,7 @@ void a_Web_free(DilloWeb *web) { if (!web) return; a_Url_free(web->url); + a_Url_free(web->requester); a_Image_unref(web->Image); dFree(web->filename); dList_remove(ValidWebs, (void *)web); @@ -22,6 +22,8 @@ typedef struct _DilloWeb DilloWeb; struct _DilloWeb { DilloUrl *url; /* Requested URL */ + DilloUrl *requester; /* URL that caused this request, or + * NULL if user-initiated. */ BrowserWindow *bw; /* The requesting browser window [reference] */ int flags; /* Additional info */ @@ -34,7 +36,7 @@ struct _DilloWeb { }; void a_Web_init(void); -DilloWeb* a_Web_new (const DilloUrl* url); +DilloWeb* a_Web_new (const DilloUrl* url, const DilloUrl *requester); int a_Web_valid(DilloWeb *web); void a_Web_free (DilloWeb*); int a_Web_dispatch_by_type (const char *Type, DilloWeb *web, |