summaryrefslogtreecommitdiff
path: root/old/test/public_suffix.diff
diff options
context:
space:
mode:
Diffstat (limited to 'old/test/public_suffix.diff')
-rw-r--r--old/test/public_suffix.diff341
1 files changed, 341 insertions, 0 deletions
diff --git a/old/test/public_suffix.diff b/old/test/public_suffix.diff
new file mode 100644
index 0000000..fbe7950
--- /dev/null
+++ b/old/test/public_suffix.diff
@@ -0,0 +1,341 @@
+Just making a backup. I just started testing and it appears to be
+functioning, but it hasn't been cleaned up or anything yet.
+
+diff -r 96f19eb5687f dpi/cookies.c
+--- a/dpi/cookies.c Fri Jan 15 19:23:04 2010 +0000
++++ b/dpi/cookies.c Sat Jan 16 02:38:26 2010 +0000
+@@ -100,6 +100,13 @@
+ Dlist *dlist;
+ } CookieNode;
+
++/* TODO Probably combine these two using "key" */
++
++typedef struct {
++ char *tld; /* last label of the rules' domain, as cut out by Cookies_get_tld() */
++ Dlist *list; /* the CookiesPSRule entries read for that tld */
++} CookieSuffixNode;
++
+ typedef struct {
+ char *name;
+ char *value;
+@@ -112,6 +119,12 @@
+ } CookieData_t;
+
+ typedef struct {
++ bool_t exception; /* the rule line began with '!' */
++ bool_t wildcard; /* a '*' came first, or directly after the '!' */
++ char *name; /* rule text with those markers removed */
++} CookiesPSRule;
++
++typedef struct {
+ Dsh *sh;
+ int status;
+ } ClientInfo;
+@@ -137,6 +150,8 @@
+ "# This is a generated file! Do not edit.\n"
+ "# [domain TRUE path secure expiry_time name value]\n\n";
+
++static Dlist *Cookies_public_suffixes; /* CookieSuffixNode entries, sorted by tld */
++static char *Cookies_public_suffix_filename; /* from a "public_suffix_file" line */
+
+ /*
+ * Forward declarations
+@@ -157,6 +172,13 @@
+ return dStrcasecmp(n1->domain, n2->domain);
+ }
+
++/* Order two CookieSuffixNode entries by tld, ignoring case. */
++static int Cookies_suffix_node_cmp(const void *v1, const void *v2)
++{
++   return dStrcasecmp(((const CookieSuffixNode *) v1)->tld,
++                      ((const CookieSuffixNode *) v2)->tld);
++}
++
+ /*
+ * Compare function for searching a cookie node by domain
+ */
+@@ -168,6 +190,14 @@
+ return dStrcasecmp(node->domain, domain);
+ }
+
++/* Search comparison: v1 is a CookieSuffixNode, v2 is a tld string. */
++static int Cookies_suffix_node_by_tld_cmp(const void *v1, const void *v2)
++{
++   const char *wanted = v2;
++   const char *have = ((const CookieSuffixNode *) v1)->tld;
++   return dStrcasecmp(have, wanted);
++}
++
+ /*
+ * Return a file pointer. If the file doesn't exist, try to create it,
+ * with the optional 'init_str' as its content.
+@@ -215,6 +245,107 @@
+ dFree(cookie);
+ }
+
++/* Return a newly allocated copy of the last label of 'host' ("" if NULL). */
++static char *Cookies_get_tld(const char *host)
++{
++   uint_t end, begin;
++
++   if (!host)
++      return dStrdup("");
++
++   /* A single trailing dot is not part of the label. */
++   end = strlen(host);
++   if (end > 0 && host[end - 1] == '.')
++      end--;
++
++   /* Walk back to just after the preceding dot, or to the string start. */
++   for (begin = end; begin > 0 && host[begin - 1] != '.'; begin--)
++      ;
++   return dStrndup(host + begin, end - begin);
++}
++
++/*
++ * Read the public suffix rules from Cookies_public_suffix_filename into
++ * Cookies_public_suffixes, grouped by TLD. Lines that are empty or start
++ * with "//" are skipped.
++ */
++static void Cookies_get_public_suffix_list()
++{
++ FILE *stream;
++ char line[LINE_MAXLEN];
++ int rule_count = 0;
++
++ dReturn_if_fail(Cookies_public_suffix_filename);
++ if (!(stream = fopen(Cookies_public_suffix_filename, "r"))) {
++ MSG("Cannot read public suffix list \"%s\"\n",
++ Cookies_public_suffix_filename);
++ return;
++ }
++ MSG("Reading public suffix list \"%s\"\n", Cookies_public_suffix_filename);
++
++ while (!feof(stream)) {
++ line[0] = '\0'; /* so a failed fgets leaves an empty line, skipped below */
++ if (!fgets(line, LINE_MAXLEN, stream) && ferror(stream)) {
++ MSG("Error while reading public suffix file \"%s\": %s\n",
++ Cookies_public_suffix_filename, dStrerror(errno));
++ break; /* bail out */
++ }
++
++ /* Remove leading and trailing whitespace */
++ dStrstrip(line);
++
++ if (!*line || (line[0] == '/' && line[1] == '/'))
++ continue;
++
++ CookieSuffixNode *tld_node;
++ CookiesPSRule *rule;
++ char *suffix;
++ char *tld = Cookies_get_tld(line);
++ if (rule_count++ == 0)
++ Cookies_public_suffixes = dList_new(250); /* created on the first rule */
++
++ tld_node = dList_find_sorted(Cookies_public_suffixes, tld,
++ Cookies_suffix_node_by_tld_cmp);
++ if (!tld_node) {
++ tld_node = dNew(CookieSuffixNode, 1);
++ tld_node->tld = tld; /* the new node keeps this string */
++ tld_node->list = dList_new(5);
++ dList_insert_sorted(Cookies_public_suffixes, tld_node,
++ Cookies_suffix_node_cmp);
++ } else {
++ dFree(tld); /* the existing node already has its own copy */
++ }
++ rule = dNew0(CookiesPSRule, 1);
++ suffix = line;
++ if (*suffix == '!') {
++ rule->exception = TRUE;
++ suffix++; /* skip the '!' */
++ }
++ if (*suffix == '*') {
++ rule->wildcard = TRUE;
++ suffix++; /* skip the '*' */
++ if (*suffix != '.')
++ MSG("WARNING: Dillo assumes that . follows * (rule \"%s\")\n",
++ line);
++ if (strchr(suffix, '*'))
++ MSG("WARNING: Dillo assumes only one * (rule \"%s\")\n", line);
++ }
++ rule->name = dStrdup(suffix); /* rule text after any '!' and '*' */
++ dList_append(tld_node->list, rule);
++ }
++
++ if (feof(stream)) {
++ /* all is well */
++ /* TODO We should consider a file with suspiciously few rules to be an
++ * error case as well.
++ */
++ MSG("%d rules read from public suffix list\n", rule_count);
++ } else {
++ /* TODO Tear down what we've built; the partial list is left as is. */
++ }
++ fclose(stream);
++}
++
+ /*
+ * Initialize the cookies module
+ * (The 'disabled' variable is writeable only within Cookies_init)
+@@ -331,6 +462,7 @@
+ Cookies_add_cookie(cookie);
+ }
+ }
++ Cookies_get_public_suffix_list();
+ }
+
+ /*
+@@ -873,6 +1005,67 @@
+ }
+ }
+
++/*
++ * Return TRUE if cookie domain 'd' must be refused: it is NULL, its TLD has
++ * no rules, or it is a public suffix. One leading/trailing dot is ignored.
++ */
++static bool_t Cookies_is_public_suffix(const char *d)
++{
++   char *tld, *domain;
++   uint_t domain_len;
++   CookieSuffixNode *node;
++   int i;
++   bool_t ret = FALSE;
++
++   dReturn_val_if_fail(d, TRUE);
++   domain = dStrdup(*d == '.' ? d + 1 : d);
++   domain_len = strlen(domain);
++   if (domain_len && domain[domain_len - 1] == '.')
++      domain[--domain_len] = '\0';
++
++   tld = Cookies_get_tld(domain);
++   node = dList_find_sorted(Cookies_public_suffixes, tld,
++                            Cookies_suffix_node_by_tld_cmp);
++   if (!node) {
++      /* Perhaps we could use the dots-counting code in this case */
++      MSG("Warning: unrecognised tld \"%s\"\n", tld);
++      dFree(tld); /* only now: the message above still reads it */
++      dFree(domain);
++      return TRUE;
++   }
++   dFree(tld);
++
++   for (i = 0; i < dList_length(node->list); i++) {
++      const CookiesPSRule *rule = dList_nth_data(node->list, i);
++      /* BUG: punycode is not handled, although the list insists upon having
++       * non-ascii domains. If possible, put de-punycoded rules in the list.
++       */
++      if (!dStrcasecmp(domain, rule->name)) {
++         if (rule->exception) {
++            ret = FALSE;
++            MSG("Public suffix specifically allows %s\n", rule->name);
++            break;
++         } else {
++            ret = TRUE;
++            MSG("Public suffix has a rule to disallow %s\n", rule->name);
++         }
++      } else if (rule->wildcard) {
++         uint_t rule_len = strlen(rule->name);
++         const char *dot = strchr(domain, '.');
++         /* Match: domain ends in the rule text with one label before it. */
++         if (dot && domain_len > rule_len &&
++             !dStrcasecmp(domain + domain_len - rule_len, rule->name) &&
++             dot >= domain + domain_len - rule_len) {
++            ret = TRUE;
++            MSG("Public suffix has a wildcard rule to disallow *%s\n",
++                rule->name);
++         }
++      }
++   }
++   dFree(domain);
++   return ret;
++}
++
+ /*
+ * Based on the host, how many internal dots do we need in a cookie domain
+ * to make it valid? e.g., "org" is not on the list, so dillo.org is a safe
+@@ -887,20 +1080,9 @@
+ uint_t ret = 1;
+
+ if (host) {
+- int start, after, tld_len;
++ char *tld = Cookies_get_tld(host);
+
+- /* We may be able to trust the format of the host string more than
+- * I am here. Trailing dots and no dots are real possibilities, though.
+- */
+- after = strlen(host);
+- if (after > 0 && host[after - 1] == '.')
+- after--;
+- start = after;
+- while (start > 0 && host[start - 1] != '.')
+- start--;
+- tld_len = after - start;
+-
+- if (tld_len > 0) {
++ if (*tld) {
+ /* These TLDs were chosen by examining the current publicsuffix list
+ * in January 2010 and picking out those where it was simplest for
+ * them to describe the situation by beginning with a "*.[tld]" rule.
+@@ -914,14 +1096,14 @@
+ uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
+
+ for (i = 0; i < tld_num; i++) {
+- if (strlen(tlds[i]) == (uint_t) tld_len &&
+- !dStrncasecmp(tlds[i], host + start, tld_len)) {
+- MSG("TLD code matched %s\n", tlds[i]);
++ if (!dStrcasecmp(tlds[i], tld)) {
++ MSG("TLD code matched %s\n", tld);
+ ret++;
+ break;
+ }
+ }
+ }
++ dFree(tld);
+ }
+ return ret;
+ }
+@@ -947,18 +1129,21 @@
+ if (!Cookies_domain_matches(host, cookie->domain))
+ return FALSE;
+
+- internal_dots = 0;
+- for (i = 1; i < strlen(cookie->domain) - 1; i++) {
+- if (cookie->domain[i] == '.')
+- internal_dots++;
+- }
++ if (Cookies_public_suffixes) {
++ if (Cookies_is_public_suffix(cookie->domain))
++ return FALSE;
++ } else {
++ /* fall back on our dot-counting heuristics */
++ internal_dots = 0;
++ for (i = 1; i < strlen(cookie->domain) - 1; i++) {
++ if (cookie->domain[i] == '.')
++ internal_dots++;
++ }
+
+- /* All of this dots business is a weak hack.
+- * TODO: accept the publicsuffix.org list as an optional external file.
+- */
+- if (internal_dots < Cookies_internal_dots_required(host)) {
+- MSG("not enough dots in %s\n", cookie->domain);
+- return FALSE;
++ if (internal_dots < Cookies_internal_dots_required(host)) {
++ MSG("not enough dots in %s\n", cookie->domain);
++ return FALSE;
++ }
+ }
+
+ MSG("host %s and domain %s is all right\n", host, cookie->domain);
+@@ -1187,6 +1372,11 @@
+ rule[j++] = line[i++];
+ rule[j] = '\0';
+
++ if (!dStrcasecmp(domain, "public_suffix_file")) {
++ Cookies_public_suffix_filename = dStrdup(rule);
++ continue;
++ }
++
+ if (dStrcasecmp(rule, "ACCEPT") == 0)
+ cc.action = COOKIE_ACCEPT;
+ else if (dStrcasecmp(rule, "ACCEPT_SESSION") == 0)