1 files changed, 17 insertions, 14 deletions
diff --git a/src/url.c b/src/url.c
index 9d3e14b2..c1a8396d 100644
--- a/src/url.c
+++ b/src/url.c
@@ -366,23 +366,23 @@ DilloUrl* a_Url_new(const char *url_str, const char *base_url)
 
    dReturn_val_if_fail (url_str != NULL, NULL);
 
-   /* Count illegal characters (0x00-0x1F, 0x7F and space) */
+   /* Count illegal characters (0x00-0x1F, 0x7F-0xFF and space) */
    n_ic = n_ic_spc = 0;
    for (p = (char*)url_str; *p; p++) {
       n_ic_spc += (*p == ' ') ? 1 : 0;
-      n_ic += (*p != ' ' && *p > 0x1F && *p != 0x7F) ? 0 : 1;
+      n_ic += (*p != ' ' && *p > 0x1F && *p < 0x7F) ? 0 : 1;
    }
    if (n_ic) {
       /* Encode illegal characters (they could also be stripped).
        * There's no standard for illegal chars; we chose to encode. */
       p = str1 = dNew(char, strlen(url_str) + 2*n_ic + 1);
       for (i = 0; url_str[i]; ++i)
-         if (url_str[i] > 0x1F && url_str[i] != 0x7F && url_str[i] != ' ')
+         if (url_str[i] > 0x1F && url_str[i] < 0x7F && url_str[i] != ' ')
             *p++ = url_str[i];
-         else  {
-           *p++ = '%';
-           *p++ = HEX[(url_str[i] >> 4) & 15];
-           *p++ = HEX[url_str[i] & 15];
+         else {
+            *p++ = '%';
+            *p++ = HEX[(url_str[i] >> 4) & 15];
+            *p++ = HEX[url_str[i] & 15];
          }
       *p = 0;
       urlstr = str1;
@@ -509,7 +509,7 @@ void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
 
    if (!u->ismap_url_len) {
       /* Save base-url length (without coords) */
-      u->ismap_url_len  = URL_STR_(u) ? u->url_string->len : 0;
+      u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0;
       a_Url_set_flags(u, URL_FLAGS(u) | URL_Ismap);
    }
    if (u->url_string) {
@@ -611,7 +611,7 @@ char *a_Url_encode_hex_str(const char *str)
 /*
  * RFC-3986 suggests this stripping when "importing" URLs from other media.
  * Strip: "URL:", enclosing < >, and embedded whitespace.
- * (We also strip illegal chars: 00-1F and 7F)
+ * (We also strip illegal chars: 00-1F and 7F-FF)
  */
 char *a_Url_string_strip_delimiters(const char *str)
 {
@@ -626,7 +626,7 @@ char *a_Url_string_strip_delimiters(const char *str)
          text++;
 
       for (p = new_str; *text; text++)
-         if (*text > 0x1F && *text != 0x7F && *text != ' ')
+         if (*text > 0x1F && *text < 0x7F && *text != ' ')
             *p++ = *text;
       if (p > new_str && p[-1] == '>')
          --p;
@@ -688,14 +688,17 @@ static uint_t Url_host_public_internal_dots(const char *host)
 
       if (tld_len > 0) {
          /* These TLDs were chosen by examining the current publicsuffix list
-          * in February 2014 and picking out those where it was simplest for
+          * in October 2014 and picking out those where it was simplest for
           * them to describe the situation by beginning with a "*.[tld]" rule
           * or every rule was "[something].[tld]".
+          *
+          * TODO: Consider the old publicsuffix code again. This TLD list has
+          * shrunk and shrunk over the years, and has become a poorer and
+          * poorer approximation of administrative boundaries.
           */
-         const char *const tlds[] = {"bd","bn","ck","cy","er","et","fj","fk",
+         const char *const tlds[] = {"bd","bn","ck","cy","er","fj","fk",
                                      "gu","il","jm","ke","kh","kw","mm","mz",
-                                     "ni","np","nz","pg","tr","uk","ye","za",
-                                     "zm","zw"};
+                                     "ni","np","pg","ye","za","zm","zw"};
          uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
 
          for (i = 0; i < tld_num; i++) {