diff options
Diffstat (limited to 'src/url.c')
-rw-r--r-- | src/url.c | 31 |
1 files changed, 17 insertions, 14 deletions
@@ -366,23 +366,23 @@ DilloUrl* a_Url_new(const char *url_str, const char *base_url) dReturn_val_if_fail (url_str != NULL, NULL); - /* Count illegal characters (0x00-0x1F, 0x7F and space) */ + /* Count illegal characters (0x00-0x1F, 0x7F-0xFF and space) */ n_ic = n_ic_spc = 0; for (p = (char*)url_str; *p; p++) { n_ic_spc += (*p == ' ') ? 1 : 0; - n_ic += (*p != ' ' && *p > 0x1F && *p != 0x7F) ? 0 : 1; + n_ic += (*p != ' ' && *p > 0x1F && *p < 0x7F) ? 0 : 1; } if (n_ic) { /* Encode illegal characters (they could also be stripped). * There's no standard for illegal chars; we chose to encode. */ p = str1 = dNew(char, strlen(url_str) + 2*n_ic + 1); for (i = 0; url_str[i]; ++i) - if (url_str[i] > 0x1F && url_str[i] != 0x7F && url_str[i] != ' ') + if (url_str[i] > 0x1F && url_str[i] < 0x7F && url_str[i] != ' ') *p++ = url_str[i]; - else { - *p++ = '%'; - *p++ = HEX[(url_str[i] >> 4) & 15]; - *p++ = HEX[url_str[i] & 15]; + else { + *p++ = '%'; + *p++ = HEX[(url_str[i] >> 4) & 15]; + *p++ = HEX[url_str[i] & 15]; } *p = 0; urlstr = str1; @@ -509,7 +509,7 @@ void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str) if (!u->ismap_url_len) { /* Save base-url length (without coords) */ - u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0; + u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0; a_Url_set_flags(u, URL_FLAGS(u) | URL_Ismap); } if (u->url_string) { @@ -611,7 +611,7 @@ char *a_Url_encode_hex_str(const char *str) /* * RFC-3986 suggests this stripping when "importing" URLs from other media. * Strip: "URL:", enclosing < >, and embedded whitespace. - * (We also strip illegal chars: 00-1F and 7F) + * (We also strip illegal chars: 00-1F and 7F-FF) */ char *a_Url_string_strip_delimiters(const char *str) { @@ -626,7 +626,7 @@ char *a_Url_string_strip_delimiters(const char *str) text++; for (p = new_str; *text; text++) - if (*text > 0x1F && *text != 0x7F && *text != ' ') + if (*text > 0x1F && *text < 0x7F && *text != ' ') *p++ = *text; if (p > new_str && p[-1] == '>') --p; @@ -688,14 +688,17 @@ static uint_t Url_host_public_internal_dots(const char *host) if (tld_len > 0) { /* These TLDs were chosen by examining the current publicsuffix list - * in February 2014 and picking out those where it was simplest for + * in October 2014 and picking out those where it was simplest for * them to describe the situation by beginning with a "*.[tld]" rule * or every rule was "[something].[tld]". + * + * TODO: Consider the old publicsuffix code again. This TLD list has + * shrunk and shrunk over the years, and has become a poorer and + * poorer approximation of administrative boundaries. */ - const char *const tlds[] = {"bd","bn","ck","cy","er","et","fj","fk", + const char *const tlds[] = {"bd","bn","ck","cy","er","fj","fk", "gu","il","jm","ke","kh","kw","mm","mz", - "ni","np","nz","pg","tr","uk","ye","za", - "zm","zw"}; + "ni","np","pg","ye","za","zm","zw"}; uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]); for (i = 0; i < tld_num; i++) { |