summary | refs | log | tree | commit | diff
path: root/src/url.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/url.c')
-rw-r--r-- src/url.c 31
1 files changed, 17 insertions, 14 deletions
diff --git a/src/url.c b/src/url.c
index 9d3e14b2..c1a8396d 100644
--- a/src/url.c
+++ b/src/url.c
@@ -366,23 +366,23 @@ DilloUrl* a_Url_new(const char *url_str, const char *base_url)
dReturn_val_if_fail (url_str != NULL, NULL);
- /* Count illegal characters (0x00-0x1F, 0x7F and space) */
+ /* Count illegal characters (0x00-0x1F, 0x7F-0xFF and space) */
n_ic = n_ic_spc = 0;
for (p = (char*)url_str; *p; p++) {
n_ic_spc += (*p == ' ') ? 1 : 0;
- n_ic += (*p != ' ' && *p > 0x1F && *p != 0x7F) ? 0 : 1;
+ n_ic += (*p != ' ' && *p > 0x1F && *p < 0x7F) ? 0 : 1;
}
if (n_ic) {
/* Encode illegal characters (they could also be stripped).
* There's no standard for illegal chars; we chose to encode. */
p = str1 = dNew(char, strlen(url_str) + 2*n_ic + 1);
for (i = 0; url_str[i]; ++i)
- if (url_str[i] > 0x1F && url_str[i] != 0x7F && url_str[i] != ' ')
+ if (url_str[i] > 0x1F && url_str[i] < 0x7F && url_str[i] != ' ')
*p++ = url_str[i];
- else {
- *p++ = '%';
- *p++ = HEX[(url_str[i] >> 4) & 15];
- *p++ = HEX[url_str[i] & 15];
+ else {
+ *p++ = '%';
+ *p++ = HEX[(url_str[i] >> 4) & 15];
+ *p++ = HEX[url_str[i] & 15];
}
*p = 0;
urlstr = str1;
@@ -509,7 +509,7 @@ void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
if (!u->ismap_url_len) {
/* Save base-url length (without coords) */
- u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0;
+ u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0;
a_Url_set_flags(u, URL_FLAGS(u) | URL_Ismap);
}
if (u->url_string) {
@@ -611,7 +611,7 @@ char *a_Url_encode_hex_str(const char *str)
/*
* RFC-3986 suggests this stripping when "importing" URLs from other media.
* Strip: "URL:", enclosing < >, and embedded whitespace.
- * (We also strip illegal chars: 00-1F and 7F)
+ * (We also strip illegal chars: 00-1F and 7F-FF)
*/
char *a_Url_string_strip_delimiters(const char *str)
{
@@ -626,7 +626,7 @@ char *a_Url_string_strip_delimiters(const char *str)
text++;
for (p = new_str; *text; text++)
- if (*text > 0x1F && *text != 0x7F && *text != ' ')
+ if (*text > 0x1F && *text < 0x7F && *text != ' ')
*p++ = *text;
if (p > new_str && p[-1] == '>')
--p;
@@ -688,14 +688,17 @@ static uint_t Url_host_public_internal_dots(const char *host)
if (tld_len > 0) {
/* These TLDs were chosen by examining the current publicsuffix list
- * in February 2014 and picking out those where it was simplest for
+ * in October 2014 and picking out those where it was simplest for
* them to describe the situation by beginning with a "*.[tld]" rule
* or every rule was "[something].[tld]".
+ *
+ * TODO: Consider the old publicsuffix code again. This TLD list has
+ * shrunk and shrunk over the years, and has become a poorer and
+ * poorer approximation of administrative boundaries.
*/
- const char *const tlds[] = {"bd","bn","ck","cy","er","et","fj","fk",
+ const char *const tlds[] = {"bd","bn","ck","cy","er","fj","fk",
"gu","il","jm","ke","kh","kw","mm","mz",
- "ni","np","nz","pg","tr","uk","ye","za",
- "zm","zw"};
+ "ni","np","pg","ye","za","zm","zw"};
uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
for (i = 0; i < tld_num; i++) {