diff options
-rw-r--r-- | dpi/cookies.c | 215 | ||||
-rw-r--r-- | test/cookies.c | 112 |
2 files changed, 263 insertions, 64 deletions
diff --git a/dpi/cookies.c b/dpi/cookies.c index 46be18fc..420bd69d 100644 --- a/dpi/cookies.c +++ b/dpi/cookies.c @@ -455,10 +455,9 @@ static void Cookies_save_and_free() } /* - * Take a month's name and return a number between 0-11. - * E.g. 'April' -> 3 + * Month parsing */ -static int Cookies_get_month(const char *month_name) +static bool_t Cookies_get_month(struct tm *tm, const char **str) { static const char *const months[] = { "Jan", "Feb", "Mar", @@ -469,76 +468,178 @@ static int Cookies_get_month(const char *month_name) int i; for (i = 0; i < 12; i++) { - if (!dStrnAsciiCasecmp(months[i], month_name, 3)) - return i; + if (!dStrnAsciiCasecmp(months[i], *str, 3)) { + _MSG("Found month: %s\n", months[i]); + tm->tm_mon = i; + *str += 3; + return TRUE; + } } - return -1; + return FALSE; } /* - * Accept: RFC-1123 | RFC-850 | ANSI asctime | Old Netscape format date string. - * - * Wdy, DD-Mon-YY HH:MM:SS GMT - * Wdy, DD-Mon-YYYY HH:MM:SS GMT - * Weekday, DD-Mon-YY HH:MM:SS GMT - * Weekday, DD-Mon-YYYY HH:MM:SS GMT - * Tue May 21 13:46:22 1991\n - * Tue May 21 13:46:22 1991 + * As seen in the production below, it's just one digit or two. + * Return the value, or -1 if no proper value found. 
+ */ +static int Cookies_get_timefield(const char **str) +{ + int n; + const char *s = *str; + + if (!isdigit(*s)) + return -1; + + n = *(s++) - '0'; + if (isdigit(*s)) { + n *= 10; + n += *(s++) - '0'; + if (isdigit(*s)) + return -1; + } + *str = s; + return n; +} + +/* + * Time parsing: 'time-field ":" time-field ":" time-field' + * 'time-field = 1*2DIGIT' + */ +static bool_t Cookies_get_time(struct tm *tm, const char **str) +{ + const char *s = *str; + + if ((tm->tm_hour = Cookies_get_timefield(&s)) == -1) + return FALSE; + + if (*(s++) != ':') + return FALSE; + + if ((tm->tm_min = Cookies_get_timefield(&s)) == -1) + return FALSE; + + if (*(s++) != ':') + return FALSE; + + if ((tm->tm_sec = Cookies_get_timefield(&s)) == -1) + return FALSE; + + *str = s; + return TRUE; +} + +/* + * Day parsing: "day-of-month = 1*2DIGIT" + */ +static bool_t Cookies_get_day(struct tm *tm, const char **str) +{ + const char *s = *str; + + if ((tm->tm_mday = Cookies_get_timefield(&s)) == -1) + return FALSE; + + *str = s; + return TRUE; +} + +/* + * Year parsing: "year = 2*4DIGIT" + */ +static bool_t Cookies_get_year(struct tm *tm, const char **str) +{ + int n; + const char *s = *str; + + if (isdigit(*s)) + n = *(s++) - '0'; + else + return FALSE; + if (isdigit(*s)) { + n *= 10; + n += *(s++) - '0'; + } else + return FALSE; + if (isdigit(*s)) { + n *= 10; + n += *(s++) - '0'; + } + if (isdigit(*s)) { + n *= 10; + n += *(s++) - '0'; + } + if (isdigit(*s)) { + /* Sorry, users of prehistoric software in the year 10000! */ + return FALSE; + } + if (n >= 70 && n <= 99) + n += 1900; + else if (n <= 69) + n += 2000; + + tm->tm_year = n - 1900; + + *str = s; + return TRUE; +} + +/* + * As given in RFC 6265. + */ +static bool_t Cookies_date_delim(char c) +{ + return (c == '\x09' || + (c >= '\x20' && c <= '\x2F') || + (c >= '\x3B' && c <= '\x40') || + (c >= '\x5B' && c <= '\x60') || + (c >= '\x7B' && c <= '\x7E')); +} + +/* + * Parse date string. 
* - Let's add: - * Mon Jan 11 08:00:00 2010 GMT + * A true nightmare of date formats appears in cookies, so one basically + * has to paw through the soup and look for anything that looks sufficiently + * like any of the date fields. * * Return a pointer to a struct tm, or NULL on error. - * - * NOTE that the RFC wants user agents to be more flexible in what - * they accept. For now, let's hack in special cases when they're encountered. - * Why? Because this function is currently understandable, and I don't want to - * abandon that (or at best decrease that -- see section 5.1.1) until there - * is known to be good reason. */ static struct tm *Cookies_parse_date(const char *date) { - struct tm *tm; - char *cp = strchr(date, ','); - - if (!cp && strlen(date)>20 && date[13] == ':' && date[16] == ':') { - /* Looks like ANSI asctime format... */ - tm = dNew0(struct tm, 1); - - cp = (char *)date; - tm->tm_mon = Cookies_get_month(cp + 4); - tm->tm_mday = strtol(cp + 8, NULL, 10); - tm->tm_hour = strtol(cp + 11, NULL, 10); - tm->tm_min = strtol(cp + 14, NULL, 10); - tm->tm_sec = strtol(cp + 17, NULL, 10); - tm->tm_year = strtol(cp + 20, NULL, 10) - 1900; - - } else if (cp && (cp - date == 3 || cp - date > 5) && - (strlen(cp) == 24 || strlen(cp) == 26)) { - /* RFC-1123 | RFC-850 format | Old Netscape format */ - tm = dNew0(struct tm, 1); - - tm->tm_mday = strtol(cp + 2, NULL, 10); - tm->tm_mon = Cookies_get_month(cp + 5); - tm->tm_year = strtol(cp + 9, &cp, 10); - /* tm_year is the number of years since 1900 */ - if (tm->tm_year < 70) - tm->tm_year += 100; - else if (tm->tm_year > 100) - tm->tm_year -= 1900; - tm->tm_hour = strtol(cp + 1, NULL, 10); - tm->tm_min = strtol(cp + 4, NULL, 10); - tm->tm_sec = strtol(cp + 7, NULL, 10); - - } else { + bool_t found_time = FALSE, found_day = FALSE, found_month = FALSE, + found_year = FALSE, matched; + struct tm *tm = dNew0(struct tm, 1); + const char *s = date; + + while (*s) { + matched = FALSE; + + if (!found_time) + matched = 
found_time = Cookies_get_time(tm, &s); + if (!matched && !found_day) + matched = found_day = Cookies_get_day(tm, &s); + if (!matched && !found_month) + matched = found_month = Cookies_get_month(tm, &s); + if (!matched && !found_year) + matched = found_year = Cookies_get_year(tm, &s); + while (*s && !Cookies_date_delim(*s)) + s++; + while (*s && Cookies_date_delim(*s)) + s++; + } + if (!found_time || !found_day || !found_month || !found_year) { + dFree(tm); tm = NULL; MSG("In date \"%s\", format not understood.\n", date); } - /* Error checks. This may be overkill. */ + /* Error checks. This may be overkill. + * + * RFC 6265: "Note that leap seconds cannot be represented in this + * syntax." I'm not sure whether that's good, but that's what it says. + */ if (tm && !(tm->tm_mday > 0 && tm->tm_mday < 32 && tm->tm_mon >= 0 && - tm->tm_mon < 12 && tm->tm_year >= 70 && tm->tm_hour >= 0 && + tm->tm_mon < 12 && tm->tm_year >= 0 && tm->tm_hour >= 0 && tm->tm_hour < 24 && tm->tm_min >= 0 && tm->tm_min < 60 && tm->tm_sec >= 0 && tm->tm_sec < 60)) { MSG("Date \"%s\" values not in range.\n", date); diff --git a/test/cookies.c b/test/cookies.c index 85031043..fe1f543f 100644 --- a/test/cookies.c +++ b/test/cookies.c @@ -688,16 +688,20 @@ static void expires_extremes() time_t t; char *server_date; - a_Cookies_set("name=val; expires=Fri Dec 13 20:45:52 1901", "expmin.com", + a_Cookies_set("name=val; expires=Fri Dec 13 20:45:52 1801", "expmin.com", "/", NULL); expect(__LINE__, "", "http", "expmin.com", "/"); + a_Cookies_set("name=val; expires=Fri Dec 13 20:45:52 1901", "expmin2.com", + "/", NULL); + expect(__LINE__, "", "http", "expmin2.com", "/"); + a_Cookies_set("name=val; expires=Wed Dec 31 23:59:59 1969", "expneg.com", "/", NULL); expect(__LINE__, "", "http", "expneg.com", "/"); - a_Cookies_set("name=val; expires=Thu Jan 1 00:00:00 1970", "expepoch.com", - "/", NULL); + a_Cookies_set("name=val; expires=Thu, 01-January-70 00:00:00 GMT", + "expepoch.com", "/", NULL); 
expect(__LINE__, "", "http", "expepoch.com", "/"); /* TODO: revisit these tests in a few decades */ @@ -705,8 +709,8 @@ static void expires_extremes() "/", NULL); expect(__LINE__, "Cookie: name=val\r\n", "http", "expmax.com", "/"); - a_Cookies_set("name=val; expires=Sun Jan 1 00:00:00 2040", "pastmax.com", - "/", NULL); + a_Cookies_set("name=val; expires=Sun January 1 00:00:00 2040", + "pastmax.com", "/", NULL); expect(__LINE__, "Cookie: name=val\r\n", "http", "pastmax.com", "/"); t = time(NULL)+1000; @@ -728,8 +732,8 @@ static void expires_extremes() "/", server_date); expect(__LINE__, "Cookie: name=val\r\n", "http", "expmaxa.com", "/"); - a_Cookies_set("name=val; expires=Sun Jan 1 00:00:00 2040", "pastmaxa.com", - "/", server_date); + a_Cookies_set("name=val; expires=Thu, 01-Jan-40 00:00:00 GMT", + "pastmaxa.com", "/", server_date); expect(__LINE__, "Cookie: name=val\r\n", "http", "pastmaxa.com", "/"); t = time(NULL)-1000; @@ -759,6 +763,99 @@ static void expires_extremes() dFree(server_date); } +/* + * On 11 Aug 2009, Dan Winship posted to the http-state list with a bunch of + * date formats he'd gathered. Let's work from that. I'll include his comments + * below in double quotes. + */ +static void expires_date_formats() +{ + /* "Revised Netscape spec format" */ + a_Cookies_set("name=val; expires=Mon, 10-Dec-2037 17:02:24 GMT", + "format1.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format1.com", "/"); + + /* "rfc1123-date" */ + a_Cookies_set("name=val; expires=Wed, 09 Dec 2037 16:27:23 GMT", + "format2.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format2.com", "/"); + + /* "4-digit-year version of Netscape spec example (see below). + * Seems to only come from sites using PHP, but it's not PHP + * itself; maybe some framework?" 
+ */ + a_Cookies_set("name=val; expires=Thursday, 01-Jan-2036 00:00:00 GMT", + "format3.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format3.com", "/"); + + /* "The not-quite-asctime format used by Amazon." */ + a_Cookies_set("name=val; expires=Mon Dec 10 16:32:30 2037 GMT", + "format4.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format4.com", "/"); + + /* "The syntax used by the example text in the Netscape spec, + * although the actual grammar uses abbreviated weekday names" + */ + a_Cookies_set("name=val; expires=Wednesday, 01-Jan-37 00:00:00 GMT", + "format5.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format5.com", "/"); + + /* "Original Netscape spec" */ + a_Cookies_set("name=val; expires=Mon, 10-Dec-37 20:35:03 GMT", + "format6.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format6.com", "/"); + + /* "If this had '01 Jan' it would be an rfc1123-date. This *is* a + * legitimate rfc822 date, though not an rfc2822 date because 'GMT' + * is deprecated in favor of '+0000' there." + */ + a_Cookies_set("name=val; expires=Wed, 1 Jan 2035 00:00:00 GMT", + "format7.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format7.com", "/"); + + /* "Would match the 'weird php' syntax above if it was '08-Dec'" */ + a_Cookies_set("name=val; expires=Saturday, 8-Dec-2035 21:24:09 GMT", + "format8.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format8.com", "/"); + + /* "God only knows what they were thinking. This came from a hit-tracker + * site, and it's possible that it's just totally broken and no one parses + * it 'correctly'" + */ + a_Cookies_set("name=val; expires=Thu, 31 Dec 23:55:55 2037 GMT", + "format9.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "format9.com", "/"); + + /* "Another kind of rfc822 / nearly-rfc1123 date, using superfluous + * whitespace." 
+ */ + a_Cookies_set("name=val; expires=Sun, 9 Dec 2036 13:42:05 GMT", + "formata.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "formata.com", "/"); + + /* "Another kind of 'lets throw components together at random'. The + * site that this cookie came has apparently been fixed since then. + * (It uses the Netscape spec format now.)" + */ + a_Cookies_set("name=val; expires=Wed Dec 12 2037 08:44:07 GMT-0500 (EST)", + "formatb.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "formatb.com", "/"); + + a_Cookies_set("name=val; expires=Sun, 1-Jan-2035 00:00:00 GMT", + "formatc.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "formatc.com", "/"); + + /* ...and the remaining handful that he encountered once or twice were + * far too broken to deserve our attention (e.g., times like "13:57:2"). + */ + + /* Now here's what github was sending in 2015. */ + a_Cookies_set("name=val; expires=Sat, 07 Jul 2035 21:41:24 -0000", + "formatd.com", "/", NULL); + expect(__LINE__, "Cookie: name=val\r\n", "http", "formatd.com", "/"); + +} + static void path() { a_Cookies_set("name=val; path=/", "p1.com", "/", NULL); @@ -887,6 +984,7 @@ int main() expires_server_ahead(); expires_server_behind(); expires_extremes(); + expires_date_formats(); a_Cookies_set("name=val; expires=\"Sun Jan 10 00:00:00 2038\"", "quoted-date.org", "/", NULL); |