From ba08ee0b4e87d71aae5c96aff1bd89e5c1dd6827 Mon Sep 17 00:00:00 2001
From: jcid
Date: Wed, 28 Nov 2007 15:09:07 +0100
Subject: Added HTTP-1.1's chunked transfer support!

---
 src/IO/http.c |  6 ++--
 src/cache.c   | 82 +++++++++++++++++++++++++++++++++++++++-----------
 src/decode.c  | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/decode.h  |  1 +
 4 files changed, 166 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/IO/http.c b/src/IO/http.c
index c9dd944b..f6a2cc27 100644
--- a/src/IO/http.c
+++ b/src/IO/http.c
@@ -185,7 +185,7 @@ char *a_Http_make_query_str(const DilloUrl *url, bool_t use_proxy)
    if (URL_FLAGS(url) & URL_Post) {
       dStr_sprintfa(
          query,
-         "POST %s HTTP/1.0\r\n"
+         "POST %s HTTP/1.1\r\n"
          "Accept-Charset: utf-8, iso-8859-1\r\n"
          "Host: %s%s\r\n"
          "%s"
@@ -195,6 +195,7 @@ char *a_Http_make_query_str(const DilloUrl *url, bool_t use_proxy)
          "%s"
          "Content-type: application/x-www-form-urlencoded\r\n"
          "Content-length: %ld\r\n"
+         "Connection: close\r\n"
          "\r\n"
          "%s",
          full_path->str, URL_HOST(url), s_port->str,
@@ -205,7 +206,7 @@ char *a_Http_make_query_str(const DilloUrl *url, bool_t use_proxy)
    } else {
       dStr_sprintfa(
          query,
-         "GET %s HTTP/1.0\r\n"
+         "GET %s HTTP/1.1\r\n"
          "%s"
          "Accept-Charset: utf-8, iso-8859-1\r\n"
          "Host: %s%s\r\n"
@@ -213,6 +214,7 @@ char *a_Http_make_query_str(const DilloUrl *url, bool_t use_proxy)
          "User-Agent: Dillo/%s\r\n"
          "Accept-Encoding: gzip\r\n"
          "Cookie2: $Version=\"1\"\r\n"
+         "Connection: close\r\n"
          "%s"
          "\r\n",
          full_path->str,
diff --git a/src/cache.c b/src/cache.c
index 448ce3db..35bbf41e 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -58,7 +58,8 @@ typedef struct {
    Dstr *Header;             /* HTTP header */
    const DilloUrl *Location; /* New URI for redirects */
    Dstr *Data;               /* Pointer to raw data */
-   Decode *Decoder;          /* Data decoder */
+   Decode *TransferDecoder;  /* Transfer decoder (e.g., chunked) */
+   Decode *ContentDecoder;   /* Data decoder (e.g., gzip) */
    int ExpectedSize;         /* Goal size of the HTTP transfer (0 if unknown)*/
    int TransferSize;         /* Actual length of the HTTP transfer */
    uint_t Flags;             /* Look Flag Defines in cache.h */
@@ -205,7 +206,8 @@ static void Cache_entry_init(CacheEntry_t *NewEntry, const DilloUrl *Url)
    NewEntry->Header = dStr_new("");
    NewEntry->Location = NULL;
    NewEntry->Data = dStr_sized_new(8*1024);
-   NewEntry->Decoder = NULL;
+   NewEntry->TransferDecoder = NULL;
+   NewEntry->ContentDecoder = NULL;
    NewEntry->ExpectedSize = 0;
    NewEntry->TransferSize = 0;
    NewEntry->Flags = 0;
@@ -469,6 +471,14 @@ static void Cache_parse_header(CacheEntry_t *entry,
 #endif
 
    if (HdrLen > 12) {
+      if (header[9] == '1' && header[10] == '0' && header[11] == '0') {
+         /* 100: Continue. The "real" header has not come yet. */
+         MSG("An actual 100 Continue header!\n");
+         entry->Flags &= ~CA_GotHeader;
+         dStr_free(entry->Header, 1);
+         entry->Header = dStr_new("");
+         return;
+      }
       if (header[9] == '3' && header[10] == '0') {
          /* 30x: URL redirection */
          entry->Flags |= CA_Redirect;
@@ -487,8 +497,19 @@ static void Cache_parse_header(CacheEntry_t *entry,
    }
 
    if ((Length = Cache_parse_field(header, "Content-Length")) != NULL) {
-      entry->Flags |= CA_GotLength;
-      entry->ExpectedSize = MAX(strtol(Length, NULL, 10), 0);
+      char *tmp;
+      if ((tmp = Cache_parse_field(header, "Transfer-Encoding"))) {
+         /*
+          * BUG: Should test for _presence_ of headers, not whether they
+          * have content.
+          */
+         MSG_HTTP("Both Content-Length and Transfer-Encoding headers"
+                  " received.\n");
+         dFree(tmp);
+      } else {
+         entry->Flags |= CA_GotLength;
+         entry->ExpectedSize = MAX(strtol(Length, NULL, 10), 0);
+      }
       dFree(Length);
    }
 
@@ -505,17 +526,25 @@ static void Cache_parse_header(CacheEntry_t *entry,
    }
 #endif /* !DISABLE_COOKIES */
 
+   /*
+    * Get Transfer-Encoding and initialize decoder
+    */
+   encoding = Cache_parse_field(header, "Transfer-Encoding");
+   entry->TransferDecoder = a_Decode_transfer_init(encoding);
+   dFree(encoding);
+
    /*
     * Get Content-Encoding and initialize decoder
     */
    encoding = Cache_parse_field(header, "Content-Encoding");
-   entry->Decoder = a_Decode_content_init(encoding);
+   entry->ContentDecoder = a_Decode_content_init(encoding);
    dFree(encoding);
 
    dbuf = dStr_sized_new(buf_size - HdrLen);
    dStr_append_l(dbuf, buf + HdrLen, buf_size - HdrLen);
 
-   dbuf = a_Decode_process(entry->Decoder, dbuf);
+   dbuf = a_Decode_process(entry->TransferDecoder, dbuf);
+   dbuf = a_Decode_process(entry->ContentDecoder, dbuf);
 
    if (entry->ExpectedSize > 0) {
       if (entry->ExpectedSize > HUGE_FILESIZE) {
@@ -582,6 +611,7 @@ static int Cache_get_header(CacheEntry_t *entry,
 void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
                           const DilloUrl *Url)
 {
+   int start = 0;
    int len;
    CacheEntry_t *entry = Cache_entry_search(Url);
    Dstr *dbuf;
@@ -599,9 +629,13 @@ void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
       }
       entry->Flags |= CA_GotData;
       entry->Flags &= ~CA_Stopped; /* it may catch up! */
-      if (entry->Decoder) {
-         a_Decode_free(entry->Decoder);
-         entry->Decoder = NULL;
+      if (entry->TransferDecoder) {
+         a_Decode_free(entry->TransferDecoder);
+         entry->TransferDecoder = NULL;
+      }
+      if (entry->ContentDecoder) {
+         a_Decode_free(entry->ContentDecoder);
+         entry->ContentDecoder = NULL;
       }
       dStr_fit(entry->Data); /* fit buffer size! */
       Cache_process_queue(entry);
@@ -612,15 +646,23 @@ void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
       return;
    }
 
+   /*
+    * Cache_get_header() will set CA_GotHeader if it has a full header, and
+    * Cache_parse_header() will unset it if the header turns out to have been
+    * merely an informational response from the server (i.e., 100 Continue)
+    */
    if (!(entry->Flags & CA_GotHeader)) {
-      /* Haven't got the whole header yet */
-      len = Cache_get_header(entry, buf, buf_size);
-      if (entry->Flags & CA_GotHeader) {
-         entry->TransferSize = buf_size - len; /* body */
+      while ((len = Cache_get_header(entry, buf + start, buf_size - start))) {
+
          /* Let's scan, allocate, and set things according to header info */
-         Cache_parse_header(entry, buf, buf_size, len);
-         /* Now that we have it parsed, let's update our clients */
-         Cache_process_queue(entry);
+         Cache_parse_header(entry, buf + start, buf_size - start, len);
+         start += len;
+         if (entry->Flags & CA_GotHeader) {
+            entry->TransferSize = buf_size - start; /* body */
+            /* Now that we have it parsed, let's update our clients */
+            Cache_process_queue(entry);
+            return;
+         }
       }
       return;
    }
@@ -633,9 +675,13 @@ void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
    /* Assert we have a Decoder.
     * BUG: this is a workaround, more study and a proper design
     * for handling redirects is required */
-   if (entry->Decoder != NULL) {
-      dbuf = a_Decode_process(entry->Decoder, dbuf);
+   if (entry->TransferDecoder != NULL) {
+      dbuf = a_Decode_process(entry->TransferDecoder, dbuf);
+   }
+   if (entry->ContentDecoder != NULL) {
+      dbuf = a_Decode_process(entry->ContentDecoder, dbuf);
    }
+
    dStr_append_l(entry->Data, dbuf->str, dbuf->len);
    dStr_free(dbuf, 1);
 
diff --git a/src/decode.c b/src/decode.c
index 30000e05..f53901f1 100644
--- a/src/decode.c
+++ b/src/decode.c
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include <stdlib.h> /* strtol */
 
 #include "decode.h"
 #include "msg.h"
@@ -21,6 +22,75 @@ static void Decode_null_free(Decode *dc)
 {
 }
 
+/*
+ * Decode chunked data
+ */
+static Dstr *Decode_chunked(Decode *dc, Dstr *input)
+{
+   char *inputPtr, *eol;
+   int inputRemaining;
+   int chunkRemaining = *((int *)dc->state);
+   Dstr *output = dStr_sized_new(input->len);
+
+   dStr_append_l(dc->leftover, input->str, input->len);
+   dStr_free(input, 1);
+   input = dc->leftover;
+   inputPtr = input->str;
+   inputRemaining = input->len;
+
+   while (inputRemaining > 0) {
+      if (chunkRemaining > 2) {
+         /* chunk body to copy */
+         int copylen = MIN(chunkRemaining - 2, inputRemaining);
+         dStr_append_l(output, inputPtr, copylen);
+         chunkRemaining -= copylen;
+         inputRemaining -= copylen;
+         inputPtr += copylen;
+      }
+
+      if ((chunkRemaining == 2) && (inputRemaining > 0)) {
+         /* CR to discard */
+         chunkRemaining--;
+         inputRemaining--;
+         inputPtr++;
+      }
+      if ((chunkRemaining == 1) && (inputRemaining > 0)) {
+         /* LF to discard */
+         chunkRemaining--;
+         inputRemaining--;
+         inputPtr++;
+      }
+
+      /*
+       * A chunk has a one-line header that begins with the chunk length
+       * in hexadecimal.
+       */
+      if (!(eol = (char *)memchr(inputPtr, '\n', inputRemaining))) {
+         break; /* We don't have the whole line yet. */
+      }
+
+      if (!(chunkRemaining = strtol(inputPtr, NULL, 0x10))) {
+         break; /* A chunk length of 0 means we're done! */
+      }
+      inputRemaining -= (eol - inputPtr) + 1;
+      inputPtr = eol + 1;
+      chunkRemaining += 2; /* CRLF at the end of every chunk */
+   }
+
+   /* If we have a partial chunk header, save it for next time. */
+   dc->leftover = input;
+   dStr_erase(dc->leftover, 0, inputPtr - input->str);
+
+   *(int *)dc->state = chunkRemaining;
+   return output;
+}
+
+static void Decode_chunked_free(Decode *dc)
+{
+   dFree(dc->state);
+   dStr_free(dc->leftover, 1);
+}
+
 /*
  * Decode gzipped data
  */
@@ -131,6 +201,33 @@ static void Decode_charset_free(Decode *dc)
    dStr_free(dc->leftover, 1);
 }
 
+/*
+ * Initialize transfer decoder. Currently handles "chunked".
+ */
+Decode *a_Decode_transfer_init(const char *format)
+{
+   Decode *dc = (Decode *)dMalloc(sizeof(Decode));
+
+   /* not used */
+   dc->buffer = NULL;
+
+   dc->leftover = dStr_new("");
+
+   if (format && !dStrncasecmp(format, "chunked", 7)) {
+      int *chunk_remaining = (int *)dMalloc(sizeof(int));
+      *chunk_remaining = 0;
+      dc->state = chunk_remaining;
+      dc->decode = Decode_chunked;
+      dc->free = Decode_chunked_free;
+      MSG("chunked!\n");
+   } else {
+      dc->state = NULL;
+      dc->decode = Decode_null;
+      dc->free = Decode_null_free;
+   }
+   return dc;
+}
+
 /*
  * Initialize content decoder. Currently handles gzip.
  *
diff --git a/src/decode.h b/src/decode.h
index 7f8045cd..b3f56eb3 100644
--- a/src/decode.h
+++ b/src/decode.h
@@ -17,6 +17,7 @@ struct _Decode {
    void (*free) (Decode *dc);
 };
 
+Decode *a_Decode_transfer_init(const char *format);
 Decode *a_Decode_content_init(const char *format);
 Decode *a_Decode_charset_init(const char *format);
 Dstr *a_Decode_process(Decode *dc, Dstr *input);
-- 
cgit v1.2.3