diff options
author | jcid <devnull@localhost> | 2007-11-27 00:24:52 +0100 |
---|---|---|
committer | jcid <devnull@localhost> | 2007-11-27 00:24:52 +0100 |
commit | b0358c3dea7a0f34dc494d82be08ae814a480a50 (patch) | |
tree | 264e3369e8a55e7c41131e5519c7eee8cb8656f0 /src | |
parent | 1f8d6679cf90bd66f514c8a9b1d5536358ca28d3 (diff) |
Made the decoder a filter by avoiding one copy pass.
Diffstat (limited to 'src')
-rw-r--r-- | src/cache.c | 28 |
-rw-r--r-- | src/decode.c | 82 |
-rw-r--r-- | src/decode.h | 4 |
3 files changed, 68 insertions, 46 deletions
diff --git a/src/cache.c b/src/cache.c index 4f7f074c..448ce3db 100644 --- a/src/cache.c +++ b/src/cache.c @@ -366,7 +366,7 @@ int a_Cache_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize) /* Test for a redirection loop */ if (entry->Flags & CA_RedirectLoop || i == 3) { - MSG_WARN("Redirect loop for URL: >%s<\n", URL_STR_(Url)); + _MSG_WARN("Redirect loop for URL: >%s<\n", URL_STR_(Url)); break; } /* Test for a working redirection */ @@ -461,7 +461,7 @@ static void Cache_parse_header(CacheEntry_t *entry, { char *header = entry->Header->str; char *Length, *Type, *location_str, *encoding; - Dstr *decodedBuf; + Dstr *dbuf; #ifndef DISABLE_COOKIES Dlist *Cookies; void *data; @@ -512,8 +512,10 @@ static void Cache_parse_header(CacheEntry_t *entry, entry->Decoder = a_Decode_content_init(encoding); dFree(encoding); - decodedBuf = a_Decode_process(entry->Decoder, buf + HdrLen, - buf_size - HdrLen); + dbuf = dStr_sized_new(buf_size - HdrLen); + dStr_append_l(dbuf, buf + HdrLen, buf_size - HdrLen); + + dbuf = a_Decode_process(entry->Decoder, dbuf); if (entry->ExpectedSize > 0) { if (entry->ExpectedSize > HUGE_FILESIZE) { @@ -525,8 +527,8 @@ static void Cache_parse_header(CacheEntry_t *entry, dStr_free(entry->Data, 1); entry->Data = dStr_sized_new(MIN(entry->ExpectedSize+1, MAX_INIT_BUF)); } - dStr_append_l(entry->Data, decodedBuf->str, decodedBuf->len); - dStr_free(decodedBuf, 1); + dStr_append_l(entry->Data, dbuf->str, dbuf->len); + dStr_free(dbuf, 1); /* Get Content-Type */ if ((Type = Cache_parse_field(header, "Content-Type")) == NULL) { @@ -582,7 +584,7 @@ void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size, { int len; CacheEntry_t *entry = Cache_entry_search(Url); - Dstr *decodedBuf; + Dstr *dbuf; /* Assert a valid entry (not aborted) */ dReturn_if_fail (entry != NULL); @@ -625,16 +627,18 @@ void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size, entry->TransferSize += buf_size; + dbuf = dStr_sized_new(buf_size); + dStr_append_l(dbuf, 
buf, buf_size); + /* Assert we have a Decoder. * BUG: this is a workaround, more study and a proper design * for handling redirects is required */ if (entry->Decoder != NULL) { - decodedBuf = a_Decode_process(entry->Decoder, buf, buf_size); - dStr_append_l(entry->Data, decodedBuf->str, decodedBuf->len); - dStr_free(decodedBuf, 1); - } else { - dStr_append_l(entry->Data, buf, buf_size); + dbuf = a_Decode_process(entry->Decoder, dbuf); } + dStr_append_l(entry->Data, dbuf->str, dbuf->len); + dStr_free(dbuf, 1); + Cache_process_queue(entry); } diff --git a/src/decode.c b/src/decode.c index 7ae9bfa4..30000e05 100644 --- a/src/decode.c +++ b/src/decode.c @@ -7,22 +7,24 @@ #include "msg.h" -const int bufsize = 8*1024; +static const int bufsize = 8*1024; - -static Dstr *Decode_null(Decode *dc, const char *inData, int inLen) +/* + * null ("identity") decoding + */ +static Dstr *Decode_null(Decode *dc, Dstr *input) { - Dstr *d = dStr_new(""); - dStr_append_l(d, inData, inLen); - return d; + return input; } static void Decode_null_free(Decode *dc) { } - -static Dstr *Decode_gzip(Decode *dc, const char *inData, int inLen) +/* + * Decode gzipped data + */ +static Dstr *Decode_gzip(Decode *dc, Dstr *input) { int rc = Z_OK; @@ -31,9 +33,9 @@ static Dstr *Decode_gzip(Decode *dc, const char *inData, int inLen) int inputConsumed = 0; Dstr *output = dStr_new(""); - while ((rc == Z_OK) && (inputConsumed < inLen)) { - zs->next_in = (Bytef *)inData + inputConsumed; - zs->avail_in = inLen - inputConsumed; + while ((rc == Z_OK) && (inputConsumed < input->len)) { + zs->next_in = (Bytef *)input->str + inputConsumed; + zs->avail_in = input->len - inputConsumed; zs->next_out = (Bytef *)dc->buffer; zs->avail_out = bufsize; @@ -52,6 +54,7 @@ static Dstr *Decode_gzip(Decode *dc, const char *inData, int inLen) } } + dStr_free(input, 1); return output; } @@ -62,19 +65,23 @@ static void Decode_gzip_free(Decode *dc) dFree(dc->buffer); } - -static Dstr *Decode_charset(Decode *dc, const char *inData, 
int inLen) +/* + * Translate to desired character set (UTF-8) + */ +static Dstr *Decode_charset(Decode *dc, Dstr *input) { int rc = 0; - Dstr *input, *output; + Dstr *output; char *inPtr, *outPtr; size_t inLeft, outRoom; output = dStr_new(""); + dStr_append_l(dc->leftover, input->str, input->len); + dStr_free(input, 1); input = dc->leftover; - dStr_append_l(input, inData, inLen); + inPtr = input->str; inLeft = input->len; @@ -103,6 +110,7 @@ static Dstr *Decode_charset(Decode *dc, const char *inData, int inLen) * unknown or unrepresentable in Unicode." */ //dStr_append(output, "\ufffd"); + // \uxxxx is C99. UTF-8-specific: dStr_append_c(output, 0xEF); dStr_append_c(output, 0xBF); dStr_append_c(output, 0xBD); @@ -123,7 +131,12 @@ static void Decode_charset_free(Decode *dc) dStr_free(dc->leftover, 1); } - +/* + * Initialize content decoder. Currently handles gzip. + * + * zlib is also capable of handling "deflate"/zlib-encoded data, but web + * servers have not standardized on whether to send such data with a header. + */ Decode *a_Decode_content_init(const char *format) { Decode *dc = (Decode *)dMalloc(sizeof(Decode)); @@ -158,19 +171,9 @@ Decode *a_Decode_content_init(const char *format) return dc; } -static int Decode_is_latin1(const char *str) -{ - return (!(dStrcasecmp(str, "ISO-8859-1") || - dStrcasecmp(str, "latin1") || - dStrcasecmp(str, "ISO_8859-1:1987") || - dStrcasecmp(str, "ISO_8859-1") || - dStrcasecmp(str, "iso-ir-100") || - dStrcasecmp(str, "l1") || - dStrcasecmp(str, "IBM819") || - dStrcasecmp(str, "CP819") || - dStrcasecmp(str, "csISOLatin1"))); -} - +/* + * Legal names for the ASCII character set + */ static int Decode_is_ascii(const char *str) { return (!(dStrcasecmp(str, "ASCII") || @@ -186,6 +189,13 @@ static int Decode_is_ascii(const char *str) dStrcasecmp(str, "ISO646-US"))); } +/* + * Initialize decoder to translate from any character set known to iconv() + * to UTF-8. 
+ * + * GNU iconv(1) will provide a list of known character sets if invoked with + * the "--list" flag. + */ Decode *a_Decode_charset_init(const char *format) { Decode *dc = (Decode *)dMalloc(sizeof(Decode)); @@ -193,7 +203,6 @@ Decode *a_Decode_charset_init(const char *format) if (format && strlen(format) && dStrcasecmp(format,"UTF-8") && - !Decode_is_latin1(format) && !Decode_is_ascii(format)) { iconv_t ic; @@ -216,11 +225,20 @@ Decode *a_Decode_charset_init(const char *format) return dc; } -Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen) +/* + * Filter data using our decoder. + * + * The input string should not be used after this call. The decoder will + * free it if necessary. + */ +Dstr *a_Decode_process(Decode *dc, Dstr *input) { - return dc->decode(dc, inData, inLen); + return dc->decode(dc, input); } +/* + * free our decoder + */ void a_Decode_free(Decode *dc) { dc->free(dc); diff --git a/src/decode.h b/src/decode.h index 2cdc29e7..7f8045cd 100644 --- a/src/decode.h +++ b/src/decode.h @@ -13,13 +13,13 @@ struct _Decode { char *buffer; Dstr *leftover; void *state; - Dstr *(*decode) (Decode *dc, const char *inData, int inLen); + Dstr *(*decode) (Decode *dc, Dstr *input); void (*free) (Decode *dc); }; Decode *a_Decode_content_init(const char *format); Decode *a_Decode_charset_init(const char *format); -Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen); +Dstr *a_Decode_process(Decode *dc, Dstr *input); void a_Decode_free(Decode *dc); #ifdef __cplusplus |