summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author jcid <devnull@localhost> 2007-11-14 12:40:05 +0100
committer jcid <devnull@localhost> 2007-11-14 12:40:05 +0100
commit a157f391577f9dcb986215e0f1ded06c825934b5 (patch)
tree 2e51f6cf4a1c4bec9579a4050397b04c598b5ad4 /src
parent e329567b2984f1dee65d088002863355cf5b2a3b (diff)
Added the missing decode.[ch]
Diffstat (limited to 'src')
-rw-r--r--src/decode.c228
-rw-r--r--src/decode.h29
2 files changed, 257 insertions, 0 deletions
diff --git a/src/decode.c b/src/decode.c
new file mode 100644
index 00000000..6bb6acd8
--- /dev/null
+++ b/src/decode.c
@@ -0,0 +1,228 @@
+
+#include <zlib.h>
+#include <iconv.h>
+#include <errno.h>
+
+#include "decode.h"
+#include "msg.h"
+
+
+/*
+ * Size, in bytes, of the scratch output buffer that the gzip and charset
+ * decoders allocate and decode into. Private to this file, hence static.
+ */
+static const int bufsize = 8*1024;
+
+
+/*
+ * Pass-through decoder: builds a fresh Dstr holding a copy of the inLen
+ * bytes found at inData. The decoder object itself is not consulted.
+ */
+static Dstr *Decode_null(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *copy;
+
+   (void)dc;
+
+   copy = dStr_new("");
+   dStr_append_l(copy, inData, inLen);
+
+   return copy;
+}
+
+/*
+ * Release hook paired with Decode_null: the pass-through decoder holds
+ * no resources, so there is nothing to do.
+ */
+static void Decode_null_free(Decode *dc)
+{
+   (void)dc;
+}
+
+
+/*
+ * Inflate one chunk of a gzip stream.
+ *
+ * dc->state must point to the z_stream set up by a_Decode_content_init()
+ * and dc->buffer to a scratch buffer of bufsize bytes.
+ *
+ * Returns a new Dstr with whatever decompressed bytes this chunk produced
+ * (possibly empty). Decoding stops quietly at end of stream or on the
+ * first zlib error; output of a failing inflate() call is not appended.
+ */
+static Dstr *Decode_gzip(Decode *dc, const char *inData, int inLen)
+{
+   z_stream *zs = (z_stream *)dc->state;
+   Dstr *output = dStr_new("");
+   int rc = Z_OK;
+
+   if (inLen <= 0)
+      return output;
+
+   /* Hand the whole chunk to zlib once; it advances next_in/avail_in. */
+   zs->next_in = (Bytef *)inData;
+   zs->avail_in = (uInt)inLen;
+
+   do {
+      zs->next_out = (Bytef *)dc->buffer;
+      zs->avail_out = bufsize;
+
+      rc = inflate(zs, Z_SYNC_FLUSH);
+
+      if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
+         /* Z_STREAM_END at end of file */
+         dStr_append_l(output, dc->buffer, bufsize - (int)zs->avail_out);
+      }
+
+      /*
+       * Keep going while input remains, and also while the output buffer
+       * came back full: zlib may still hold pending output even though
+       * all input has been consumed. A call that can make no progress
+       * returns Z_BUF_ERROR, which ends the loop.
+       */
+   } while ((rc == Z_OK) && (zs->avail_in > 0 || zs->avail_out == 0));
+
+   return output;
+}
+
+/*
+ * Release everything the gzip decoder owns: zlib's internal inflate
+ * state, the z_stream structure itself, and the scratch buffer.
+ */
+static void Decode_gzip_free(Decode *dc)
+{
+   (void)inflateEnd((z_stream *)dc->state);
+
+   /*
+    * inflateEnd() only releases zlib's internal allocations; the z_stream
+    * was allocated with dMalloc() at init time and must be freed here.
+    */
+   dFree(dc->state);
+   dFree(dc->buffer);
+}
+
+
+/*
+ * Convert one chunk of text to UTF-8 using the iconv descriptor stored in
+ * dc->state.
+ *
+ * The chunk is appended to dc->leftover (bytes left unconverted by the
+ * previous call) and as much as possible of the result is converted:
+ *  - an illegal input sequence (EILSEQ) is skipped one byte at a time and
+ *    replaced in the output by U+FFFD encoded as UTF-8;
+ *  - an incomplete multibyte sequence at the end of the input (EINVAL)
+ *    is kept in dc->leftover for the next call;
+ *  - a full output buffer (E2BIG) just triggers another pass.
+ *
+ * Returns a new Dstr with the converted bytes (possibly empty).
+ */
+static Dstr *Decode_charset(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *output = dStr_new("");
+   char *inPtr, *outPtr;
+   size_t inLeft, outRoom, rc;
+   int err = 0;
+
+   dStr_append_l(dc->leftover, inData, inLen);
+   inPtr = dc->leftover->str;
+   inLeft = (size_t)dc->leftover->len;
+
+   while ((err != EINVAL) && (inLeft > 0)) {
+
+      outPtr = dc->buffer;
+      outRoom = (size_t)bufsize;
+
+      /* iconv() takes size_t counters; passing ints would be undefined. */
+      rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
+
+      /* Capture errno right away, before any other call can clobber it. */
+      err = (rc == (size_t)-1) ? errno : 0;
+
+      dStr_append_l(output, dc->buffer, bufsize - (int)outRoom);
+
+      if (err == EILSEQ) {
+         inPtr++;
+         inLeft--;
+         /*
+          * U+FFFD: "used to replace an incoming character whose value is
+          * unknown or unrepresentable in Unicode."
+          */
+         dStr_append_c(output, 0xEF);
+         dStr_append_c(output, 0xBF);
+         dStr_append_c(output, 0xBD);
+      } else if (err != 0 && err != E2BIG && err != EINVAL) {
+         /* Unexpected failure: stop instead of looping without progress. */
+         break;
+      }
+   }
+
+   /* Drop what was consumed; keep the unconverted tail for the next call. */
+   dStr_erase(dc->leftover, 0, dc->leftover->len - (int)inLeft);
+
+   return output;
+}
+
+/*
+ * Release the charset decoder's resources: the pending-input string, the
+ * scratch buffer and the iconv conversion descriptor.
+ */
+static void Decode_charset_free(Decode *dc)
+{
+   iconv_t cd = (iconv_t)(dc->state);
+
+   dStr_free(dc->leftover, 1);
+   dFree(dc->buffer);
+
+   /* Nothing useful can be done if closing fails, so ignore the result. */
+   (void)iconv_close(cd);
+}
+
+
+/*
+ * Create a decoder for the given content encoding.
+ *
+ * format: content-encoding name; may be NULL.
+ *
+ * When format is "gzip" (case-insensitive) and zlib initializes, the
+ * returned decoder inflates gzip data. In every other case, including a
+ * zlib initialization failure, it is a pass-through decoder that copies
+ * its input unchanged.
+ *
+ * Release the result with a_Decode_free().
+ */
+Decode *a_Decode_content_init(const char *format)
+{
+   Decode *dc = (Decode *)dMalloc(sizeof(Decode));
+
+   dc->buffer = NULL;
+   dc->state = NULL;
+   /* leftover is only used by the charset decoder */
+   dc->leftover = NULL;
+   dc->decode = Decode_null;
+   dc->free = Decode_null_free;
+
+   if (format && !dStrcasecmp(format, "gzip")) {
+      z_stream *zs = (z_stream *)dMalloc(sizeof(z_stream));
+
+      MSG("compressed data! : %s\n", format);
+
+      /* zlib requires zalloc, zfree and opaque to be set before init. */
+      zs->zalloc = Z_NULL;
+      zs->zfree = Z_NULL;
+      zs->opaque = Z_NULL;
+      zs->next_in = Z_NULL;
+      zs->avail_in = 0;
+
+      /* Adding 16 to windowBits makes zlib decode the gzip format. */
+      if (inflateInit2(zs, MAX_WBITS+16) == Z_OK) {
+         dc->buffer = (char *)dMalloc(bufsize);
+         dc->state = zs;
+         dc->decode = Decode_gzip;
+         dc->free = Decode_gzip_free;
+      } else {
+         MSG("Unable to initialize gzip decoder\n");
+         dFree(zs);
+      }
+   }
+   return dc;
+}
+
+/*
+ * Return non-zero when str is, ignoring case, one of the listed names for
+ * the ISO-8859-1 (Latin-1) character set, and 0 otherwise.
+ *
+ * dStrcasecmp() yields 0 on a match, so the comparisons are chained with
+ * '&&': the conjunction is false as soon as any single name matches.
+ */
+static int Decode_is_latin1(const char *str)
+{
+   return (!(dStrcasecmp(str, "ISO-8859-1") &&
+             dStrcasecmp(str, "latin1") &&
+             dStrcasecmp(str, "ISO_8859-1:1987") &&
+             dStrcasecmp(str, "ISO_8859-1") &&
+             dStrcasecmp(str, "iso-ir-100") &&
+             dStrcasecmp(str, "l1") &&
+             dStrcasecmp(str, "IBM819") &&
+             dStrcasecmp(str, "CP819") &&
+             dStrcasecmp(str, "csISOLatin1")));
+}
+
+/*
+ * Return non-zero when str is, ignoring case, one of the listed names for
+ * the US-ASCII character set, and 0 otherwise.
+ *
+ * dStrcasecmp() yields 0 on a match, so the comparisons are chained with
+ * '&&': the conjunction is false as soon as any single name matches.
+ */
+static int Decode_is_ascii(const char *str)
+{
+   return (!(dStrcasecmp(str, "ASCII") &&
+             dStrcasecmp(str, "US-ASCII") &&
+             dStrcasecmp(str, "us") &&
+             dStrcasecmp(str, "IBM367") &&
+             dStrcasecmp(str, "cp367") &&
+             dStrcasecmp(str, "csASCII") &&
+             dStrcasecmp(str, "ANSI_X3.4-1968") &&
+             dStrcasecmp(str, "iso-ir-6") &&
+             dStrcasecmp(str, "ANSI_X3.4-1986") &&
+             dStrcasecmp(str, "ISO_646.irv:1991") &&
+             dStrcasecmp(str, "ISO646-US")));
+}
+
+/*
+ * Create a decoder that converts text from the named character set to
+ * UTF-8.
+ *
+ * format: character set name; may be NULL or empty.
+ *
+ * A pass-through decoder is returned when format is NULL, empty, UTF-8,
+ * one of the Latin-1 or ASCII aliases, or when iconv cannot convert from
+ * it (a message is logged in that last case). Otherwise the decoder
+ * converts through iconv.
+ *
+ * Release the result with a_Decode_free().
+ */
+Decode *a_Decode_charset_init(const char *format)
+{
+   Decode *dc = (Decode *)dMalloc(sizeof(Decode));
+
+   if (format &&
+       *format &&
+       dStrcasecmp(format,"UTF-8") &&
+       !Decode_is_latin1(format) &&
+       !Decode_is_ascii(format)) {
+
+      iconv_t ic = iconv_open("UTF-8", format);
+
+      if (ic != (iconv_t) -1) {
+         dc->state = ic;
+         dc->buffer = (char *)dMalloc(bufsize);
+         dc->leftover = dStr_new("");
+
+         dc->decode = Decode_charset;
+         dc->free = Decode_charset_free;
+         return dc;
+      }
+      MSG("Unable to convert from character encoding: '%s'\n", format);
+   }
+
+   /* Pass-through decoder: leave no field uninitialized or invalid. */
+   dc->state = NULL;
+   dc->leftover = NULL;
+   dc->buffer = NULL;
+   dc->decode = Decode_null;
+   dc->free = Decode_null_free;
+   return dc;
+}
+
+/*
+ * Run inLen bytes at inData through the decoder's own decode routine and
+ * hand back the Dstr that routine returns.
+ */
+Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *decoded = dc->decode(dc, inData, inLen);
+
+   return decoded;
+}
+
+/*
+ * Destroy a decoder: let its own release hook free the decoder-specific
+ * resources, then free the Decode structure itself.
+ * Passing NULL is allowed and does nothing.
+ */
+void a_Decode_free(Decode *dc)
+{
+   if (dc == NULL)
+      return;
+
+   dc->free(dc);
+   dFree(dc);
+}
diff --git a/src/decode.h b/src/decode.h
new file mode 100644
index 00000000..2cdc29e7
--- /dev/null
+++ b/src/decode.h
@@ -0,0 +1,29 @@
+#ifndef __DECODE_H__
+#define __DECODE_H__
+
+#include "../dlib/dlib.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Decoder object; created by the a_Decode_*_init() functions. */
+typedef struct _Decode Decode;
+
+/*
+ * State of one decoder. Which fields are in use depends on the decoder
+ * selected at init time (pass-through, gzip or charset).
+ */
+struct _Decode {
+ /* Scratch output buffer; NULL for the pass-through decoder. */
+ char *buffer;
+ /* Input not yet converted by the charset decoder; NULL otherwise. */
+ Dstr *leftover;
+ /* Decoder-specific handle: a z_stream pointer (gzip) or an iconv_t. */
+ void *state;
+ /* Decodes inLen bytes at inData and returns the result as a Dstr. */
+ Dstr *(*decode) (Decode *dc, const char *inData, int inLen);
+ /* Releases the decoder-specific resources (not the struct itself). */
+ void (*free) (Decode *dc);
+};
+
+/* Create a content-encoding decoder ("gzip", else pass-through). */
+Decode *a_Decode_content_init(const char *format);
+/* Create a decoder converting from the named charset to UTF-8. */
+Decode *a_Decode_charset_init(const char *format);
+/*
+ * Decode inLen bytes at inData; returns the decoder's output as a Dstr.
+ * NOTE(review): decode.c never frees the returned Dstr, so the caller
+ * presumably owns it -- confirm against callers.
+ */
+Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen);
+/* Release the decoder's resources and the Decode structure itself. */
+void a_Decode_free(Decode *dc);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __DECODE_H__ */