summary | refs | log | tree | commit | diff
path: root/dpi/datauri.c
diff options
context:
space:
mode:
Diffstat (limited to 'dpi/datauri.c')
-rw-r--r-- dpi/datauri.c 323
1 files changed, 323 insertions, 0 deletions
diff --git a/dpi/datauri.c b/dpi/datauri.c
new file mode 100644
index 00000000..87afd2d9
--- /dev/null
+++ b/dpi/datauri.c
@@ -0,0 +1,323 @@
+/*
+ * File: datauri.c
+ *
+ * Copyright (C) 2006 Jorge Arellano Cid <jcid@dillo.org>
+ *
+ * Filter dpi for the "data:" URI scheme (RFC 2397).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../dpip/dpip.h"
+#include "dpiutil.h"
+
+/*
+ * Debugging macros
+ */
+#define _MSG(...)
+#define MSG(...) printf("[datauri dpi]: " __VA_ARGS__)
+
+/*
+ * Global variables
+ */
+static SockHandler *sh = NULL;
+
+
+
/*
 * Decode a NUL-terminated base64 string in place.
 *
 * The decoded bytes overwrite the beginning of 'str' (the output is never
 * longer than the input) and a terminating NUL is appended.
 *
 * - Characters outside the base64 alphabet (line breaks, blanks, ...)
 *   are skipped.
 * - A '=' pad closes the current 4-character quantum, so any data that
 *   follows it (e.g. concatenated base64 segments) starts a fresh quantum
 *   instead of being mixed with leftover bits.
 *
 * Return value: number of decoded bytes (0 when 'str' is NULL).
 */
int b64decode(unsigned char* str)
{
   /* Maps a byte to its 6-bit value, or -1 if it's not in the alphabet */
   static const signed char table[256] = {
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 00-0F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 10-1F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,  /* 20-2F */
      52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,  /* 30-3F */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,  /* 40-4F */
      15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,  /* 50-5F */
      -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,  /* 60-6F */
      41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,  /* 70-7F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 80-8F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 90-9F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* A0-AF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* B0-BF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* C0-CF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* D0-DF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* E0-EF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* F0-FF */
   };
   unsigned char *in, *out;
   int d, dlast = 0, phase = 0;

   if (str == NULL)
      return 0;

   /* 'out' never overtakes 'in': the first output byte is only written
    * once the second input character has been read. */
   out = str;
   for (in = str; *in != '\0'; ++in) {
      if (*in == '=') {
         /* Padding ends the quantum; drop any pending partial bits. */
         phase = 0;
         dlast = 0;
         continue;
      }

      d = table[*in];
      if (d == -1)
         continue;

      switch (phase) {
      case 0:
         /* first sextet: nothing to emit yet */
         phase = 1;
         break;
      case 1:
         *out++ = (unsigned char)((dlast << 2) | ((d & 0x30) >> 4));
         phase = 2;
         break;
      case 2:
         *out++ = (unsigned char)(((dlast & 0xf) << 4) | ((d & 0x3c) >> 2));
         phase = 3;
         break;
      case 3:
         *out++ = (unsigned char)(((dlast & 0x03) << 6) | d);
         phase = 0;
         break;
      default:
         /* not reachable: phase is always kept within 0..3 */
         phase = 0;
         break;
      }
      dlast = d;
   }
   *out = '\0';
   return (int)(out - str);
}
+
+/* Modified from src/url.c --------------------------------------------------*/
+
/*
 * Return the value (0-15) of a single hexadecimal digit,
 * or -1 if 'c' is not one. Deliberately locale independent.
 */
static int Url_hex_digit_value(char c)
{
   if (c >= '0' && c <= '9')
      return c - '0';
   if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
   if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
   return -1;
}

/*
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character value (0-255) if the octet is valid, and -1 otherwise.
 *
 * Only the first two characters of 's' are examined, and both must be
 * hexadecimal digits: a sign or blank (as in "+1", "-0" or " 1") makes
 * the octet invalid. A NULL or too short string yields -1.
 */
static int Url_decode_hex_octet(const char *s)
{
   int hi, lo;

   if (s == NULL)
      return -1;

   /* A NUL in s[0] fails here, so s[1] is never read past the end. */
   hi = Url_hex_digit_value(s[0]);
   if (hi < 0)
      return -1;

   lo = Url_hex_digit_value(s[1]);
   if (lo < 0)
      return -1;

   return (hi << 4) | lo;
}
+
+/*
+ * Parse possible hexadecimal octets in the URI path.
+ * Returns a new allocated string.
+ */
+char *a_Url_decode_hex_str(const char *str, size_t *p_sz)
+{
+ char *new_str, *dest;
+ int i, val;
+
+ if (!str) {
+ *p_sz = 0;
+ return NULL;
+ }
+
+ dest = new_str = dNew(char, strlen(str) + 1);
+ for (i = 0; str[i]; i++) {
+ *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
+ i+=2, val : str[i];
+ }
+ *dest = 0;
+
+ new_str = dRealloc(new_str, sizeof(char) * (dest - new_str + 1));
+ *p_sz = (size_t)(dest - new_str);
+ return new_str;
+}
+
+/* end ----------------------------------------------------------------------*/
+
+/*
+ * Send decoded data to dillo in an HTTP envelope.
+ */
+void send_decoded_data(const char *url, const char *mime_type,
+ unsigned char *data, size_t data_sz)
+{
+ char *d_cmd;
+
+ /* Send dpip tag */
+ d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
+ sock_handler_write_str(sh, 1, d_cmd);
+ dFree(d_cmd);
+
+ /* Send HTTP header. */
+ sock_handler_write_str(sh, 0, "Content-type: ");
+ sock_handler_write_str(sh, 0, mime_type);
+ sock_handler_write_str(sh, 1, "\n\n");
+
+ /* Send message */
+ sock_handler_write(sh, 0, (char *)data, data_sz);
+}
+
+void send_failure_message(const char *url, const char *mime_type,
+ unsigned char *data, size_t data_sz)
+{
+ char *d_cmd;
+ char buf[1024];
+
+ const char *msg =
+"<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>\n"
+"<html><body>\n"
+"<hr><h1>Datauri dpi</h1><hr>\n"
+"<p><b>Can't parse datauri:</b><br>\n";
+ const char *msg_mime_type="text/html";
+
+ /* Send dpip tag */
+ d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
+ sock_handler_write_str(sh, 1, d_cmd);
+ dFree(d_cmd);
+
+ /* Send HTTP header. */
+ sock_handler_write_str(sh, 0, "Content-type: ");
+ sock_handler_write_str(sh, 0, msg_mime_type);
+ sock_handler_write_str(sh, 1, "\n\n");
+
+ /* Send message */
+ sock_handler_write_str(sh, 0, msg);
+
+ /* send some debug info */
+ snprintf(buf, 1024, "mime_type: %s<br>data size: %d<br>data: %s<br>",
+ mime_type, (int)data_sz, data);
+ sock_handler_write_str(sh, 0, buf);
+
+ /* close page */
+ sock_handler_write_str(sh, 0, "</body></html>");
+}
+
/*
 * Get mime type from the data URI.
 *
 * The media type is the text between "data:" and the first ',', with a
 * trailing ";base64" removed. Omitted parts are completed per RFC 2397:
 *   - nothing given             --> "text/plain;charset=US-ASCII"
 *   - only parameters (";...")  --> "text/plain" + parameters
 *   - bare "charset=..."        --> "text/plain;" + it (lenient)
 * When there's no ',' or the header doesn't fit in the local buffer, the
 * default type is returned as well.
 *
 * Return value: a new allocated string (to be released by the caller),
 * or NULL if 'url' is NULL or doesn't use the "data:" scheme.
 *
 * todo: there's no point in handling "charset" because current dillo
 * only handles ISO-LATIN-1. The FLTK2 version (utf-8) could use it in the
 * future.
 */
char *datauri_get_mime(char *url)
{
   char buf[256];
   char *mime_type = NULL, *comma;
   size_t len = 0;

   if (!url || dStrncasecmp(url, "data:", 5) != 0)
      return NULL;

   comma = strchr(url, ',');
   if (comma && (size_t)(comma - url) < sizeof(buf)) {
      /* The scheme matched, so the ',' lies at or after url + 5. */
      const char *spec = url + 5;

      len = (size_t)(comma - spec);
      memcpy(buf, spec, len);
      buf[len] = 0;
      /* strip ";base64" */
      if (len >= 7 && dStrcasecmp(buf + len - 7, ";base64") == 0) {
         len -= 7;
         buf[len] = 0;
      }
   }

   /* that's it, now handle omitted types ('buf' is valid iff len > 0) */
   if (len == 0) {
      mime_type = dStrdup("text/plain;charset=US-ASCII");
   } else if (buf[0] == ';') {
      /* RFC 2397 shorthand, e.g. ";charset=iso-8859-7" */
      mime_type = dStrconcat("text/plain", buf, NULL);
   } else if (!dStrncasecmp(buf, "charset", 7)) {
      /* parameter without its leading ';' */
      mime_type = dStrconcat("text/plain;", buf, NULL);
   } else {
      mime_type = dStrdup(buf);
   }

   return mime_type;
}
+
/*
 * Return a decoded data string.
 *
 * The data part is whatever follows the first ',' of the URL. It's
 * always percent-decoded; when the header ends with ";base64" the result
 * is then base64-decoded in place. Percent-decoding goes first so that
 * escaped alphabet characters ("%2B", "%2F", "%3D") aren't mistaken for
 * base64 digits.
 *
 * url : the data URI. NULL or a URL without ',' yields an empty string.
 * p_sz: receives the size of the decoded data in bytes.
 *
 * Return value: a new allocated buffer (to be released by the caller).
 */
unsigned char *datauri_get_data(char *url, size_t *p_sz)
{
   char *comma;
   unsigned char *data;
   int is_base64;

   comma = url ? strchr(url, ',') : NULL;
   if (!comma) {
      /* no data part at all */
      *p_sz = 0;
      return (unsigned char *)dStrdup("");
   }

   /* The shortest header that can carry the flag is "data:;base64"
    * (12 chars); the length test also keeps 'comma - 7' inside 'url'. */
   is_base64 = (comma - url >= 12 &&
                dStrncasecmp(comma - 7, ";base64", 7) == 0);

   data = (unsigned char *)a_Url_decode_hex_str(comma + 1, p_sz);
   if (is_base64)
      *p_sz = (size_t) b64decode(data);

   return data;
}
+
+/*
+ *
+ */
+int main(void)
+{
+ char *dpip_tag = NULL, *cmd = NULL, *url = NULL, *mime_type;
+ unsigned char *data;
+ size_t data_size = 0;
+
+ /* Initialize the SockHandler */
+ sh = sock_handler_new(STDIN_FILENO, STDOUT_FILENO, 8*1024);
+
+ /* wget may need to write a temporary file... */
+ chdir("/tmp");
+
+ /* Read the dpi command from STDIN */
+ dpip_tag = sock_handler_read(sh);
+ MSG("[%s]\n", dpip_tag);
+
+ cmd = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "cmd");
+ url = a_Dpip_get_attr(dpip_tag, strlen(dpip_tag), "url");
+ if (!cmd || !url) {
+ MSG("Error, cmd=%s, url=%s\n", cmd, url);
+ exit (EXIT_FAILURE);
+ }
+
+ /* Parse the data URI */
+ mime_type = datauri_get_mime(url);
+ data = datauri_get_data(url, &data_size);
+
+ MSG("mime_type: %s\n", mime_type);
+ MSG("data_size: %d\n", data_size);
+ MSG("data: {%s}\n", data);
+
+ if (mime_type && data) {
+ /* good URI */
+ send_decoded_data(url, mime_type, data, data_size);
+ } else {
+ /* malformed URI */
+ send_failure_message(url, mime_type, data, data_size);
+ }
+
+ dFree(data);
+ dFree(mime_type);
+ dFree(url);
+ dFree(cmd);
+ dFree(dpip_tag);
+
+ /* Finish the SockHandler */
+ sock_handler_close(sh);
+ sock_handler_free(sh);
+
+ return 0;
+}
+