diff options
Diffstat (limited to 'src/misc.c')
-rw-r--r-- | src/misc.c | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/src/misc.c b/src/misc.c new file mode 100644 index 00000000..f7ab00d3 --- /dev/null +++ b/src/misc.c @@ -0,0 +1,271 @@ +/* + * File: misc.c + * + * Copyright (C) 2000 Jorge Arellano Cid <jcid@dillo.org>, + * Jörgen Viksell <vsksga@hotmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <ctype.h> + +#include "msg.h" +#include "misc.h" + + +/* + * Escape characters as %XX sequences. + * Return value: New string. + */ +char *a_Misc_escape_chars(const char *str, char *esc_set) +{ + static const char *hex = "0123456789ABCDEF"; + char *p = NULL; + Dstr *dstr; + int i; + + dstr = dStr_sized_new(64); + for (i = 0; str[i]; ++i) { + if (str[i] <= 0x1F || str[i] == 0x7F || strchr(esc_set, str[i])) { + dStr_append_c(dstr, '%'); + dStr_append_c(dstr, hex[(str[i] >> 4) & 15]); + dStr_append_c(dstr, hex[str[i] & 15]); + } else { + dStr_append_c(dstr, str[i]); + } + } + p = dstr->str; + dStr_free(dstr, FALSE); + + return p; +} + + +#define TAB_SIZE 8 +/* + * Takes a string and converts any tabs to spaces. + */ +char *a_Misc_expand_tabs(const char *str) +{ + Dstr *New = dStr_new(""); + int len, i, j, pos, old_pos; + char *val; + + if ((len = strlen(str))) { + for (pos = 0, i = 0; i < len; i++) { + if (str[i] == '\t') { + /* Fill with whitespaces until the next tab. */ + old_pos = pos; + pos += TAB_SIZE - (pos % TAB_SIZE); + for (j = old_pos; j < pos; j++) + dStr_append_c(New, ' '); + } else { + dStr_append_c(New, str[i]); + pos++; + } + } + } + val = New->str; + dStr_free(New, FALSE); + return val; +} + +/* TODO: could use dStr ADT! */ +typedef struct ContentType_ { + const char *str; + int len; +} ContentType_t; + +static const ContentType_t MimeTypes[] = { + { "application/octet-stream", 24 }, + { "text/html", 9 }, + { "text/plain", 10 }, + { "image/gif", 9 }, + { "image/png", 9 }, + { "image/jpeg", 10 }, + { NULL, 0 } +}; + +/* + * Detects 'Content-Type' from a data stream sample. + * + * It uses the magic(5) logic from file(1). Currently, it + * only checks the few mime types that Dillo supports. + * + * 'Data' is a pointer to the first bytes of the raw data. + * + * Return value: (0 on success, 1 on doubt, 2 on lack of data). + */ +int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT) +{ + int st = 1; /* default to "doubt' */ + int Type = 0; /* default to "application/octet-stream" */ + char *p = Data; + size_t i, non_ascci; + + /* HTML try */ + for (i = 0; i < Size && isspace(p[i]); ++i); + if ((Size - i >= 5 && !dStrncasecmp(p+i, "<html", 5)) || + (Size - i >= 5 && !dStrncasecmp(p+i, "<head", 5)) || + (Size - i >= 6 && !dStrncasecmp(p+i, "<title", 6)) || + (Size - i >= 14 && !dStrncasecmp(p+i, "<!doctype html", 14)) || + /* this line is workaround for FTP through the Squid proxy */ + (Size - i >= 17 && !dStrncasecmp(p+i, "<!-- HTML listing", 17))) { + + Type = 1; + st = 0; + /* Images */ + } else if (Size >= 4 && !dStrncasecmp(p, "GIF8", 4)) { + Type = 3; + st = 0; + } else if (Size >= 4 && !dStrncasecmp(p, "\x89PNG", 4)) { + Type = 4; + st = 0; + } else if (Size >= 2 && !dStrncasecmp(p, "\xff\xd8", 2)) { + /* JPEG has the first 2 bytes set to 0xffd8 in BigEndian - looking + * at the character representation should be machine independent. */ + Type = 5; + st = 0; + + /* Text */ + } else { + /* We'll assume "text/plain" if the set of chars above 127 is <= 10 + * in a 256-bytes sample. Better heuristics are welcomed! :-) */ + non_ascci = 0; + Size = MIN (Size, 256); + for (i = 0; i < Size; i++) + if ((uchar_t) p[i] > 127) + ++non_ascci; + if (Size == 256) { + Type = (non_ascci > 10) ? 0 : 2; + st = 0; + } else { + Type = (non_ascci > 0) ? 0 : 2; + } + } + + *PT = MimeTypes[Type].str; + return st; +} + +/* + * Check the server-supplied 'Content-Type' against our detected type. + * (some servers seem to default to "text/plain"). + * + * Return value: + * 0, if they match + * -1, if a mismatch is detected + * + * There're many MIME types Dillo doesn't know, they're handled + * as "application/octet-stream" (as the SPEC says). + * + * A mismatch happens when receiving a binary stream as + * "text/plain" or "text/html", or an image that's not an image of its kind. + * + * Note: this is a basic security procedure. + * + */ +int a_Misc_content_type_check(const char *EntryType, const char *DetectedType) +{ + int i; + int st = -1; + + _MSG("Type check: [Srv: %s Det: %s]\n", EntryType, DetectedType); + + if (!EntryType) + return 0; /* there's no mismatch without server type */ + + for (i = 1; MimeTypes[i].str; ++i) + if (dStrncasecmp(EntryType, MimeTypes[i].str, MimeTypes[i].len) == 0) + break; + + if (!MimeTypes[i].str) { + /* type not found, no mismatch */ + st = 0; + } else if (dStrncasecmp(EntryType, "image/", 6) == 0 && + !dStrncasecmp(DetectedType,MimeTypes[i].str,MimeTypes[i].len)){ + /* An image, and there's an exact match */ + st = 0; + } else if (dStrncasecmp(EntryType, "text/", 5) || + dStrncasecmp(DetectedType, "application/", 12)) { + /* Not an application sent as text */ + st = 0; + } + + return st; +} + +/* + * Parse a geometry string. + */ +int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h) +{ + char *p, *t1, *t2; + int n1, n2; + int ret = 0; + + if ((p = strchr(str, 'x')) || (p = strchr(str, 'X'))) { + n1 = strtol(str, &t1, 10); + n2 = strtol(++p, &t2, 10); + if (t1 != str && t2 != p) { + *w = n1; + *h = n2; + ret = 1; + /* parse x,y now */ + p = t2; + n1 = strtol(p, &t1, 10); + n2 = strtol(t1, &t2, 10); + if (t1 != p && t2 != t1) { + *x = n1; + *y = n2; + } + } + } + _MSG("geom: w,h,x,y = (%d,%d,%d,%d)\n", *w, *h, *x, *y); + return ret; +} + +/* + * Encodes string using base64 encoding. + * Return value: new string or NULL if input string is empty. + */ +char *a_Misc_encode_base64(const char *in) +{ + static const char *base64_hex = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + char *out = NULL; + int len, i = 0; + + if (in == NULL) return NULL; + len = strlen(in); + + out = (char *)dMalloc((len + 2) / 3 * 4 + 1); + + for (; len >= 3; len -= 3) { + out[i++] = base64_hex[in[0] >> 2]; + out[i++] = base64_hex[((in[0]<<4) & 0x30) | (in[1]>>4)]; + out[i++] = base64_hex[((in[1]<<2) & 0x3c) | (in[2]>>6)]; + out[i++] = base64_hex[in[2] & 0x3f]; + in += 3; + } + + if (len > 0) { + unsigned char fragment; + out[i++] = base64_hex[in[0] >> 2]; + fragment = (in[0] << 4) & 0x30; + if (len > 1) fragment |= in[1] >> 4; + out[i++] = base64_hex[fragment]; + out[i++] = (len < 2) ? '=' : base64_hex[(in[1] << 2) & 0x3c]; + out[i++] = '='; + } + out[i] = '\0'; + return out; +} |