/* * File: misc.c * * Copyright (C) 2000-2007 Jorge Arellano Cid , * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. */ #include #include #include #include #include #include "utf8.hh" #include "msg.h" #include "misc.h" /* * Escape characters as %XX sequences. * Return value: New string. */ char *a_Misc_escape_chars(const char *str, const char *esc_set) { static const char *const hex = "0123456789ABCDEF"; char *p = NULL; Dstr *dstr; int i; dstr = dStr_sized_new(64); for (i = 0; str[i]; ++i) { if (str[i] <= 0x1F || str[i] == 0x7F || strchr(esc_set, str[i])) { dStr_append_c(dstr, '%'); dStr_append_c(dstr, hex[(str[i] >> 4) & 15]); dStr_append_c(dstr, hex[str[i] & 15]); } else { dStr_append_c(dstr, str[i]); } } p = dstr->str; dStr_free(dstr, FALSE); return p; } #define TAB_SIZE 8 /* * Takes a string and converts any tabs to spaces. */ int a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen) { int j, pos = 0, written = 0, old_pos, char_len; uint_t code; static const int combining_char_space = 32; while (*start < end && written < buflen - TAB_SIZE - combining_char_space) { code = a_Utf8_decode(*start, end, &char_len); if (code == '\t') { /* Fill with whitespaces until the next tab. */ old_pos = pos; pos += TAB_SIZE - (pos % TAB_SIZE); for (j = old_pos; j < pos; j++) buf[written++] = ' '; } else { assert(char_len <= 4); for (j = 0; j < char_len; j++) buf[written++] = (*start)[j]; pos++; } *start += char_len; } /* If following chars are combining chars (e.g. accents) add them to the * buffer. We have reserved combining_char_space bytes for this. * If there should be more combining chars, we split nevertheless. */ while (*start < end && written < buflen - 4) { code = a_Utf8_decode(*start, end, &char_len); if (! a_Utf8_combining_char(code)) break; assert(char_len <= 4); for (j = 0; j < char_len; j++) buf[written++] = (*start)[j]; *start += char_len; } return written; } /* TODO: could use dStr ADT! */ typedef struct ContentType_ { const char *str; int len; } ContentType_t; static const ContentType_t MimeTypes[] = { { "application/octet-stream", 24 }, { "application/xhtml+xml", 21 }, { "text/html", 9 }, { "text/plain", 10 }, { "image/gif", 9 }, { "image/png", 9 }, { "image/jpeg", 10 }, { NULL, 0 } }; typedef enum { DT_OCTET_STREAM = 0, DT_TEXT_HTML, DT_TEXT_PLAIN, DT_IMAGE_GIF, DT_IMAGE_PNG, DT_IMAGE_JPG, } DetectedContentType; /* * Detects 'Content-Type' from a data stream sample. * * It uses the magic(5) logic from file(1). Currently, it * only checks the few mime types that Dillo supports. * * 'Data' is a pointer to the first bytes of the raw data. * * Return value: (0 on success, 1 on doubt, 2 on lack of data). */ int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT) { size_t i, non_ascci, non_ascci_text, bin_chars; char *p = Data; int st = 1; /* default to "doubt' */ DetectedContentType Type = DT_OCTET_STREAM; /* default to binary */ /* HTML try */ for (i = 0; i < Size && dIsspace(p[i]); ++i); if ((Size - i >= 5 && !dStrncasecmp(p+i, "= 5 && !dStrncasecmp(p+i, "= 6 && !dStrncasecmp(p+i, "= 14 && !dStrncasecmp(p+i, "= 17 && !dStrncasecmp(p+i, "