From d10b35011731b4660e7b796b1e19a44144c63a3c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Tue, 23 Apr 2024 22:57:28 +0200 Subject: Improve quirk for text/xhtml content type When a META tag reports the "text/xhtml" content type, we were correcting it to the type guessed in TypeDet. However, the current implementation for guessing XHTML and HTML pages fails if the doctype is not at the start of the document, falling back to text/plain. A more robust solution is to set the TypeNorm to "application/xhtml+xml", which can be handled by a_Mime_get_viewer() as an HTML-like document. Reported-by: Kevin Koster See: https://lists.mailman3.com/hyperkitty/list/dillo-dev@mailman3.com/thread/7GJ4AAMFFPEHOIYEOH4NHVMSXMJDFYXG/ --- src/cache.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cache.c b/src/cache.c index 7ee30d09..acdeb8b4 100644 --- a/src/cache.c +++ b/src/cache.c @@ -2,6 +2,7 @@ * File: cache.c * * Copyright 2000-2007 Jorge Arellano Cid + * Copyright 2024 Rodrigo Arias Mallo * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -521,7 +522,12 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype, } else if (*from == 'm' && !dStrnAsciiCasecmp(ctype, "text/xhtml", 10)) { /* WORKAROUND: doxygen uses "text/xhtml" in META */ - entry->TypeNorm = dStrdup(entry->TypeDet); + if (charset) { + entry->TypeNorm = dStrconcat("application/xhtml+xml", + "; charset=", charset, NULL); + } else { + entry->TypeNorm = dStrdup("application/xhtml+xml"); + } } if (charset) { if (entry->CharsetDecoder) -- cgit v1.2.3