From d10b35011731b4660e7b796b1e19a44144c63a3c Mon Sep 17 00:00:00 2001
From: Rodrigo Arias Mallo <rodarima@gmail.com>
Date: Tue, 23 Apr 2024 22:57:28 +0200
Subject: Improve quirk for text/xhtml content type

When a <meta> tag reports the "text/xhtml" content type, we used to
correct it to the type guessed in TypeDet. However, the current
detection code fails to recognize XHTML and HTML pages when the doctype
is not at the start of the document, and falls back to text/plain.

A more robust solution is to set the TypeNorm to
"application/xhtml+xml", which can be handled by a_Mime_get_viewer() as
an HTML-like document.

Reported-by: Kevin Koster <dillo@ombertech.com>
See: https://lists.mailman3.com/hyperkitty/list/dillo-dev@mailman3.com/thread/7GJ4AAMFFPEHOIYEOH4NHVMSXMJDFYXG/
---
 src/cache.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')
diff --git a/src/cache.c b/src/cache.c
index 7ee30d09..acdeb8b4 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -2,6 +2,7 @@
  * File: cache.c
  *
  * Copyright 2000-2007 Jorge Arellano Cid <jcid@dillo.org>
+ * Copyright 2024 Rodrigo Arias Mallo <rodarima@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -521,7 +522,12 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
          } else if (*from == 'm' &&
                     !dStrnAsciiCasecmp(ctype, "text/xhtml", 10)) {
             /* WORKAROUND: doxygen uses "text/xhtml" in META */
-            entry->TypeNorm = dStrdup(entry->TypeDet);
+            if (charset) {
+               entry->TypeNorm = dStrconcat("application/xhtml+xml",
+                        "; charset=", charset, NULL);
+            } else {
+               entry->TypeNorm = dStrdup("application/xhtml+xml");
+            }
          }
          if (charset) {
             if (entry->CharsetDecoder)
-- 
cgit v1.2.3