From 759566fe99fe1fcf2857afc51d0b343b67aa4d44 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Fri, 27 Feb 2015 09:13:39 +0100
Subject: Fix parsing invalid HTML messages with BOM after <!DOCTYPE> (#1490291)

---
 program/lib/Roundcube/rcube_washtml.php | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'program/lib/Roundcube')

diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php
index e0cce685b..b042f5f80 100644
--- a/program/lib/Roundcube/rcube_washtml.php
+++ b/program/lib/Roundcube/rcube_washtml.php
@@ -403,16 +403,23 @@ class rcube_washtml
     {
         // special replacements (not properly handled by washtml class)
         $html_search = array(
-            '/(<\/nobr>)(\s+)(<nobr>)/i', // space(s) between <NOBR>
-            '/<title[^>]*>[^<]*<\/title>/i', // PHP bug #32547 workaround: remove title tag
-            '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?)
-            '/<html\s[^>]+>/i', // washtml/DOMDocument cannot handle xml namespaces
+            // space(s) between <NOBR>
+            '/(<\/nobr>)(\s+)(<nobr>)/i',
+            // PHP bug #32547 workaround: remove title tag
+            '/<title[^>]*>[^<]*<\/title>/i',
+            // remove <!doctype> before BOM (#1490291)
+            '/<\!doctype[^>]+>[^<]*/im',
+            // byte-order mark (only outlook?)
+            '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/',
+            // washtml/DOMDocument cannot handle xml namespaces
+            '/<html\s[^>]+>/i',
         );
 
         $html_replace = array(
             '\\1'.' &nbsp; '.'\\3',
             '',
             '',
+            '',
             '<html>',
         );
         $html = preg_replace($html_search, $html_replace, trim($html));
-- 
cgit v1.2.3