From 2337a82f72f6404d011453903bec8d53ae3580de Mon Sep 17 00:00:00 2001 From: alecpl Date: Tue, 28 Jul 2009 08:41:50 +0000 Subject: - Fix displaying of HTML messages with unknown/malformed tags (#1486003) - Some other changes for styled HTML display --- CHANGELOG | 4 ++++ program/lib/washtml.php | 19 +++++++++++-------- program/steps/mail/func.inc | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2920238de..a4f2fc9f5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ CHANGELOG RoundCube Webmail =========================== +- Fix displaying of HTML messages with unknown/malformed tags (#1486003) + +RELEASE 0.3-RC1 +--------------- - Fix import of vCard entries with params (#1485453) - Fix HTML messages output with empty block elements (#1485974) - Use request tokens to protect POST requests from CSFR diff --git a/program/lib/washtml.php b/program/lib/washtml.php index afc5f47ff..efd525df3 100644 --- a/program/lib/washtml.php +++ b/program/lib/washtml.php @@ -69,6 +69,10 @@ * Dont be a fool: * - Dont alter data on a GET: '' * - ... + * + * Roundcube Changes: + * - added $block_elements + * - changed $ignore_elements behaviour */ class washtml @@ -76,8 +80,8 @@ class washtml /* Allowed HTML elements (default) */ static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'nobr', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', 'img'); - /* Ignore these HTML tags but process their content */ - static $ignore_elements = array('html', 'head', 'body'); + /* Ignore these HTML tags and their content */ + static $ignore_elements = array('script', 'applet', 'embed', 'object', 'style'); /* Allowed HTML attributes */ static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir'); @@ -209,14 +213,13 @@ class washtml } else if(isset($this->_html_elements[$tagName])) { $content = $this->dumpHtml($node); $dump .= '<' . $tagName . $this->wash_attribs($node) . -// ($content?">$content":' />'); -// Roundcube Trac: #1485974 ($content || isset($this->_block_elements[$tagName]) ? ">$content" : ' />'); } else if(isset($this->_ignore_elements[$tagName])) { - $dump .= ''; - $dump .= $this->dumpHtml($node); //Just ignored - } else $dump .= ''; + } else { + $dump .= ''; + $dump .= $this->dumpHtml($node); // ignore tags not its content + } break; case XML_CDATA_SECTION_NODE: $dump .= $node->nodeValue; @@ -249,4 +252,4 @@ class washtml } -?> \ No newline at end of file +?> diff --git a/program/steps/mail/func.inc b/program/steps/mail/func.inc index b969c04e9..5cf0f7664 100644 --- a/program/steps/mail/func.inc +++ b/program/steps/mail/func.inc @@ -428,7 +428,10 @@ function rcmail_js_message_list($a_headers, $insert_top=FALSE, $replace=TRUE) { $action = $mbox==$CONFIG['drafts_mbox'] ? 'compose' : 'show'; $uid_param = $mbox==$CONFIG['drafts_mbox'] ? '_draft_uid' : '_uid'; - $cont = abbreviate_string(trim($IMAP->decode_header($header->$col)), 160); + $cont = trim($IMAP->decode_header($header->$col)); + if ($browser->ie) + $cont = rc_utf8_clean($cont); + $cont = abbreviate_string($cont, 160); if (!$cont) $cont = rcube_label('nosubject'); $cont = $browser->ie ? Q($cont) : sprintf('%s', Q(rcmail_url($action, array($uid_param=>$header->uid, '_mbox'=>$mbox))), Q($cont)); } @@ -668,36 +671,28 @@ function rcmail_wash_html($html, $p = array(), $cid_replaces) global $REMOTE_OBJECTS; $p += array('safe' => false, 'inline_html' => true); - + // special replacements (not properly handled by washtml class) $html_search = array( '/(<\/nobr>)(\s+)()/i', // space(s) between - '/(<[\/]*st1:[^>]+>)/i', // Microsoft's Smart Tags - '/<\/?rte_text>/i', // Rich Text Editor tags (#1485647) - '/<\/?broadcast[^>]*>/i', // invoices from the Apple Store contains tags (#1485962) '/.*<\/title>/i', // PHP bug #32547 workaround: remove title tag - '/<html[^>]*>/im', // malformed html: remove html tags (#1485139) - '/<\/html>/i', // malformed html: remove html tags (#1485139) '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?) ); $html_replace = array( '\\1'.'   '.'\\3', '', '', - '', - '', - '', - '', - '', ); $html = preg_replace($html_search, $html_replace, $html); + // fix (unknown/malformed) HTML tags before "wash" + $html = preg_replace_callback('/(<[\/!]*)([^ >]+)/', 'rcmail_html_tag_callback', $html); + // charset was converted to UTF-8 in rcube_imap::get_message_part() -> change charset specification in HTML accordingly $charset_pattern = '/(\s+content=[\'"]?\w+\/\w+;\s*charset)=([a-z0-9-_]+)/i'; if (preg_match($charset_pattern, $html)) { $html = preg_replace($charset_pattern, '\\1='.RCMAIL_CHARSET, $html); - } - else { + } else { // add head for malformed messages, washtml cannot work without that if (!preg_match('/<head[^>]*>(.*)<\/head>/Uims', $html)) $html = '<head></head>'. $html; @@ -728,9 +723,8 @@ function rcmail_wash_html($html, $p = array(), $cid_replaces) $washer = new washtml($wash_opts); $washer->add_callback('form', 'rcmail_washtml_callback'); - if ($p['safe']) { // allow CSS styles, will be sanitized by rcmail_washtml_callback() - $washer->add_callback('style', 'rcmail_washtml_callback'); - } + // allow CSS styles, will be sanitized by rcmail_washtml_callback() + $washer->add_callback('style', 'rcmail_washtml_callback'); $html = $washer->wash($html); $REMOTE_OBJECTS = $washer->extlinks; @@ -882,6 +876,22 @@ function rcmail_washtml_callback($tagname, $attrib, $content) } +/** + * Callback function for HTML tags fixing + */ +function rcmail_html_tag_callback($matches) +{ + $tagname = $matches[2]; + + $tagname = preg_replace(array( + '/:.*$/', // Microsoft's Smart Tags <st1:xxxx> + '/[^a-z0-9_-]/i', // forbidden characters + ), '', $tagname); + + return $matches[1].$tagname; +} + + /** * return table with message headers */ -- cgit v1.2.3