From 45f56c1c400ad5b21ddcd4d490f6f6c4ffe0d9fc Mon Sep 17 00:00:00 2001 From: thomascube Date: Thu, 29 May 2008 16:10:42 +0000 Subject: Replace our crappy html sanitization with the dom-based washtml script + fix inline message parts + remove old code + add some doc comments --- installer/check.php | 2 +- program/include/rcube_message.php | 17 +- program/include/rcube_user.php | 2 +- program/lib/washtml.php | 196 +++++++++++++ program/steps/mail/func.inc | 567 ++++++++------------------------------ 5 files changed, 322 insertions(+), 462 deletions(-) create mode 100644 program/lib/washtml.php diff --git a/installer/check.php b/installer/check.php index 7ca982f2d..c0f06d4ab 100644 --- a/installer/check.php +++ b/installer/check.php @@ -1,7 +1,7 @@
'pcre', 'Session' => 'session'); +$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session', 'DOM XML' => 'dom'); $optional_php_exts = array('FileInfo' => 'fileinfo', 'Libiconv' => 'iconv', 'Multibyte' => 'mbstring', 'OpenSSL' => 'openssl', 'Mcrypt' => 'mcrypt', 'GD' => 'gd'); diff --git a/program/include/rcube_message.php b/program/include/rcube_message.php index 174b1f314..7dc74ab28 100644 --- a/program/include/rcube_message.php +++ b/program/include/rcube_message.php @@ -21,9 +21,8 @@ /** - * Interface class for accessing an IMAP server - * - * This is a wrapper that implements the Iloha IMAP Library (IIL) + * Logical representation of a mail message with all its data + * and related functions * * @package Mail * @author Thomas Bruederli @@ -65,8 +64,8 @@ class rcube_message ); if ($this->structure = $this->imap->get_structure($uid)) { - $this->parse_structure($this->structure); $this->get_mime_numbers($this->structure); + $this->parse_structure($this->structure); } else { $this->body = $this->imap->get_body($uid); @@ -356,18 +355,18 @@ class rcube_message } // if this was a related part try to resolve references - if ($message_ctype_secondary == 'related' && sizeof($this->inline_objects)) { + if ($message_ctype_secondary == 'related' && sizeof($this->inline_parts)) { $a_replaces = array(); foreach ($this->inline_parts as $inline_object) { - $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($this->opt['get_url'], $inline_object->mime_id)); + $a_replaces['cid:'.$inline_object->content_id] = $this->get_part_url($inline_object->mime_id); } // add replace array to each content part // (will be applied later when part body is available) - for ($i=0; $itype=='content') - $a_return_parts[$i]->replaces = $a_replaces; + foreach ($this->parts as $i => $part) { + if ($part->type == 'content') + $this->parts[$i]->replaces = $a_replaces; } } } diff --git a/program/include/rcube_user.php b/program/include/rcube_user.php index e125f6309..c808d079e 100644 --- a/program/include/rcube_user.php +++ b/program/include/rcube_user.php @@ -24,7 +24,7 @@ /** * Class representing a system user * - * @package core + * @package Core * @author Thomas Bruederli */ class rcube_user diff --git a/program/lib/washtml.php b/program/lib/washtml.php new file mode 100644 index 000000000..82ccc0cd5 --- /dev/null +++ b/program/lib/washtml.php @@ -0,0 +1,196 @@ + + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Please send me your comments about this code if you have some, thanks, Fred. */ + +/* OVERVIEW: + * + * Wahstml take an untrusted HTML and return a safe html string. + * + * SYNOPSIS: + * + * washtml::wash($html, $config, $full); + * It return a sanityzed string of the $html parameter without html and head tags. + * $html is a string containing the html code to wash. + * $config is an array containing options: + * $config['allow_remote'] is a boolean to allow link to remote images. + * $config['blocked_src'] string with image-src to be used for blocked remote images + * $config['show_washed'] is a boolean to include washed out attributes as x-washed + * $config['cid_map'] is an array where cid urls index urls to replace them. + * $config['charset'] is a string containing the charset of the HTML document if it is not defined in it. + * $full is a reference to a boolean that is set to true if no remote images are removed. (FE: show remote images link) + * + * INTERNALS: + * + * Only tags and attributes in the globals $html_elements and $html_attributes + * are kept, inline styles are also filtered: all style identifiers matching + * /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe + * urls if allowed and cid urls if mapped are kept. + * + * BUGS: It MUST be safe ! + * - Check regexp + * - urlencode URLs instead of htmlspecials + * - Check is a 3 bytes utf8 first char can eat '">' + * - Update PCRE: CVE-2007-1659 - CVE-2007-1660 - CVE-2007-1661 - CVE-2007-1662 + * CVE-2007-4766 - CVE-2007-4767 - CVE-2007-4768 + * http://lists.debian.org/debian-security-announce/debian-security-announce-2007/msg00177.html + * - ... + * + * MISSING: + * - relative links, can be implemented by prefixing an absolute path, ask me + * if you need it... + * - ... + * + * Dont be a fool: + * - Dont alter data on a GET: '' + * - ... + */ + +class washtml +{ + + /* Allowed HTML elements */ + static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'u', 'ul', 'var', 'img'); + + /* Allowed HTML attributes */ + static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir'); + + /* Check CSS style */ + static function wash_style($style, $config, &$full) { + $s = ''; + + foreach(explode(';', $style) as $declaration) { + if(preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) { + $cssid = $match[1]; + $str = $match[2]; + $value = ''; + while(sizeof($str) > 0 && + preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/ + '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'. + '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'. + '|#[0-9a-f]{3,6}|[a-z0-9\-]+'. + ')\s*/i', $str, $match)) { + if($match[2]) { + if(preg_match('/^(http|https|ftp):.*$/i', $match[2], $url)) { + if($config['allow_remote']) + $value .= ' url(\''.htmlspecialchars($url[0], ENT_QUOTES).'\')'; + else + $full = false; + } else if(preg_match('/^cid:(.*)$/i', $match[2], $cid)) + $value .= ' url(\''.htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '\')'; + } else if($match[0] != 'url' && $match[0] != 'rbg')//whitelist ? + $value .= ' ' . $match[0]; + $str = substr($str, strlen($match[0])); + } + if($value) + $s .= ($s?' ':'') . $cssid . ':' . $value . ';'; + } + } + return $s; + } + + /* Take a node and return allowed attributes and check values */ + static function wash_attribs($node, $config, &$full) { + $t = ''; + $washed; + + foreach($node->attributes as $key => $plop) { + $key = strtolower($key); + $value = $node->getAttribute($key); + if((in_array($key, self::$html_attribs)) || + ($key == 'href' && preg_match('/^(http|https|ftp|mailto):.*/i', $value))) + $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"'; + else if($key == 'style' && ($style = self::wash_style($value, $config, $full))) + $t .= ' style="' . $style . '"'; + else if($key == 'src' && strtolower($node->tagName) == 'img') { //check tagName anyway + if(preg_match('/^(http|https|ftp):.*/i', $value)) { + if($config['allow_remote']) + $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"'; + else { + $full = false; + if ($config['blocked_src']) + $t .= ' src="' . htmlspecialchars($config['blocked_src'], ENT_QUOTES) . '"'; + } + } else if(preg_match('/^cid:(.*)$/i', $value, $cid)) + $t .= ' ' . $key . '="' . htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '"'; + } else + $washed .= ($washed?' ':'') . $key; + } + return $t . ($washed && $config['show_washed']?' x-washed="'.$washed.'"':''); + } + + /* The main loop that recurse on a node tree. + * It output only allowed tags with allowed attributes + * and allowed inline styles */ + static function dumpHtml($node, $config, &$full) { + if(!$node->hasChildNodes()) + return ''; + + $node = $node->firstChild; + $dump = ''; + + do { + switch($node->nodeType) { + case XML_ELEMENT_NODE: //Check element + $tagName = strtolower($node->tagName); + if(in_array($tagName, self::$html_elements)) { + $content = self::dumpHtml($node, $config, $full); + $dump .= '<' . $tagName . self::wash_attribs($node, $config, $full) . + ($content?">$content":' />'); + } else if($tagName == 'html' || $tagName == 'body') { + $dump .= self::dumpHtml($node, $config, $full); //Just ignored + } else + $dump .= ''; + break; + case XML_TEXT_NODE: + $dump .= htmlspecialchars($node->nodeValue); + break; + case XML_HTML_DOCUMENT_NODE: + $dump .= self::dumpHtml($node, $config, $full); + break; + case XML_DOCUMENT_TYPE_NODE: break; + default: + } + } while($node = $node->nextSibling); + + return $dump; + } + + /* Main function, give it untrusted HTML, tell it if you allow loading + * remote images and give it a map to convert "cid:" urls. */ + static function wash($html, $config=array(), &$full=true) { + $config += array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array()); + //Charset seems to be ignored (probably if defined in the HTML document) + $node = new DOMDocument('1.0', $config['charset']); + $full = true; + @$node->loadHTML($html); + return self::dumpHtml($node, $config, $full); + } + +} + +?> \ No newline at end of file diff --git a/program/steps/mail/func.inc b/program/steps/mail/func.inc index d37a52126..0a1e4d4c7 100644 --- a/program/steps/mail/func.inc +++ b/program/steps/mail/func.inc @@ -74,7 +74,9 @@ if (empty($RCMAIL->action) || $RCMAIL->action == 'list') -// return the message list as HTML table +/** + * return the message list as HTML table + */ function rcmail_message_list($attrib) { global $IMAP, $CONFIG, $COMM_PATH, $OUTPUT; @@ -295,7 +297,9 @@ function rcmail_message_list($attrib) } -// return javascript commands to add rows to the message list +/** + * return javascript commands to add rows to the message list + */ function rcmail_js_message_list($a_headers, $insert_top=FALSE) { global $CONFIG, $IMAP, $OUTPUT; @@ -358,7 +362,9 @@ function rcmail_js_message_list($a_headers, $insert_top=FALSE) } -// return an HTML iframe for loading mail content +/** + * return an HTML iframe for loading mail content + */ function rcmail_messagecontent_frame($attrib) { global $OUTPUT; @@ -381,6 +387,9 @@ function rcmail_messagecontent_frame($attrib) } +/** + * + */ function rcmail_messagecount_display($attrib) { global $IMAP, $OUTPUT; @@ -401,6 +410,9 @@ function rcmail_messagecount_display($attrib) } +/** + * + */ function rcmail_quota_display($attrib) { global $OUTPUT, $COMM_PATH; @@ -423,6 +435,9 @@ function rcmail_quota_display($attrib) } +/** + * + */ function rcmail_quota_content($quota=NULL) { global $IMAP, $COMM_PATH; @@ -466,6 +481,9 @@ function rcmail_quota_content($quota=NULL) } +/** + * + */ function rcmail_get_messagecount_text($count=NULL, $page=NULL) { global $IMAP, $MESSAGE; @@ -495,246 +513,102 @@ function rcmail_get_messagecount_text($count=NULL, $page=NULL) } -/* Stolen from Squirrelmail */ -function sq_deent(&$attvalue, $regex, $hex=false) - { - $ret_match = false; - preg_match_all($regex, $attvalue, $matches); - if (is_array($matches) && sizeof($matches[0]) > 0) - { - $repl = Array(); - for ($i = 0; $i < sizeof($matches[0]); $i++) - { - $numval = $matches[1][$i]; - if ($hex) - $numval = hexdec($numval); - $repl{$matches[0][$i]} = chr($numval); - } - $attvalue = strtr($attvalue, $repl); - return true; - } - else - return false; - } - - -/* Stolen verbatim from Squirrelmail */ -function sq_defang(&$attvalue) - { - /* Skip this if there aren't ampersands or backslashes. */ - if ((strpos($attvalue, '&') === false) && - (strpos($attvalue, '\\') === false)) - return; - $m = false; - do - { - $m = false; - $m = $m || sq_deent($attvalue, '/\�*(\d+);*/s'); - $m = $m || sq_deent($attvalue, '/\�*((\d|[a-f])+);*/si', true); - $m = $m || sq_deent($attvalue, '/\\\\(\d+)/s', true); - } while ($m == true); - $attvalue = stripslashes($attvalue); - } - - -function rcmail_html_filter($html) - { - preg_match_all('/<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/', $html, $tags); - - /* From Squirrelmail: Translate all dangerous Unicode or Shift_JIS characters which are accepted by - * IE as regular characters. */ - $replace = array(array('ʟ', 'ʟ', /* L UNICODE IPA Extension */ - 'ʀ', 'ʀ', /* R UNICODE IPA Extension */ - 'ɴ', 'ɴ', /* N UNICODE IPA Extension */ - 'E', 'E', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */ - 'e', 'e', /* Unicode FULLWIDTH LATIN SMALL LETTER E */ - 'X', 'X', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */ - 'x', 'x', /* Unicode FULLWIDTH LATIN SMALL LETTER X */ - 'P', 'P', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */ - 'p', 'p', /* Unicode FULLWIDTH LATIN SMALL LETTER P */ - 'R', 'R', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */ - 'r', 'r', /* Unicode FULLWIDTH LATIN SMALL LETTER R */ - 'S', 'S', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */ - 's', 's', /* Unicode FULLWIDTH LATIN SMALL LETTER S */ - 'I', 'I', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */ - 'i', 'i', /* Unicode FULLWIDTH LATIN SMALL LETTER I */ - 'O', 'O', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */ - 'o', 'o', /* Unicode FULLWIDTH LATIN SMALL LETTER O */ - 'N', 'N', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */ - 'n', 'n', /* Unicode FULLWIDTH LATIN SMALL LETTER N */ - 'L', 'L', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */ - 'l', 'l', /* Unicode FULLWIDTH LATIN SMALL LETTER L */ - 'U', 'U', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */ - 'u', 'u', /* Unicode FULLWIDTH LATIN SMALL LETTER U */ - 'ⁿ', 'ⁿ' , /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */ - "\xEF\xBC\xA5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */ - /* in unicode this is some Chinese char range */ - "\xEF\xBD\x85", /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */ - "\xEF\xBC\xB8", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */ - "\xEF\xBD\x98", /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */ - "\xEF\xBC\xB0", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */ - "\xEF\xBD\x90", /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */ - "\xEF\xBC\xB2", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */ - "\xEF\xBD\x92", /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */ - "\xEF\xBC\xB3", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */ - "\xEF\xBD\x93", /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */ - "\xEF\xBC\xA9", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */ - "\xEF\xBD\x89", /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */ - "\xEF\xBC\xAF", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */ - "\xEF\xBD\x8F", /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */ - "\xEF\xBC\xAE", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */ - "\xEF\xBD\x8E", /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */ - "\xEF\xBC\xAC", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER L */ - "\xEF\xBD\x8C", /* Shift JIS FULLWIDTH LATIN SMALL LETTER L */ - "\xEF\xBC\xB5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER U */ - "\xEF\xBD\x95", /* Shift JIS FULLWIDTH LATIN SMALL LETTER U */ - "\xE2\x81\xBF", /* Shift JIS FULLWIDTH SUPERSCRIPT N */ - "\xCA\x9F", /* L UNICODE IPA Extension */ - "\xCA\x80", /* R UNICODE IPA Extension */ - "\xC9\xB4"), /* N UNICODE IPA Extension */ - array('l', 'l', 'r', 'r', 'n', 'n', 'E', 'E', 'e', 'e', 'X', 'X', 'x', 'x', - 'P', 'P', 'p', 'p', 'R', 'R', 'r', 'r', 'S', 'S', 's', 's', 'I', 'I', - 'i', 'i', 'O', 'O', 'o', 'o', 'N', 'N', 'n', 'n', 'L', 'L', 'l', 'l', - 'U', 'U', 'u', 'u', 'n', 'n', 'E', 'e', 'X', 'x', 'P', 'p', 'R', 'r', - 'S', 's', 'I', 'i', 'O', 'o', 'N', 'n', 'L', 'l', 'U', 'u', 'n', 'l', 'r', 'n')); - if ((count($tags)>3) && (count($tags[3])>0)) - foreach ($tags[3] as $nr=>$value) - { - /* Remove comments */ - $newvalue = preg_replace('/(\/\*.*\*\/)/','$2',$value); - /* Translate dangerous characters */ - $newvalue = str_replace($replace[0], $replace[1], $newvalue); - sq_defang($newvalue); - /* Rename dangerous CSS */ - $newvalue = preg_replace('/expression/i', 'idiocy', $newvalue); - $newvalue = preg_replace('/url/i', 'idiocy', $newvalue); - $newattrs = preg_replace('/'.preg_quote($value, '/').'$/', $newvalue, $tags[1][$nr]); - $newtag = preg_replace('/'.preg_quote($tags[1][$nr], '/').'/', $newattrs, $tags[0][$nr]); - $html = preg_replace('/'.preg_quote($tags[0][$nr], '/').'/', $newtag, $html); - } - return $html; - } - - +/** + * + */ function rcmail_print_body($part, $safe=FALSE, $plain=FALSE) - { - global $IMAP, $REMOTE_OBJECTS; +{ + global $REMOTE_OBJECTS; - $body = is_array($part->replaces) ? strtr($part->body, $part->replaces) : $part->body; - // convert html to text/plain - if ($part->ctype_secondary=='html' && $plain) - { - $txt = new html2text($body, false, true); + if ($part->ctype_secondary == 'html' && $plain) { + $txt = new html2text($part->body, false, true); $body = $txt->get_text(); $part->ctype_secondary = 'plain'; - } - + } // text/html - if ($part->ctype_secondary=='html') - { - // remove charset specification in HTML message - $body = preg_replace('/charset=[a-z0-9\-]+/i', '', $body); - - if (!$safe) // remove remote images and scripts - { - $remote_patterns = array('/)/Ui', - '/(src|background)=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui', - '/()/i', - '/()/i', - '/url\s*\(["\']?([hftps]{3,5}:\/{2}[^"\'\s]+)["\']?\)/i', - '/url\s*\(["\']?([\.\/]+[^"\'\s]+)["\']?\)/i', - '//Umis'); - - $remote_replaces = array('ctype_secondary == 'html') { + // clean HTML with washhtml by Frederic Motte + $body = washtml::wash($part->body, array( + 'show_washed' => false, + 'allow_remote' => $safe, + 'blocked_src' => "./program/blocked.gif", + 'charset' => 'UTF-8', + 'cid_map' => $part->replaces, + ), $full_inline); + + $REMOTE_OBJECTS = !$full_inline; + + return $body; + } // text/enriched - if ($part->ctype_secondary=='enriched') - { + else if ($part->ctype_secondary=='enriched') { return Q(enriched_to_html($body), 'show'); - } + } else - { - // make links and email-addresses clickable - $convert_patterns = $convert_replaces = $replace_strings = array(); - - $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;'; - $url_chars_within = '\?\.~,!'; + $body = $part->body; - $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie"; - $convert_replaces[] = "rcmail_str_replacement('\\1://\\2', \$replace_strings)"; - $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie"; - $convert_replaces[] = "rcmail_str_replacement('\\1\\2\\3', \$replace_strings)"; - - $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie'; - $convert_replaces[] = "rcmail_str_replacement('\\1', \$replace_strings)"; - -// if ($part->ctype_parameters['format'] != 'flowed') -// $body = wordwrap(trim($body), 80); + /**** assert plaintext ****/ - $body = preg_replace($convert_patterns, $convert_replaces, $body); + // make links and email-addresses clickable + $convert_patterns = $convert_replaces = $replace_strings = array(); + + $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;'; + $url_chars_within = '\?\.~,!'; - // split body into single lines - $a_lines = preg_split('/\r?\n/', $body); - $quote_level = 0; + $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie"; + $convert_replaces[] = "rcmail_str_replacement('\\1://\\2', \$replace_strings)"; - // colorize quoted parts - for($n=0; $n+\s*)+/', $line, $regs)) - { - $q = strlen(preg_replace('/\s/', '', $regs[0])); - $line = substr($line, strlen($regs[0])); + $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie"; + $convert_replaces[] = "rcmail_str_replacement('\\1\\2\\3', \$replace_strings)"; + + $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie'; + $convert_replaces[] = "rcmail_str_replacement('\\1', \$replace_strings)"; + +// if ($part->ctype_parameters['format'] != 'flowed') +// $body = wordwrap(trim($body), 80); - if ($q > $quote_level) - $quotation = str_repeat('
', $q - $quote_level); - else if ($q < $quote_level) - $quotation = str_repeat("
", $quote_level - $q); - } - else if ($quote_level > 0) - $quotation = str_repeat("", $quote_level); + // search for patterns like links and e-mail addresses + $body = preg_replace($convert_patterns, $convert_replaces, $body); - $quote_level = $q; - $a_lines[$n] = $quotation . Q($line, 'replace', FALSE); - } + // split body into single lines + $a_lines = preg_split('/\r?\n/', $body); + $quote_level = 0; - // insert the links for urls and mailtos - $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines)); + // colorize quoted parts + for ($n=0; $n < sizeof($a_lines); $n++) { + $line = $a_lines[$n]; + $quotation = ''; + $q = 0; - return "
".$body."\n
"; + if (preg_match('/^(>+\s*)+/', $line, $regs)) { + $q = strlen(preg_replace('/\s/', '', $regs[0])); + $line = substr($line, strlen($regs[0])); + + if ($q > $quote_level) + $quotation = str_repeat('
', $q - $quote_level); + else if ($q < $quote_level) + $quotation = str_repeat("
", $quote_level - $q); } + else if ($quote_level > 0) + $quotation = str_repeat("", $quote_level); + + $quote_level = $q; + $a_lines[$n] = $quotation . Q($line, 'replace', false); // htmlquote plaintext } + // insert the links for urls and mailtos + $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines)); + + return "
".$body."\n
"; + } -// add a string to the replacement array and return a replacement string + +/** + * add a string to the replacement array and return a replacement string + */ function rcmail_str_replacement($str, &$rep) { static $count = 0; @@ -743,200 +617,10 @@ function rcmail_str_replacement($str, &$rep) } -function rcmail_parse_message(&$structure, $arg=array(), $recursive=FALSE) - { - global $IMAP; - static $sa_inline_objects = array(); - - // arguments are: (bool)$prefer_html, (string)$get_url - extract($arg); - - $a_attachments = array(); - $a_return_parts = array(); - $out = ''; - - $message_ctype_primary = strtolower($structure->ctype_primary); - $message_ctype_secondary = strtolower($structure->ctype_secondary); - - // show message headers - if ($recursive && is_array($structure->headers) && isset($structure->headers['subject'])) - { - $c = new stdClass; - $c->type = 'headers'; - $c->headers = &$structure->headers; - $a_return_parts[] = $c; - } - - // print body if message doesn't have multiple parts - if ($message_ctype_primary=='text') - { - $structure->type = 'content'; - $a_return_parts[] = &$structure; - } - - // message contains alternative parts - else if ($message_ctype_primary=='multipart' && $message_ctype_secondary=='alternative' && is_array($structure->parts)) - { - // get html/plaintext parts - $plain_part = $html_part = $print_part = $related_part = NULL; - - foreach ($structure->parts as $p => $sub_part) - { - $rel_parts = $attachmnts = null; - $sub_ctype_primary = strtolower($sub_part->ctype_primary); - $sub_ctype_secondary = strtolower($sub_part->ctype_secondary); - - // check if sub part is - if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='plain') - $plain_part = $p; - else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='html') - $html_part = $p; - else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='enriched') - $enriched_part = $p; - else if ($sub_ctype_primary=='multipart' && ($sub_ctype_secondary=='related' || $sub_ctype_secondary=='mixed')) - $related_part = $p; - } - - // parse related part (alternative part could be in here) - if ($related_part!==NULL) - { - list($rel_parts, $attachmnts) = rcmail_parse_message($structure->parts[$related_part], $arg, TRUE); - $a_attachments = array_merge($a_attachments, $attachmnts); - } - - // merge related parts if any - if ($rel_parts && $prefer_html && !$html_part) - $a_return_parts = array_merge($a_return_parts, $rel_parts); - - // choose html/plain part to print - else if ($html_part!==NULL && $prefer_html) - $print_part = &$structure->parts[$html_part]; - else if ($enriched_part!==NULL) - $print_part = &$structure->parts[$enriched_part]; - else if ($plain_part!==NULL) - $print_part = &$structure->parts[$plain_part]; - - // show message body - if (is_object($print_part)) - { - $print_part->type = 'content'; - $a_return_parts[] = $print_part; - } - // show plaintext warning - else if ($html_part!==NULL && empty($a_return_parts)) - { - $c = new stdClass; - $c->type = 'content'; - $c->body = rcube_label('htmlmessage'); - $c->ctype_primary = 'text'; - $c->ctype_secondary = 'plain'; - - $a_return_parts[] = $c; - } - - // add html part as attachment - if ($html_part!==NULL && $structure->parts[$html_part]!==$print_part) - { - $html_part = &$structure->parts[$html_part]; - $html_part->filename = rcube_label('htmlmessage'); - $html_part->mimetype = 'text/html'; - - $a_attachments[] = $html_part; - } - } - - // message contains multiple parts - else if (is_array($structure->parts) && !empty($structure->parts)) - { - for ($i=0; $iparts); $i++) - { - $mail_part = &$structure->parts[$i]; - $primary_type = strtolower($mail_part->ctype_primary); - $secondary_type = strtolower($mail_part->ctype_secondary); - - // multipart/alternative - if ($primary_type=='multipart') - { - list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE); - - $a_return_parts = array_merge($a_return_parts, $parts); - $a_attachments = array_merge($a_attachments, $attachmnts); - } - - // part text/[plain|html] OR message/delivery-status - else if (($primary_type=='text' && ($secondary_type=='plain' || $secondary_type=='html') && $mail_part->disposition!='attachment') || - ($primary_type=='message' && ($secondary_type=='delivery-status' || $secondary_type=='disposition-notification'))) - { - $mail_part->type = 'content'; - $a_return_parts[] = $mail_part; - } - - // part message/* - else if ($primary_type=='message') - { - list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE); - - $a_return_parts = array_merge($a_return_parts, $parts); - $a_attachments = array_merge($a_attachments, $attachmnts); - } - - // ignore "virtual" protocol parts - else if ($primary_type=='protocol') - continue; - - // part is file/attachment - else if ($mail_part->disposition=='attachment' || $mail_part->disposition=='inline' || $mail_part->headers['content-id'] || - (empty($mail_part->disposition) && $mail_part->filename)) - { - // skip apple resource forks - if ($message_ctype_secondary=='appledouble' && $secondary_type=='applefile') - continue; - - // part belongs to a related message - if ($message_ctype_secondary=='related' && $mail_part->headers['content-id']) - { - $mail_part->content_id = preg_replace(array('/^$/'), '', $mail_part->headers['content-id']); - $sa_inline_objects[] = $mail_part; - } - // is regular attachment - else - { - if (!$mail_part->filename) - $mail_part->filename = 'Part '.$mail_part->mime_id; - $a_attachments[] = $mail_part; - } - } - } - - // if this was a related part try to resolve references - if ($message_ctype_secondary=='related' && sizeof($sa_inline_objects)) - { - $a_replaces = array(); - - foreach ($sa_inline_objects as $inline_object) - $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($get_url, $inline_object->mime_id)); - - // add replace array to each content part - // (will be applied later when part body is available) - for ($i=0; $itype=='content') - $a_return_parts[$i]->replaces = $a_replaces; - } - } - } - - // message is single part non-text - else if ($structure->filename) - $a_attachments[] = $structure; - - return array($a_return_parts, $a_attachments); - } - - - -// return table with message headers +/** + * return table with message headers + */ function rcmail_message_headers($attrib, $headers=NULL) { global $IMAP, $OUTPUT, $MESSAGE; @@ -989,7 +673,9 @@ function rcmail_message_headers($attrib, $headers=NULL) } - +/** + * + */ function rcmail_message_body($attrib) { global $CONFIG, $OUTPUT, $MESSAGE, $IMAP, $REMOTE_OBJECTS; @@ -1028,7 +714,7 @@ function rcmail_message_body($attrib) $out .= '
'; if ($part->ctype_secondary != 'plain') - $out .= rcmail_sanitize_html($body, $attrib['id']); + $out .= rcmail_html4inline($body, $attrib['id']); else $out .= $body; @@ -1068,12 +754,11 @@ function rcmail_message_body($attrib) -// modify a HTML message that it can be displayed inside a HTML page -function rcmail_sanitize_html($body, $container_id) +/** + * modify a HTML message that it can be displayed inside a HTML page + */ +function rcmail_html4inline($body, $container_id) { - // remove any null-byte characters before parsing - $body = preg_replace('/\x00/', '', $body); - $base_url = ""; $last_style_pos = 0; $body_lc = strtolower($body); @@ -1095,26 +780,6 @@ function rcmail_sanitize_html($body, $container_id) $last_style_pos = $pos2; } - - // remove SCRIPT tags - foreach (array('script', 'applet', 'object', 'embed', 'iframe') as $tag) - { - while (($pos = strpos($body_lc, '<'.$tag)) && (($pos2 = strpos($body_lc, '', $pos)) || ($pos3 = strpos($body_lc, '>', $pos)))) - { - $end = $pos2 ? $pos2 + strlen('') : $pos3 + 1; - $body = substr($body, 0, $pos) . substr($body, $end, strlen($body)-$end); - $body_lc = strtolower($body); - } - } - - // replace event handlers on any object - while ($body != $prev_body) - { - $prev_body = $body; - $body = preg_replace('/(<[^!][^>]*\s)on(?:load|unload|click|dblclick|mousedown|mouseup|mouseover|mousemove|mouseout|focus|blur|keypress|keydown|keyup|submit|reset|select|change)=([^>]+>)/im', '$1__removed=$2', $body); - $body = preg_replace('/(<[^!][^>]*\shref=["\']?)(javascript:)([^>]*?>)/im', '$1null:$3', $body); - } - // resolve if ($base_url) { @@ -1138,14 +803,8 @@ function rcmail_sanitize_html($body, $container_id) $body); $out = preg_replace( - array( - '/]*)>/i', - '/<\/body>/i', - ), - array( - '
', - '
', - ), + array('/]*)>/i', '/<\/body>/i'), + array('
', '
'), $out); // quote