summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorthomascube <thomas@roundcube.net>2008-05-29 16:10:42 +0000
committerthomascube <thomas@roundcube.net>2008-05-29 16:10:42 +0000
commit45f56c1c400ad5b21ddcd4d490f6f6c4ffe0d9fc (patch)
tree7353d707aaad98c937e41f231a5b03b52fd97491
parent06c1652d7fbde5d198eb3508106bea5076620d35 (diff)
Replace our crappy html sanitization with the dom-based washtml script + fix inline message parts + remove old code + add some doc comments
-rw-r--r--installer/check.php2
-rw-r--r--program/include/rcube_message.php17
-rw-r--r--program/include/rcube_user.php2
-rw-r--r--program/lib/washtml.php196
-rw-r--r--program/steps/mail/func.inc567
5 files changed, 322 insertions, 462 deletions
diff --git a/installer/check.php b/installer/check.php
index 7ca982f2d..c0f06d4ab 100644
--- a/installer/check.php
+++ b/installer/check.php
@@ -1,7 +1,7 @@
<form action="index.php" method="get">
<?php
-$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session');
+$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session', 'DOM XML' => 'dom');
$optional_php_exts = array('FileInfo' => 'fileinfo', 'Libiconv' => 'iconv',
'Multibyte' => 'mbstring', 'OpenSSL' => 'openssl', 'Mcrypt' => 'mcrypt', 'GD' => 'gd');
diff --git a/program/include/rcube_message.php b/program/include/rcube_message.php
index 174b1f314..7dc74ab28 100644
--- a/program/include/rcube_message.php
+++ b/program/include/rcube_message.php
@@ -21,9 +21,8 @@
/**
- * Interface class for accessing an IMAP server
- *
- * This is a wrapper that implements the Iloha IMAP Library (IIL)
+ * Logical representation of a mail message with all its data
+ * and related functions
*
* @package Mail
* @author Thomas Bruederli <roundcube@gmail.com>
@@ -65,8 +64,8 @@ class rcube_message
);
if ($this->structure = $this->imap->get_structure($uid)) {
- $this->parse_structure($this->structure);
$this->get_mime_numbers($this->structure);
+ $this->parse_structure($this->structure);
}
else {
$this->body = $this->imap->get_body($uid);
@@ -356,18 +355,18 @@ class rcube_message
}
// if this was a related part try to resolve references
- if ($message_ctype_secondary == 'related' && sizeof($this->inline_objects)) {
+ if ($message_ctype_secondary == 'related' && sizeof($this->inline_parts)) {
$a_replaces = array();
foreach ($this->inline_parts as $inline_object) {
- $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($this->opt['get_url'], $inline_object->mime_id));
+ $a_replaces['cid:'.$inline_object->content_id] = $this->get_part_url($inline_object->mime_id);
}
// add replace array to each content part
// (will be applied later when part body is available)
- for ($i=0; $i<count($a_return_parts); $i++) {
- if ($a_return_parts[$i]->type=='content')
- $a_return_parts[$i]->replaces = $a_replaces;
+ foreach ($this->parts as $i => $part) {
+ if ($part->type == 'content')
+ $this->parts[$i]->replaces = $a_replaces;
}
}
}
diff --git a/program/include/rcube_user.php b/program/include/rcube_user.php
index e125f6309..c808d079e 100644
--- a/program/include/rcube_user.php
+++ b/program/include/rcube_user.php
@@ -24,7 +24,7 @@
/**
* Class representing a system user
*
- * @package core
+ * @package Core
* @author Thomas Bruederli <roundcube@gmail.com>
*/
class rcube_user
diff --git a/program/lib/washtml.php b/program/lib/washtml.php
new file mode 100644
index 000000000..82ccc0cd5
--- /dev/null
+++ b/program/lib/washtml.php
@@ -0,0 +1,196 @@
+<?php
+/* Washtml, an HTML sanitizer.
+ *
+ * Copyright (c) 2007 Frederic Motte <fmotte@ubixis.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Please send me your comments about this code if you have some, thanks, Fred. */
+
+/* OVERVIEW:
+ *
+ * Washtml takes untrusted HTML and returns a safe HTML string.
+ *
+ * SYNOPSIS:
+ *
+ * washtml::wash($html, $config, $full);
+ * It returns a sanitized string of the $html parameter without html and head tags.
+ * $html is a string containing the html code to wash.
+ * $config is an array containing options:
+ * $config['allow_remote'] is a boolean to allow link to remote images.
+ * $config['blocked_src'] string with image-src to be used for blocked remote images
+ * $config['show_washed'] is a boolean to include washed out attributes as x-washed
+ * $config['cid_map'] is an array where cid urls index urls to replace them.
+ * $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
+ * $full is a reference to a boolean that is set to true if no remote images are removed. (FE: show remote images link)
+ *
+ * INTERNALS:
+ *
+ * Only tags and attributes in the globals $html_elements and $html_attributes
+ * are kept, inline styles are also filtered: all style identifiers matching
+ * /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe
+ * urls if allowed and cid urls if mapped are kept.
+ *
+ * BUGS: It MUST be safe !
+ * - Check regexp
+ * - urlencode URLs instead of htmlspecials
+ * - Check if a 3-byte UTF-8 first char can eat '">'
+ * - Update PCRE: CVE-2007-1659 - CVE-2007-1660 - CVE-2007-1661 - CVE-2007-1662
+ * CVE-2007-4766 - CVE-2007-4767 - CVE-2007-4768
+ * http://lists.debian.org/debian-security-announce/debian-security-announce-2007/msg00177.html
+ * - ...
+ *
+ * MISSING:
+ * - relative links, can be implemented by prefixing an absolute path, ask me
+ * if you need it...
+ * - ...
+ *
+ * Don't be a fool:
+ * - Don't alter data on a GET: '<img src="http://yourhost/mail?action=delete&uid=3267" />'
+ * - ...
+ */
+
+class washtml
+{
+
+  /* Allowed HTML elements; any other element is replaced by an HTML comment */
+  public static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'u', 'ul', 'var', 'img');
+
+  /* Allowed HTML attributes (copied verbatim, value is html-escaped) */
+  public static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir');
+
+  /* Allowed elements that never have content; only these are written as <tag /> */
+  public static $void_elements = array('area', 'basefont', 'br', 'col', 'hr', 'img');
+
+  /**
+   * Filter the value of an inline style attribute.
+   *
+   * The style is split on ';'. For each "name: value" declaration the value
+   * is consumed token by token; url() tokens are kept only for allowed remote
+   * urls or mapped cid: urls, every other recognized token is copied.
+   *
+   * @param string  $style   Raw content of the style attribute
+   * @param array   $config  Options: 'allow_remote' and 'cid_map' are read here
+   * @param boolean $full    Set to false when a remote url was dropped
+   * @return string Washed style string, empty if nothing was kept
+   */
+  public static function wash_style($style, $config, &$full) {
+    $s = '';
+
+    foreach(explode(';', $style) as $declaration) {
+      if(preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) {
+        $cssid = $match[1];
+        $str = $match[2];
+        $value = '';
+        // strlen(), not sizeof(): $str is a string, counting it is meaningless
+        while(strlen($str) > 0 &&
+          preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/
+                 '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
+                 '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
+                 '|#[0-9a-f]{3,6}|[a-z0-9\-]+'.
+                 ')\s*/i', $str, $match)) {
+          // decide on the matched token itself: testing only the captured url
+          // for truthiness let url(0) and url() slip through unchecked
+          if(strncasecmp($match[0], 'url(', 4) == 0) {
+            $url = isset($match[2]) ? $match[2] : '';
+            if(preg_match('/^(http|https|ftp):.*$/i', $url, $remote)) {
+              if($config['allow_remote'])
+                $value .= ' url(\''.htmlspecialchars($remote[0], ENT_QUOTES).'\')';
+              else
+                $full = false;
+            } else if(preg_match('/^cid:(.*)$/i', $url, $cid) &&
+                      isset($config['cid_map']['cid:'.$cid[1]]))
+              // only mapped cid urls are kept
+              $value .= ' url(\''.htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '\')';
+          } else if(!in_array(strtolower(trim($match[0])), array('url', 'rgb'), true))
+            // bare function names without a valid argument list are dropped
+            $value .= ' ' . $match[0];
+          $str = substr($str, strlen($match[0]));
+        }
+        if($value)
+          $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
+      }
+    }
+    return $s;
+  }
+
+  /**
+   * Build the attribute string for an element, keeping only allowed attributes.
+   *
+   * @param DOMElement $node    Element whose attributes are checked
+   * @param array      $config  Options: 'allow_remote', 'blocked_src',
+   *                            'cid_map' and 'show_washed' are read here
+   * @param boolean    $full    Set to false when a remote image was blocked
+   * @return string Attribute string with a leading space, or empty string
+   */
+  public static function wash_attribs($node, $config, &$full) {
+    $t = '';
+    $washed = '';
+
+    foreach($node->attributes as $key => $plop) {
+      $key = strtolower($key);
+      $value = $node->getAttribute($key);
+      if((in_array($key, self::$html_attribs, true)) ||
+         ($key == 'href' && preg_match('/^(http|https|ftp|mailto):.*/i', $value)))
+        $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+      else if($key == 'style' && ($style = self::wash_style($value, $config, $full)))
+        $t .= ' style="' . $style . '"';
+      else if($key == 'src' && strtolower($node->tagName) == 'img') { //check tagName anyway
+        if(preg_match('/^(http|https|ftp):.*/i', $value)) {
+          if($config['allow_remote'])
+            $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+          else {
+            $full = false;
+            if (!empty($config['blocked_src']))
+              $t .= ' src="' . htmlspecialchars($config['blocked_src'], ENT_QUOTES) . '"';
+          }
+        } else if(preg_match('/^cid:(.*)$/i', $value, $cid) &&
+                  isset($config['cid_map']['cid:'.$cid[1]]))
+          // unmapped cid urls are dropped instead of emitting an empty src
+          $t .= ' ' . $key . '="' . htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '"';
+      } else
+        $washed .= ($washed !== '' ? ' ' : '') . $key;
+    }
+
+    // names of removed attributes come from the untrusted document: escape them
+    if($washed !== '' && !empty($config['show_washed']))
+      $t .= ' x-washed="' . htmlspecialchars($washed, ENT_QUOTES) . '"';
+
+    return $t;
+  }
+
+  /**
+   * Recurse over the children of a node and serialize them.
+   * Only allowed tags with allowed attributes and allowed inline styles
+   * are written; html and body are unwrapped, other elements are replaced
+   * by an HTML comment and their content is discarded.
+   *
+   * @param DOMNode $node    Node whose children are dumped
+   * @param array   $config  Options, passed through to wash_attribs()
+   * @param boolean $full    Passed through to wash_attribs()
+   * @return string Serialized, washed markup of all child nodes
+   */
+  public static function dumpHtml($node, $config, &$full) {
+    if(!$node->hasChildNodes())
+      return '';
+
+    $node = $node->firstChild;
+    $dump = '';
+
+    do {
+      switch($node->nodeType) {
+      case XML_ELEMENT_NODE: //Check element
+        $tagName = strtolower($node->tagName);
+        if(in_array($tagName, self::$html_elements, true)) {
+          $content = self::dumpHtml($node, $config, $full);
+          $dump .= '<' . $tagName . self::wash_attribs($node, $config, $full);
+          // compare against '' so that a content of "0" is not lost, and
+          // self-close void elements only: <div /> is an open tag in HTML
+          if($content === '' && in_array($tagName, self::$void_elements, true))
+            $dump .= ' />';
+          else
+            $dump .= ">$content</$tagName>";
+        } else if($tagName == 'html' || $tagName == 'body') {
+          $dump .= self::dumpHtml($node, $config, $full); //Just ignored
+        } else
+          $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->';
+        break;
+      case XML_TEXT_NODE:
+        $dump .= htmlspecialchars($node->nodeValue);
+        break;
+      case XML_HTML_DOCUMENT_NODE:
+        $dump .= self::dumpHtml($node, $config, $full);
+        break;
+      case XML_DOCUMENT_TYPE_NODE:
+        break;
+      default:
+        // comments, CDATA, processing instructions etc. are dropped
+        break;
+      }
+    } while($node = $node->nextSibling);
+
+    return $dump;
+  }
+
+  /**
+   * Main function, give it untrusted HTML, tell it if you allow loading
+   * remote images and give it a map to convert "cid:" urls.
+   *
+   * @param string  $html    Untrusted HTML code
+   * @param array   $config  Options: 'allow_remote', 'blocked_src',
+   *                         'show_washed', 'cid_map', 'charset'
+   * @param boolean $full    Set to true on entry, false if remote content was removed
+   * @return string Washed HTML without html and body wrappers
+   */
+  public static function wash($html, $config=array(), &$full=true) {
+    // defaults for every option read later, so callers may pass a partial array
+    $config += array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array(),
+      'blocked_src'=>'', 'charset'=>'');
+    $full = true;
+
+    // nothing to parse; newer PHP versions reject an empty source in loadHTML()
+    $html = (string)$html;
+    if($html === '')
+      return '';
+
+    //Charset seems to be ignored (probably if defined in the HTML document)
+    $node = new DOMDocument('1.0', (string)$config['charset']);
+    // parser warnings about malformed markup are expected for mail content
+    @$node->loadHTML($html);
+    return self::dumpHtml($node, $config, $full);
+  }
+
+}
+
+?> \ No newline at end of file
diff --git a/program/steps/mail/func.inc b/program/steps/mail/func.inc
index d37a52126..0a1e4d4c7 100644
--- a/program/steps/mail/func.inc
+++ b/program/steps/mail/func.inc
@@ -74,7 +74,9 @@ if (empty($RCMAIL->action) || $RCMAIL->action == 'list')
-// return the message list as HTML table
+/**
+ * return the message list as HTML table
+ */
function rcmail_message_list($attrib)
{
global $IMAP, $CONFIG, $COMM_PATH, $OUTPUT;
@@ -295,7 +297,9 @@ function rcmail_message_list($attrib)
}
-// return javascript commands to add rows to the message list
+/**
+ * return javascript commands to add rows to the message list
+ */
function rcmail_js_message_list($a_headers, $insert_top=FALSE)
{
global $CONFIG, $IMAP, $OUTPUT;
@@ -358,7 +362,9 @@ function rcmail_js_message_list($a_headers, $insert_top=FALSE)
}
-// return an HTML iframe for loading mail content
+/**
+ * return an HTML iframe for loading mail content
+ */
function rcmail_messagecontent_frame($attrib)
{
global $OUTPUT;
@@ -381,6 +387,9 @@ function rcmail_messagecontent_frame($attrib)
}
+/**
+ *
+ */
function rcmail_messagecount_display($attrib)
{
global $IMAP, $OUTPUT;
@@ -401,6 +410,9 @@ function rcmail_messagecount_display($attrib)
}
+/**
+ *
+ */
function rcmail_quota_display($attrib)
{
global $OUTPUT, $COMM_PATH;
@@ -423,6 +435,9 @@ function rcmail_quota_display($attrib)
}
+/**
+ *
+ */
function rcmail_quota_content($quota=NULL)
{
global $IMAP, $COMM_PATH;
@@ -466,6 +481,9 @@ function rcmail_quota_content($quota=NULL)
}
+/**
+ *
+ */
function rcmail_get_messagecount_text($count=NULL, $page=NULL)
{
global $IMAP, $MESSAGE;
@@ -495,246 +513,102 @@ function rcmail_get_messagecount_text($count=NULL, $page=NULL)
}
-/* Stolen from Squirrelmail */
-function sq_deent(&$attvalue, $regex, $hex=false)
- {
- $ret_match = false;
- preg_match_all($regex, $attvalue, $matches);
- if (is_array($matches) && sizeof($matches[0]) > 0)
- {
- $repl = Array();
- for ($i = 0; $i < sizeof($matches[0]); $i++)
- {
- $numval = $matches[1][$i];
- if ($hex)
- $numval = hexdec($numval);
- $repl{$matches[0][$i]} = chr($numval);
- }
- $attvalue = strtr($attvalue, $repl);
- return true;
- }
- else
- return false;
- }
-
-
-/* Stolen verbatim from Squirrelmail */
-function sq_defang(&$attvalue)
- {
- /* Skip this if there aren't ampersands or backslashes. */
- if ((strpos($attvalue, '&') === false) &&
- (strpos($attvalue, '\\') === false))
- return;
- $m = false;
- do
- {
- $m = false;
- $m = $m || sq_deent($attvalue, '/\&#0*(\d+);*/s');
- $m = $m || sq_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true);
- $m = $m || sq_deent($attvalue, '/\\\\(\d+)/s', true);
- } while ($m == true);
- $attvalue = stripslashes($attvalue);
- }
-
-
-function rcmail_html_filter($html)
- {
- preg_match_all('/<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/', $html, $tags);
-
- /* From Squirrelmail: Translate all dangerous Unicode or Shift_JIS characters which are accepted by
- * IE as regular characters. */
- $replace = array(array('&#x029F;', '&#0671;', /* L UNICODE IPA Extension */
- '&#x0280;', '&#0640;', /* R UNICODE IPA Extension */
- '&#x0274;', '&#0628;', /* N UNICODE IPA Extension */
- '&#xFF25;', '&#65317;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
- '&#xFF45;', '&#65349;', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
- '&#xFF38;', '&#65336;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
- '&#xFF58;', '&#65368;', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
- '&#xFF30;', '&#65328;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
- '&#xFF50;', '&#65360;', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
- '&#xFF32;', '&#65330;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
- '&#xFF52;', '&#65362;', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
- '&#xFF33;', '&#65331;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
- '&#xFF53;', '&#65363;', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
- '&#xFF29;', '&#65321;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
- '&#xFF49;', '&#65353;', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
- '&#xFF2F;', '&#65327;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
- '&#xFF4F;', '&#65359;', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
- '&#xFF2E;', '&#65326;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
- '&#xFF4E;', '&#65358;', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
- '&#xFF2C;', '&#65324;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
- '&#xFF4C;', '&#65356;', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
- '&#xFF35;', '&#65333;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
- '&#xFF55;', '&#65365;', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
- '&#x207F;', '&#8319;' , /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
- "\xEF\xBC\xA5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */
- /* in unicode this is some Chinese char range */
- "\xEF\xBD\x85", /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
- "\xEF\xBC\xB8", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
- "\xEF\xBD\x98", /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
- "\xEF\xBC\xB0", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
- "\xEF\xBD\x90", /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
- "\xEF\xBC\xB2", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
- "\xEF\xBD\x92", /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
- "\xEF\xBC\xB3", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
- "\xEF\xBD\x93", /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
- "\xEF\xBC\xA9", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
- "\xEF\xBD\x89", /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
- "\xEF\xBC\xAF", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
- "\xEF\xBD\x8F", /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
- "\xEF\xBC\xAE", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
- "\xEF\xBD\x8E", /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
- "\xEF\xBC\xAC", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER L */
- "\xEF\xBD\x8C", /* Shift JIS FULLWIDTH LATIN SMALL LETTER L */
- "\xEF\xBC\xB5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER U */
- "\xEF\xBD\x95", /* Shift JIS FULLWIDTH LATIN SMALL LETTER U */
- "\xE2\x81\xBF", /* Shift JIS FULLWIDTH SUPERSCRIPT N */
- "\xCA\x9F", /* L UNICODE IPA Extension */
- "\xCA\x80", /* R UNICODE IPA Extension */
- "\xC9\xB4"), /* N UNICODE IPA Extension */
- array('l', 'l', 'r', 'r', 'n', 'n', 'E', 'E', 'e', 'e', 'X', 'X', 'x', 'x',
- 'P', 'P', 'p', 'p', 'R', 'R', 'r', 'r', 'S', 'S', 's', 's', 'I', 'I',
- 'i', 'i', 'O', 'O', 'o', 'o', 'N', 'N', 'n', 'n', 'L', 'L', 'l', 'l',
- 'U', 'U', 'u', 'u', 'n', 'n', 'E', 'e', 'X', 'x', 'P', 'p', 'R', 'r',
- 'S', 's', 'I', 'i', 'O', 'o', 'N', 'n', 'L', 'l', 'U', 'u', 'n', 'l', 'r', 'n'));
- if ((count($tags)>3) && (count($tags[3])>0))
- foreach ($tags[3] as $nr=>$value)
- {
- /* Remove comments */
- $newvalue = preg_replace('/(\/\*.*\*\/)/','$2',$value);
- /* Translate dangerous characters */
- $newvalue = str_replace($replace[0], $replace[1], $newvalue);
- sq_defang($newvalue);
- /* Rename dangerous CSS */
- $newvalue = preg_replace('/expression/i', 'idiocy', $newvalue);
- $newvalue = preg_replace('/url/i', 'idiocy', $newvalue);
- $newattrs = preg_replace('/'.preg_quote($value, '/').'$/', $newvalue, $tags[1][$nr]);
- $newtag = preg_replace('/'.preg_quote($tags[1][$nr], '/').'/', $newattrs, $tags[0][$nr]);
- $html = preg_replace('/'.preg_quote($tags[0][$nr], '/').'/', $newtag, $html);
- }
- return $html;
- }
-
-
+/**
+ *
+ */
function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
- {
- global $IMAP, $REMOTE_OBJECTS;
+{
+ global $REMOTE_OBJECTS;
- $body = is_array($part->replaces) ? strtr($part->body, $part->replaces) : $part->body;
-
// convert html to text/plain
- if ($part->ctype_secondary=='html' && $plain)
- {
- $txt = new html2text($body, false, true);
+ if ($part->ctype_secondary == 'html' && $plain) {
+ $txt = new html2text($part->body, false, true);
$body = $txt->get_text();
$part->ctype_secondary = 'plain';
- }
-
+ }
// text/html
- if ($part->ctype_secondary=='html')
- {
- // remove charset specification in HTML message
- $body = preg_replace('/charset=[a-z0-9\-]+/i', '', $body);
-
- if (!$safe) // remove remote images and scripts
- {
- $remote_patterns = array('/<img\s+(.*)src=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
- '/(src|background)=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
- '/(<base.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
- '/(<link.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
- '/url\s*\(["\']?([hftps]{3,5}:\/{2}[^"\'\s]+)["\']?\)/i',
- '/url\s*\(["\']?([\.\/]+[^"\'\s]+)["\']?\)/i',
- '/<script.+<\/script>/Umis');
-
- $remote_replaces = array('<img \\1src=\\2./program/blocked.gif\\4',
- '',
- '',
- '',
- 'none',
- 'none',
- '');
-
- // set flag if message containes remote obejcts that where blocked
- foreach ($remote_patterns as $pattern)
- {
- if (preg_match($pattern, $body))
- {
- $REMOTE_OBJECTS = TRUE;
- break;
- }
- }
-
- $body = preg_replace($remote_patterns, $remote_replaces, $body);
- }
-
- return Q(rcmail_html_filter($body), 'show', FALSE);
- }
-
+ else if ($part->ctype_secondary == 'html') {
+ // clean HTML with washtml by Frederic Motte
+ $body = washtml::wash($part->body, array(
+ 'show_washed' => false,
+ 'allow_remote' => $safe,
+ 'blocked_src' => "./program/blocked.gif",
+ 'charset' => 'UTF-8',
+ 'cid_map' => $part->replaces,
+ ), $full_inline);
+
+ $REMOTE_OBJECTS = !$full_inline;
+
+ return $body;
+ }
// text/enriched
- if ($part->ctype_secondary=='enriched')
- {
+ else if ($part->ctype_secondary=='enriched') {
return Q(enriched_to_html($body), 'show');
- }
+ }
else
- {
- // make links and email-addresses clickable
- $convert_patterns = $convert_replaces = $replace_strings = array();
-
- $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;';
- $url_chars_within = '\?\.~,!';
+ $body = $part->body;
- $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
- $convert_replaces[] = "rcmail_str_replacement('<a href=\"\\1://\\2\" target=\"_blank\">\\1://\\2</a>', \$replace_strings)";
- $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
- $convert_replaces[] = "rcmail_str_replacement('\\1<a href=\"http://\\2\\3\" target=\"_blank\">\\2\\3</a>', \$replace_strings)";
-
- $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie';
- $convert_replaces[] = "rcmail_str_replacement('<a href=\"mailto:\\1\" onclick=\"return ".JS_OBJECT_NAME.".command(\'compose\',\'\\1\',this)\">\\1</a>', \$replace_strings)";
-
-// if ($part->ctype_parameters['format'] != 'flowed')
-// $body = wordwrap(trim($body), 80);
+ /**** assert plaintext ****/
- $body = preg_replace($convert_patterns, $convert_replaces, $body);
+ // make links and email-addresses clickable
+ $convert_patterns = $convert_replaces = $replace_strings = array();
+
+ $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;';
+ $url_chars_within = '\?\.~,!';
- // split body into single lines
- $a_lines = preg_split('/\r?\n/', $body);
- $quote_level = 0;
+ $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
+ $convert_replaces[] = "rcmail_str_replacement('<a href=\"\\1://\\2\" target=\"_blank\">\\1://\\2</a>', \$replace_strings)";
- // colorize quoted parts
- for($n=0; $n<sizeof($a_lines); $n++)
- {
- $line = $a_lines[$n];
- $quotation = '';
- $q = 0;
-
- if (preg_match('/^(>+\s*)+/', $line, $regs))
- {
- $q = strlen(preg_replace('/\s/', '', $regs[0]));
- $line = substr($line, strlen($regs[0]));
+ $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
+ $convert_replaces[] = "rcmail_str_replacement('\\1<a href=\"http://\\2\\3\" target=\"_blank\">\\2\\3</a>', \$replace_strings)";
+
+ $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie';
+ $convert_replaces[] = "rcmail_str_replacement('<a href=\"mailto:\\1\" onclick=\"return ".JS_OBJECT_NAME.".command(\'compose\',\'\\1\',this)\">\\1</a>', \$replace_strings)";
+
+// if ($part->ctype_parameters['format'] != 'flowed')
+// $body = wordwrap(trim($body), 80);
- if ($q > $quote_level)
- $quotation = str_repeat('<blockquote>', $q - $quote_level);
- else if ($q < $quote_level)
- $quotation = str_repeat("</blockquote>", $quote_level - $q);
- }
- else if ($quote_level > 0)
- $quotation = str_repeat("</blockquote>", $quote_level);
+ // search for patterns like links and e-mail addresses
+ $body = preg_replace($convert_patterns, $convert_replaces, $body);
- $quote_level = $q;
- $a_lines[$n] = $quotation . Q($line, 'replace', FALSE);
- }
+ // split body into single lines
+ $a_lines = preg_split('/\r?\n/', $body);
+ $quote_level = 0;
- // insert the links for urls and mailtos
- $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines));
+ // colorize quoted parts
+ for ($n=0; $n < sizeof($a_lines); $n++) {
+ $line = $a_lines[$n];
+ $quotation = '';
+ $q = 0;
- return "<div class=\"pre\">".$body."\n</div>";
+ if (preg_match('/^(>+\s*)+/', $line, $regs)) {
+ $q = strlen(preg_replace('/\s/', '', $regs[0]));
+ $line = substr($line, strlen($regs[0]));
+
+ if ($q > $quote_level)
+ $quotation = str_repeat('<blockquote>', $q - $quote_level);
+ else if ($q < $quote_level)
+ $quotation = str_repeat("</blockquote>", $quote_level - $q);
}
+ else if ($quote_level > 0)
+ $quotation = str_repeat("</blockquote>", $quote_level);
+
+ $quote_level = $q;
+ $a_lines[$n] = $quotation . Q($line, 'replace', false); // htmlquote plaintext
}
+ // insert the links for urls and mailtos
+ $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines));
+
+ return "<div class=\"pre\">".$body."\n</div>";
+ }
-// add a string to the replacement array and return a replacement string
+
+/**
+ * add a string to the replacement array and return a replacement string
+ */
function rcmail_str_replacement($str, &$rep)
{
static $count = 0;
@@ -743,200 +617,10 @@ function rcmail_str_replacement($str, &$rep)
}
-function rcmail_parse_message(&$structure, $arg=array(), $recursive=FALSE)
- {
- global $IMAP;
- static $sa_inline_objects = array();
-
- // arguments are: (bool)$prefer_html, (string)$get_url
- extract($arg);
-
- $a_attachments = array();
- $a_return_parts = array();
- $out = '';
-
- $message_ctype_primary = strtolower($structure->ctype_primary);
- $message_ctype_secondary = strtolower($structure->ctype_secondary);
-
- // show message headers
- if ($recursive && is_array($structure->headers) && isset($structure->headers['subject']))
- {
- $c = new stdClass;
- $c->type = 'headers';
- $c->headers = &$structure->headers;
- $a_return_parts[] = $c;
- }
-
- // print body if message doesn't have multiple parts
- if ($message_ctype_primary=='text')
- {
- $structure->type = 'content';
- $a_return_parts[] = &$structure;
- }
-
- // message contains alternative parts
- else if ($message_ctype_primary=='multipart' && $message_ctype_secondary=='alternative' && is_array($structure->parts))
- {
- // get html/plaintext parts
- $plain_part = $html_part = $print_part = $related_part = NULL;
-
- foreach ($structure->parts as $p => $sub_part)
- {
- $rel_parts = $attachmnts = null;
- $sub_ctype_primary = strtolower($sub_part->ctype_primary);
- $sub_ctype_secondary = strtolower($sub_part->ctype_secondary);
-
- // check if sub part is
- if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='plain')
- $plain_part = $p;
- else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='html')
- $html_part = $p;
- else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='enriched')
- $enriched_part = $p;
- else if ($sub_ctype_primary=='multipart' && ($sub_ctype_secondary=='related' || $sub_ctype_secondary=='mixed'))
- $related_part = $p;
- }
-
- // parse related part (alternative part could be in here)
- if ($related_part!==NULL)
- {
- list($rel_parts, $attachmnts) = rcmail_parse_message($structure->parts[$related_part], $arg, TRUE);
- $a_attachments = array_merge($a_attachments, $attachmnts);
- }
-
- // merge related parts if any
- if ($rel_parts && $prefer_html && !$html_part)
- $a_return_parts = array_merge($a_return_parts, $rel_parts);
-
- // choose html/plain part to print
- else if ($html_part!==NULL && $prefer_html)
- $print_part = &$structure->parts[$html_part];
- else if ($enriched_part!==NULL)
- $print_part = &$structure->parts[$enriched_part];
- else if ($plain_part!==NULL)
- $print_part = &$structure->parts[$plain_part];
-
- // show message body
- if (is_object($print_part))
- {
- $print_part->type = 'content';
- $a_return_parts[] = $print_part;
- }
- // show plaintext warning
- else if ($html_part!==NULL && empty($a_return_parts))
- {
- $c = new stdClass;
- $c->type = 'content';
- $c->body = rcube_label('htmlmessage');
- $c->ctype_primary = 'text';
- $c->ctype_secondary = 'plain';
-
- $a_return_parts[] = $c;
- }
-
- // add html part as attachment
- if ($html_part!==NULL && $structure->parts[$html_part]!==$print_part)
- {
- $html_part = &$structure->parts[$html_part];
- $html_part->filename = rcube_label('htmlmessage');
- $html_part->mimetype = 'text/html';
-
- $a_attachments[] = $html_part;
- }
- }
-
- // message contains multiple parts
- else if (is_array($structure->parts) && !empty($structure->parts))
- {
- for ($i=0; $i<count($structure->parts); $i++)
- {
- $mail_part = &$structure->parts[$i];
- $primary_type = strtolower($mail_part->ctype_primary);
- $secondary_type = strtolower($mail_part->ctype_secondary);
-
- // multipart/alternative
- if ($primary_type=='multipart')
- {
- list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
-
- $a_return_parts = array_merge($a_return_parts, $parts);
- $a_attachments = array_merge($a_attachments, $attachmnts);
- }
-
- // part text/[plain|html] OR message/delivery-status
- else if (($primary_type=='text' && ($secondary_type=='plain' || $secondary_type=='html') && $mail_part->disposition!='attachment') ||
- ($primary_type=='message' && ($secondary_type=='delivery-status' || $secondary_type=='disposition-notification')))
- {
- $mail_part->type = 'content';
- $a_return_parts[] = $mail_part;
- }
-
- // part message/*
- else if ($primary_type=='message')
- {
- list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
-
- $a_return_parts = array_merge($a_return_parts, $parts);
- $a_attachments = array_merge($a_attachments, $attachmnts);
- }
-
- // ignore "virtual" protocol parts
- else if ($primary_type=='protocol')
- continue;
-
- // part is file/attachment
- else if ($mail_part->disposition=='attachment' || $mail_part->disposition=='inline' || $mail_part->headers['content-id'] ||
- (empty($mail_part->disposition) && $mail_part->filename))
- {
- // skip apple resource forks
- if ($message_ctype_secondary=='appledouble' && $secondary_type=='applefile')
- continue;
-
- // part belongs to a related message
- if ($message_ctype_secondary=='related' && $mail_part->headers['content-id'])
- {
- $mail_part->content_id = preg_replace(array('/^</', '/>$/'), '', $mail_part->headers['content-id']);
- $sa_inline_objects[] = $mail_part;
- }
- // is regular attachment
- else
- {
- if (!$mail_part->filename)
- $mail_part->filename = 'Part '.$mail_part->mime_id;
- $a_attachments[] = $mail_part;
- }
- }
- }
-
- // if this was a related part try to resolve references
- if ($message_ctype_secondary=='related' && sizeof($sa_inline_objects))
- {
- $a_replaces = array();
-
- foreach ($sa_inline_objects as $inline_object)
- $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($get_url, $inline_object->mime_id));
-
- // add replace array to each content part
- // (will be applied later when part body is available)
- for ($i=0; $i<count($a_return_parts); $i++)
- {
- if ($a_return_parts[$i]->type=='content')
- $a_return_parts[$i]->replaces = $a_replaces;
- }
- }
- }
-
- // message is single part non-text
- else if ($structure->filename)
- $a_attachments[] = $structure;
-
- return array($a_return_parts, $a_attachments);
- }
-
-
-
-// return table with message headers
+/**
+ * return table with message headers
+ */
function rcmail_message_headers($attrib, $headers=NULL)
{
global $IMAP, $OUTPUT, $MESSAGE;
@@ -989,7 +673,9 @@ function rcmail_message_headers($attrib, $headers=NULL)
}
-
+/**
+ *
+ */
function rcmail_message_body($attrib)
{
global $CONFIG, $OUTPUT, $MESSAGE, $IMAP, $REMOTE_OBJECTS;
@@ -1028,7 +714,7 @@ function rcmail_message_body($attrib)
$out .= '<div class="message-part">';
if ($part->ctype_secondary != 'plain')
- $out .= rcmail_sanitize_html($body, $attrib['id']);
+ $out .= rcmail_html4inline($body, $attrib['id']);
else
$out .= $body;
@@ -1068,12 +754,11 @@ function rcmail_message_body($attrib)
-// modify a HTML message that it can be displayed inside a HTML page
-function rcmail_sanitize_html($body, $container_id)
+/**
+ * modify a HTML message that it can be displayed inside a HTML page
+ */
+function rcmail_html4inline($body, $container_id)
{
- // remove any null-byte characters before parsing
- $body = preg_replace('/\x00/', '', $body);
-
$base_url = "";
$last_style_pos = 0;
$body_lc = strtolower($body);
@@ -1095,26 +780,6 @@ function rcmail_sanitize_html($body, $container_id)
$last_style_pos = $pos2;
}
-
- // remove SCRIPT tags
- foreach (array('script', 'applet', 'object', 'embed', 'iframe') as $tag)
- {
- while (($pos = strpos($body_lc, '<'.$tag)) && (($pos2 = strpos($body_lc, '</'.$tag.'>', $pos)) || ($pos3 = strpos($body_lc, '>', $pos))))
- {
- $end = $pos2 ? $pos2 + strlen('</'.$tag.'>') : $pos3 + 1;
- $body = substr($body, 0, $pos) . substr($body, $end, strlen($body)-$end);
- $body_lc = strtolower($body);
- }
- }
-
- // replace event handlers on any object
- while ($body != $prev_body)
- {
- $prev_body = $body;
- $body = preg_replace('/(<[^!][^>]*\s)on(?:load|unload|click|dblclick|mousedown|mouseup|mouseover|mousemove|mouseout|focus|blur|keypress|keydown|keyup|submit|reset|select|change)=([^>]+>)/im', '$1__removed=$2', $body);
- $body = preg_replace('/(<[^!][^>]*\shref=["\']?)(javascript:)([^>]*?>)/im', '$1null:$3', $body);
- }
-
// resolve <base href>
if ($base_url)
{
@@ -1138,14 +803,8 @@ function rcmail_sanitize_html($body, $container_id)
$body);
$out = preg_replace(
- array(
- '/<body([^>]*)>/i',
- '/<\/body>/i',
- ),
- array(
- '<div class="rcmBody"\\1>',
- '</div>',
- ),
+ array('/<body([^>]*)>/i', '/<\/body>/i'),
+ array('<div class="rcmBody"\\1>', '</div>'),
$out);
// quote <? of php and xml files that are specified as text/html
@@ -1155,7 +814,9 @@ function rcmail_sanitize_html($body, $container_id)
}
-// parse link attributes and set correct target
+/**
+ * parse link attributes and set correct target
+ */
function rcmail_alter_html_link($tag, $attrs, $container_id)
{
$attrib = parse_attrib_string($attrs);
@@ -1176,7 +837,9 @@ function rcmail_alter_html_link($tag, $attrs, $container_id)
}
-// decode address string and re-format it as HTML links
+/**
+ * decode address string and re-format it as HTML links
+ */
function rcmail_address_string($input, $max=NULL, $addicon=NULL)
{
global $IMAP, $PRINT_MODE, $CONFIG, $OUTPUT, $EMAIL_ADDRESS_PATTERN;
@@ -1277,7 +940,9 @@ function rcmail_message_part_frame($attrib)
}
-// clear message composing settings
+/**
+ * clear message composing settings
+ */
function rcmail_compose_cleanup()
{
if (!isset($_SESSION['compose']))