From 0fa54df638a0b0f514d1bfba3cefb93e38991a35 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Sat, 1 Dec 2012 20:02:34 +0100
Subject: enriched.inc -> rcube_enriched
---
tests/Framework/Enriched.php | 74 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 tests/Framework/Enriched.php
(limited to 'tests/Framework')
diff --git a/tests/Framework/Enriched.php b/tests/Framework/Enriched.php
new file mode 100644
index 000000000..26bbc3b4e
--- /dev/null
+++ b/tests/Framework/Enriched.php
@@ -0,0 +1,74 @@
+assertInstanceOf('rcube_enriched', $object, "Class constructor");
+ }
+
+ /**
+ * Test to_html()
+ */
+ function test_to_html()
+ {
+ $enriched = 'the-text';
+ $expected = 'the-text';
+ $result = rcube_enriched::to_html($enriched);
+
+ $this->assertSame($expected, $result);
+ }
+
+ /**
+ * Data for test_formatting()
+ */
+ function data_formatting()
+ {
+ return array(
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ array('', ''),
+ );
+ }
+
+ /**
+ * Test formatting conversion
+ * @dataProvider data_formatting
+ */
+ function test_formatting($enriched, $expected)
+ {
+ $result = rcube_enriched::to_html($enriched);
+
+ $this->assertSame($expected, $result);
+ }
+}
--
cgit v1.2.3
From a079269166d120bcbcb33d34f4b1c8f60d102e32 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Thu, 20 Dec 2012 08:53:48 +0100
Subject: Fix version comparisons with -stable suffix (#1488876)
---
CHANGELOG | 1 +
bin/installto.sh | 2 +-
bin/update.sh | 4 ++--
installer/rcube_install.php | 6 +++---
program/lib/Roundcube/bootstrap.php | 16 ++++++++++++++++
tests/Framework/Bootstrap.php | 8 ++++++++
6 files changed, 31 insertions(+), 6 deletions(-)
(limited to 'tests/Framework')
diff --git a/CHANGELOG b/CHANGELOG
index c4e63aaeb..02e20455c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
CHANGELOG Roundcube Webmail
===========================
+- Fix version comparisons with -stable suffix (#1488876)
- Add unsupported alternative parts to attachments list (#1488870)
- Add Compose button on message view page (#1488747)
- Display 'Sender' header in message preview
diff --git a/bin/installto.sh b/bin/installto.sh
index de96bf004..e6cf79d7d 100755
--- a/bin/installto.sh
+++ b/bin/installto.sh
@@ -35,7 +35,7 @@ if (!preg_match('/define\(.RCMAIL_VERSION.,\s*.([0-9.]+[a-z-]*)/', $iniset, $m))
$oldversion = $m[1];
-if (version_compare($oldversion, RCMAIL_VERSION, '>='))
+if (version_compare(version_parse($oldversion), version_parse(RCMAIL_VERSION), '>='))
die("Installation at target location is up-to-date!\n");
echo "Upgrading from $oldversion. Do you want to continue? (y/N)\n";
diff --git a/bin/update.sh b/bin/update.sh
index 59aa596dd..2015aa904 100755
--- a/bin/update.sh
+++ b/bin/update.sh
@@ -34,7 +34,7 @@ if (!$opts['version']) {
$opts['version'] = $input;
}
-if ($opts['version'] && version_compare($opts['version'], RCMAIL_VERSION, '>'))
+if ($opts['version'] && version_compare(version_parse($opts['version']), version_parse(RCMAIL_VERSION), '>'))
die("Nothing to be done here. Bye!\n");
@@ -169,7 +169,7 @@ if ($RCI->configured) {
}
// index contacts for fulltext searching
- if (version_compare($opts['version'], '0.6', '<')) {
+ if (version_compare(version_parse($opts['version']), '0.6.0', '<')) {
system(INSTALL_PATH . 'bin/indexcontacts.sh');
}
diff --git a/installer/rcube_install.php b/installer/rcube_install.php
index dfd63562d..530be3ebe 100644
--- a/installer/rcube_install.php
+++ b/installer/rcube_install.php
@@ -633,8 +633,8 @@ class rcube_install
*/
function update_db($DB, $version)
{
- $version = strtolower($version);
- $engine = isset($this->db_map[$DB->db_provider]) ? $this->db_map[$DB->db_provider] : $DB->db_provider;
+ $version = version_parse(strtolower($version));
+ $engine = isset($this->db_map[$DB->db_provider]) ? $this->db_map[$DB->db_provider] : $DB->db_provider;
// read schema file from /SQL/*
$fname = INSTALL_PATH . "SQL/$engine.update.sql";
@@ -643,7 +643,7 @@ class rcube_install
foreach ($lines as $line) {
$is_comment = preg_match('/^--/', $line);
if (!$from && $is_comment && preg_match('/from version\s([0-9.]+[a-z-]*)/', $line, $m)) {
- $v = strtolower($m[1]);
+ $v = version_parse(strtolower($m[1]));
if ($v == $version || version_compare($version, $v, '<='))
$from = true;
}
diff --git a/program/lib/Roundcube/bootstrap.php b/program/lib/Roundcube/bootstrap.php
index 18c07ddab..530a7f855 100644
--- a/program/lib/Roundcube/bootstrap.php
+++ b/program/lib/Roundcube/bootstrap.php
@@ -357,6 +357,22 @@ function format_email($email)
}
+/**
+ * Fix version number so it can be used correctly in version_compare()
+ *
+ * @param string $version Version number string
+ *
+ * @param return Version number string
+ */
+function version_parse($version)
+{
+ return str_replace(
+ array('-stable', '-git'),
+ array('.0', '.99'),
+ $version);
+}
+
+
/**
* mbstring replacement functions
*/
diff --git a/tests/Framework/Bootstrap.php b/tests/Framework/Bootstrap.php
index d18fd371b..904be7e3b 100644
--- a/tests/Framework/Bootstrap.php
+++ b/tests/Framework/Bootstrap.php
@@ -207,4 +207,12 @@ class Framework_Bootstrap extends PHPUnit_Framework_TestCase
$this->assertFalse($result, "Invalid ASCII (UTF-8 character [2])");
}
+ /**
+ * bootstrap.php: version_parse()
+ */
+ function test_version_parse()
+ {
+ $this->assertEquals('0.9.0', version_parse('0.9-stable'));
+ $this->assertEquals('0.9.99', version_parse('0.9-git'));
+ }
}
--
cgit v1.2.3
From 0931a97c5fc7231df99fdf4cdeebb525392886ed Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Sun, 23 Dec 2012 15:09:56 +0100
Subject: Fix handling of parentheses in URLs
---
program/lib/Roundcube/rcube_string_replacer.php | 27 ++++++++++++++++++++++++-
tests/Framework/StringReplacer.php | 6 ++++++
2 files changed, 32 insertions(+), 1 deletion(-)
(limited to 'tests/Framework')
diff --git a/program/lib/Roundcube/rcube_string_replacer.php b/program/lib/Roundcube/rcube_string_replacer.php
index 0fe982b26..6b289886b 100644
--- a/program/lib/Roundcube/rcube_string_replacer.php
+++ b/program/lib/Roundcube/rcube_string_replacer.php
@@ -36,7 +36,7 @@ class rcube_string_replacer
// Support unicode/punycode in top-level domain part
$utf_domain = '[^?&@"\'\\/()\s\r\t\n]+\\.?([^\\x00-\\x2f\\x3b-\\x40\\x5b-\\x60\\x7b-\\x7f]{2,}|xn--[a-zA-Z0-9]{2,})';
$url1 = '.:;,';
- $url2 = 'a-zA-Z0-9%=#$@+?!&\\/_~\\[\\]{}\*-';
+ $url2 = 'a-zA-Z0-9%=#$@+?!&\\/_~\\[\\]\\(\\){}\*-';
$this->link_pattern = "/([\w]+:\/\/|\W[Ww][Ww][Ww]\.|^[Ww][Ww][Ww]\.)($utf_domain([$url1]?[$url2]+)*)/";
$this->mailto_pattern = "/("
@@ -161,6 +161,9 @@ class rcube_string_replacer
// "http://example.com/?a[b]=c". However we need to handle
// properly situation when a bracket is placed at the end
// of the link e.g. "[http://example.com]"
+ // Yes, this is not perfect handles correctly only paired characters
+ // but it should work for common cases
+
if (preg_match('/(\\[|\\])/', $url)) {
$in = false;
for ($i=0, $len=strlen($url); $i<$len; $i++) {
@@ -182,6 +185,28 @@ class rcube_string_replacer
}
}
+ // Do the same for parentheses
+ if (preg_match('/(\\(|\\))/', $url)) {
+ $in = false;
+ for ($i=0, $len=strlen($url); $i<$len; $i++) {
+ if ($url[$i] == '(') {
+ if ($in)
+ break;
+ $in = true;
+ }
+ else if ($url[$i] == ')') {
+ if (!$in)
+ break;
+ $in = false;
+ }
+ }
+
+ if ($i < $len) {
+ $suffix = substr($url, $i);
+ $url = substr($url, 0, $i);
+ }
+ }
+
return $suffix;
}
}
diff --git a/tests/Framework/StringReplacer.php b/tests/Framework/StringReplacer.php
index a76ba00ee..60399cf6b 100644
--- a/tests/Framework/StringReplacer.php
+++ b/tests/Framework/StringReplacer.php
@@ -29,6 +29,12 @@ class Framework_StringReplacer extends PHPUnit_Framework_TestCase
array('Start http://localhost/?foo End', 'Start http://localhost/?foo End'),
array('www.domain.tld', 'www.domain.tld'),
array('WWW.DOMAIN.TLD', 'WWW.DOMAIN.TLD'),
+ array('[http://link.com]', '[http://link.com]'),
+ array('http://link.com?a[]=1', 'http://link.com?a[]=1'),
+ array('http://link.com?a[]', 'http://link.com?a[]'),
+ array('(http://link.com)', '(http://link.com)'),
+ array('http://link.com?a(b)c', 'http://link.com?a(b)c'),
+ array('http://link.com?(link)', 'http://link.com?(link)'),
);
}
--
cgit v1.2.3
From 7ac94421bf85eb04c00c5ed05390e1ea0c6bcb0b Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Tue, 25 Dec 2012 18:06:17 +0100
Subject: Move washtml class into Roundcube Framework (rcube_washtml), add some
improvements
---
program/include/bc.php | 4 +
program/lib/Roundcube/rcube_washtml.php | 451 ++++++++++++++++++++++++++++++++
program/lib/washtml.php | 330 -----------------------
program/steps/mail/func.inc | 68 +----
tests/Framework/Washtml.php | 28 ++
tests/MailFunc.php | 2 +-
tests/phpunit.xml | 1 +
7 files changed, 486 insertions(+), 398 deletions(-)
create mode 100644 program/lib/Roundcube/rcube_washtml.php
delete mode 100644 program/lib/washtml.php
create mode 100644 tests/Framework/Washtml.php
(limited to 'tests/Framework')
diff --git a/program/include/bc.php b/program/include/bc.php
index dc4d54fd7..05d15b9e3 100644
--- a/program/include/bc.php
+++ b/program/include/bc.php
@@ -408,3 +408,7 @@ function enriched_to_html($data)
class rcube_html_page extends rcmail_html_page
{
}
+
+class washtml extends rcube_washtml
+{
+}
diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php
new file mode 100644
index 000000000..715c46047
--- /dev/null
+++ b/program/lib/Roundcube/rcube_washtml.php
@@ -0,0 +1,451 @@
+ |
+ | Author: Aleksander Machniak |
+ | Author: Frederic Motte |
+ +-----------------------------------------------------------------------+
+ */
+
+/**
+ * Washtml, a HTML sanityzer.
+ *
+ * Copyright (c) 2007 Frederic Motte
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OVERVIEW:
+ *
+ * Wahstml take an untrusted HTML and return a safe html string.
+ *
+ * SYNOPSIS:
+ *
+ * $washer = new washtml($config);
+ * $washer->wash($html);
+ * It return a sanityzed string of the $html parameter without html and head tags.
+ * $html is a string containing the html code to wash.
+ * $config is an array containing options:
+ * $config['allow_remote'] is a boolean to allow link to remote images.
+ * $config['blocked_src'] string with image-src to be used for blocked remote images
+ * $config['show_washed'] is a boolean to include washed out attributes as x-washed
+ * $config['cid_map'] is an array where cid urls index urls to replace them.
+ * $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
+ * $washer->extlinks is a reference to a boolean that is set to true if remote images were removed. (FE: show remote images link)
+ *
+ * INTERNALS:
+ *
+ * Only tags and attributes in the static lists $html_elements and $html_attributes
+ * are kept, inline styles are also filtered: all style identifiers matching
+ * /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe
+ * urls if allowed and cid urls if mapped are kept.
+ *
+ * Roundcube Changes:
+ * - added $block_elements
+ * - changed $ignore_elements behaviour
+ * - added RFC2397 support
+ * - base URL support
+ * - invalid HTML comments removal before parsing
+ * - "fixing" unitless CSS values for XHTML output
+ * - base url resolving
+ */
+
+/**
+ * Utility class providing HTML sanityzer
+ *
+ * @package Framework
+ * @subpackage Utils
+ */
+class rcube_washtml
+{
+ /* Allowed HTML elements (default) */
+ static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b',
+ 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center',
+ 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
+ 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
+ 'ins', 'label', 'legend', 'li', 'map', 'menu', 'nobr', 'ol', 'p', 'pre', 'q',
+ 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
+ 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', 'img',
+ // form elements
+ 'button', 'input', 'textarea', 'select', 'option', 'optgroup'
+ );
+
+ /* Ignore these HTML tags and their content */
+ static $ignore_elements = array('script', 'applet', 'embed', 'object', 'style');
+
+ /* Allowed HTML attributes */
+ static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height',
+ 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing',
+ 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight',
+ 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border',
+ 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace',
+ 'cellborder', 'size', 'lang', 'dir', 'usemap', 'shape', 'media',
+ // attributes of form elements
+ 'type', 'rows', 'cols', 'disabled', 'readonly', 'checked', 'multiple', 'value'
+ );
+
+ /* Block elements which could be empty but cannot be returned in short form () */
+ static $block_elements = array('div', 'p', 'pre', 'blockquote', 'a', 'font', 'center',
+ 'table', 'ul', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'dl', 'strong',
+ 'i', 'b', 'u', 'span',
+ );
+
+ /* State for linked objects in HTML */
+ public $extlinks = false;
+
+ /* Current settings */
+ private $config = array();
+
+ /* Registered callback functions for tags */
+ private $handlers = array();
+
+ /* Allowed HTML elements */
+ private $_html_elements = array();
+
+ /* Ignore these HTML tags but process their content */
+ private $_ignore_elements = array();
+
+ /* Block elements which could be empty but cannot be returned in short form () */
+ private $_block_elements = array();
+
+ /* Allowed HTML attributes */
+ private $_html_attribs = array();
+
+
+ /**
+ * Class constructor
+ */
+ public function __construct($p = array())
+ {
+ $this->_html_elements = array_flip((array)$p['html_elements']) + array_flip(self::$html_elements) ;
+ $this->_html_attribs = array_flip((array)$p['html_attribs']) + array_flip(self::$html_attribs);
+ $this->_ignore_elements = array_flip((array)$p['ignore_elements']) + array_flip(self::$ignore_elements);
+ $this->_block_elements = array_flip((array)$p['block_elements']) + array_flip(self::$block_elements);
+
+ unset($p['html_elements'], $p['html_attribs'], $p['ignore_elements'], $p['block_elements']);
+
+ $this->config = $p + array('show_washed' => true, 'allow_remote' => false, 'cid_map' => array());
+ }
+
+ /**
+ * Register a callback function for a certain tag
+ */
+ public function add_callback($tagName, $callback)
+ {
+ $this->handlers[$tagName] = $callback;
+ }
+
+ /**
+ * Check CSS style
+ */
+ private function wash_style($style)
+ {
+ $s = '';
+
+ foreach (explode(';', $style) as $declaration) {
+ if (preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) {
+ $cssid = $match[1];
+ $str = $match[2];
+ $value = '';
+
+ while (sizeof($str) > 0 &&
+ preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/
+ '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
+ '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
+ '|#[0-9a-f]{3,6}'.
+ '|[a-z0-9", -]+'.
+ ')\s*/i', $str, $match)
+ ) {
+ if ($match[2]) {
+ if (($src = $this->config['cid_map'][$match[2]])
+ || ($src = $this->config['cid_map'][$this->config['base_url'].$match[2]])
+ ) {
+ $value .= ' url('.htmlspecialchars($src, ENT_QUOTES) . ')';
+ }
+ else if (preg_match('!^(https?:)?//[a-z0-9/._+-]+$!i', $match[2], $url)) {
+ if ($this->config['allow_remote']) {
+ $value .= ' url('.htmlspecialchars($url[0], ENT_QUOTES).')';
+ }
+ else {
+ $this->extlinks = true;
+ }
+ }
+ else if (preg_match('/^data:.+/i', $match[2])) { // RFC2397
+ $value .= ' url('.htmlspecialchars($match[2], ENT_QUOTES).')';
+ }
+ }
+ else {
+ // whitelist ?
+ $value .= ' ' . $match[0];
+
+ // #1488535: Fix size units, so width:800 would be changed to width:800px
+ if (preg_match('/(left|right|top|bottom|width|height)/i', $cssid)
+ && preg_match('/^[0-9]+$/', $match[0])
+ ) {
+ $value .= 'px';
+ }
+ }
+
+ $str = substr($str, strlen($match[0]));
+ }
+
+ if (isset($value[0])) {
+ $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
+ }
+ }
+ }
+
+ return $s;
+ }
+
+ /**
+ * Take a node and return allowed attributes and check values
+ */
+ private function wash_attribs($node)
+ {
+ $t = '';
+ $washed = '';
+
+ foreach ($node->attributes as $key => $plop) {
+ $key = strtolower($key);
+ $value = $node->getAttribute($key);
+
+ if (isset($this->_html_attribs[$key]) ||
+ ($key == 'href' && !preg_match('!^(javascript|vbscript|data:text)!i', $value)
+ && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value))
+ ) {
+ $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+ }
+ else if ($key == 'style' && ($style = $this->wash_style($value))) {
+ $quot = strpos($style, '"') !== false ? "'" : '"';
+ $t .= ' style=' . $quot . $style . $quot;
+ }
+ else if ($key == 'background' || ($key == 'src' && strtolower($node->tagName) == 'img')) { //check tagName anyway
+ if (($src = $this->config['cid_map'][$value])
+ || ($src = $this->config['cid_map'][$this->config['base_url'].$value])
+ ) {
+ $t .= ' ' . $key . '="' . htmlspecialchars($src, ENT_QUOTES) . '"';
+ }
+ else if (preg_match('/^(http|https|ftp):.+/i', $value)) {
+ if ($this->config['allow_remote']) {
+ $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+ }
+ else {
+ $this->extlinks = true;
+ if ($this->config['blocked_src']) {
+ $t .= ' ' . $key . '="' . htmlspecialchars($this->config['blocked_src'], ENT_QUOTES) . '"';
+ }
+ }
+ }
+ else if (preg_match('/^data:.+/i', $value)) { // RFC2397
+ $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+ }
+ }
+ else {
+ $washed .= ($washed ? ' ' : '') . $key;
+ }
+ }
+
+ return $t . ($washed && $this->config['show_washed'] ? ' x-washed="'.$washed.'"' : '');
+ }
+
+ /**
+ * The main loop that recurse on a node tree.
+ * It output only allowed tags with allowed attributes
+ * and allowed inline styles
+ */
+ private function dumpHtml($node)
+ {
+ if (!$node->hasChildNodes()) {
+ return '';
+ }
+
+ $node = $node->firstChild;
+ $dump = '';
+
+ do {
+ switch($node->nodeType) {
+ case XML_ELEMENT_NODE: //Check element
+ $tagName = strtolower($node->tagName);
+ if ($callback = $this->handlers[$tagName]) {
+ $dump .= call_user_func($callback, $tagName,
+ $this->wash_attribs($node), $this->dumpHtml($node), $this);
+ }
+ else if (isset($this->_html_elements[$tagName])) {
+ $content = $this->dumpHtml($node);
+ $dump .= '<' . $tagName . $this->wash_attribs($node) .
+ ($content != '' || isset($this->_block_elements[$tagName]) ? ">$content$tagName>" : ' />');
+ }
+ else if (isset($this->_ignore_elements[$tagName])) {
+ $dump .= '';
+ }
+ else {
+ $dump .= '';
+ $dump .= $this->dumpHtml($node); // ignore tags not its content
+ }
+ break;
+
+ case XML_CDATA_SECTION_NODE:
+ $dump .= $node->nodeValue;
+ break;
+
+ case XML_TEXT_NODE:
+ $dump .= htmlspecialchars($node->nodeValue);
+ break;
+
+ case XML_HTML_DOCUMENT_NODE:
+ $dump .= $this->dumpHtml($node);
+ break;
+
+ case XML_DOCUMENT_TYPE_NODE:
+ break;
+
+ default:
+ $dump . '';
+ }
+ } while($node = $node->nextSibling);
+
+ return $dump;
+ }
+
+ /**
+ * Main function, give it untrusted HTML, tell it if you allow loading
+ * remote images and give it a map to convert "cid:" urls.
+ */
+ public function wash($html)
+ {
+ // Charset seems to be ignored (probably if defined in the HTML document)
+ $node = new DOMDocument('1.0', $this->config['charset']);
+ $this->extlinks = false;
+
+ $html = $this->cleanup($html);
+
+ // Find base URL for images
+ if (preg_match('/config['base_url'] = $matches[1];
+ }
+ else {
+ $this->config['base_url'] = '';
+ }
+
+ @$node->loadHTML($html);
+ return $this->dumpHtml($node);
+ }
+
+ /**
+ * Getter for config parameters
+ */
+ public function get_config($prop)
+ {
+ return $this->config[$prop];
+ }
+
+ /**
+ * Clean HTML input
+ */
+ private function cleanup($html)
+ {
+ // special replacements (not properly handled by washtml class)
+ $html_search = array(
+ '/(<\/nobr>)(\s+)()/i', // space(s) between
+ '/]*>[^<]*<\/title>/i', // PHP bug #32547 workaround: remove title tag
+ '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?)
+ '/]+>/i', // washtml/DOMDocument cannot handle xml namespaces
+ );
+
+ $html_replace = array(
+ '\\1'.' '.'\\3',
+ '',
+ '',
+ '',
+ );
+ $html = preg_replace($html_search, $html_replace, trim($html));
+
+ // PCRE errors handling (#1486856), should we use something like for every preg_* use?
+ if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
+ $errstr = "Could not clean up HTML message! PCRE Error: $preg_error.";
+
+ if ($preg_error == PREG_BACKTRACK_LIMIT_ERROR) {
+ $errstr .= " Consider raising pcre.backtrack_limit!";
+ }
+ if ($preg_error == PREG_RECURSION_LIMIT_ERROR) {
+ $errstr .= " Consider raising pcre.recursion_limit!";
+ }
+
+ rcube::raise_error(array('code' => 620, 'type' => 'php',
+ 'line' => __LINE__, 'file' => __FILE__,
+ 'message' => $errstr), true, false);
+ return '';
+ }
+
+ // fix (unknown/malformed) HTML tags before "wash"
+ $html = preg_replace_callback('/(<[\/]*)([^\s>]+)/', array($this, 'html_tag_callback'), $html);
+
+ // Remove invalid HTML comments (#1487759)
+ // Don't remove valid conditional comments
+ $html = preg_replace('/';
- }
- else {
- $dump .= '';
- $dump .= $this->dumpHtml($node); // ignore tags not its content
- }
- break;
- case XML_CDATA_SECTION_NODE:
- $dump .= $node->nodeValue;
- break;
- case XML_TEXT_NODE:
- $dump .= htmlspecialchars($node->nodeValue);
- break;
- case XML_HTML_DOCUMENT_NODE:
- $dump .= $this->dumpHtml($node);
- break;
- case XML_DOCUMENT_TYPE_NODE:
- break;
- default:
- $dump . '';
- }
- } while($node = $node->nextSibling);
-
- return $dump;
- }
-
- /* Main function, give it untrusted HTML, tell it if you allow loading
- * remote images and give it a map to convert "cid:" urls. */
- public function wash($html)
- {
- // Charset seems to be ignored (probably if defined in the HTML document)
- $node = new DOMDocument('1.0', $this->config['charset']);
- $this->extlinks = false;
-
- // Find base URL for images
- if (preg_match('/config['base_url'] = $matches[1];
- else
- $this->config['base_url'] = '';
-
- // Remove invalid HTML comments (#1487759)
- // Don't remove valid conditional comments
- $html = preg_replace('//', $html, "No external links allowed");
- $this->assertRegExp('/]+ target="_blank">/', $html, "Set target to _blank");
+ $this->assertRegExp('/]+ target="_blank"/', $html, "Set target to _blank");
$this->assertTrue($GLOBALS['REMOTE_OBJECTS'], "Remote object detected");
// render HTML in safe mode
@@ -133,8 +133,8 @@ class MailFunc extends PHPUnit_Framework_TestCase
$html = rcmail_print_body($part, array('safe' => true));
$this->assertRegExp('/nobody@roundcube.net<\/a>/', $html, "Mailto links with onclick");
- $this->assertRegExp('#http://www.apple.com/legal/privacy#', $html, "Links with target=_blank");
- $this->assertRegExp('#\\[http://example.com/\\?tx\\[a\\]=5\\]#', $html, "Links with square brackets");
+ $this->assertRegExp('#http://www.apple.com/legal/privacy#', $html, "Links with target=_blank");
+ $this->assertRegExp('#\\[http://example.com/\\?tx\\[a\\]=5\\]#', $html, "Links with square brackets");
}
/**
@@ -148,7 +148,7 @@ class MailFunc extends PHPUnit_Framework_TestCase
$html = rcmail_html4inline(rcmail_print_body($part, array('safe' => false)), 'foo');
$mailto = 'e-mail';
+ .' onclick="return rcmail.command(\'compose\',\'me@me.com?subject=this is the subject&body=this is the body\',this)" rel="noreferrer">e-mail';
$this->assertRegExp('|'.preg_quote($mailto, '|').'|', $html, "Extended mailto links");
}
--
cgit v1.2.3
From 1e2468e4b9e9fb44055757a5460df1a6bf31313b Mon Sep 17 00:00:00 2001
From: Aleksander Machniak
Date: Fri, 22 Mar 2013 10:23:39 +0100
Subject: Added two tests for HTML comments handling in rcube_washtml class
---
tests/Framework/Washtml.php | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
(limited to 'tests/Framework')
diff --git a/tests/Framework/Washtml.php b/tests/Framework/Washtml.php
index 6f4aa9783..cd443266f 100644
--- a/tests/Framework/Washtml.php
+++ b/tests/Framework/Washtml.php
@@ -18,7 +18,6 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
.'Internet Explorer
';
$washer = new rcube_washtml;
-
$washed = $washer->wash($html);
$this->assertNotRegExp('/data:text/', $washed, "Remove data:text/html links");
@@ -33,10 +32,27 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$html = "Firefox";
$washer = new rcube_washtml;
-
$washed = $washer->wash($html);
$this->assertRegExp('|href="http://test.com">|', $washed, "Link href with newlines (#1488940)");
}
+ /**
+ * Test handling HTML comments
+ */
+ function test_comments()
+ {
+ $washer = new rcube_washtml;
+
+ $html = "
p2
";
+ $washed = $washer->wash($html);
+
+ $this->assertEquals('p2
', $washed, "HTML conditional comments (#1489004)");
+
+ $html = "test
', $washed, "HTML invalid comments (#1487759)");
+ }
+
}
--
cgit v1.2.3