summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksander Machniak <alec@alec.pl>2013-12-28 19:14:51 +0100
committerAleksander Machniak <alec@alec.pl>2013-12-28 19:14:51 +0100
commitc7250749ab01721b55a26d6badf4dcdbbdaf7309 (patch)
tree7e1c44bf470294545f73ad3ad7c52108631afa17
parente1bb65c6d7ed3aa7bd4be48b37f4f47931ef203d (diff)
Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)
-rw-r--r--CHANGELOG1
-rw-r--r--program/lib/Roundcube/rcube_washtml.php83
-rw-r--r--tests/Framework/Washtml.php43
3 files changed, 123 insertions, 4 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 7a64d1830..7a625bce3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
CHANGELOG Roundcube Webmail
===========================
+- Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)
- Display different icons when Trash folder is empty or full (#1485775)
- Remember last position of more headers switch (#1488323)
- Fix so message flags modified by another client are applied on the list on refresh (#1485186)
diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php
index 9cf3c6222..5a5b3dc55 100644
--- a/program/lib/Roundcube/rcube_washtml.php
+++ b/program/lib/Roundcube/rcube_washtml.php
@@ -418,7 +418,7 @@ class rcube_washtml
$html = preg_replace($html_search, $html_replace, trim($html));
//-> Replace all of those weird MS Word quotes and other high characters
- $badwordchars=array(
+ $badwordchars = array(
"\xe2\x80\x98", // left single quote
"\xe2\x80\x99", // right single quote
"\xe2\x80\x9c", // left double quote
@@ -426,7 +426,7 @@ class rcube_washtml
"\xe2\x80\x94", // em dash
"\xe2\x80\xa6" // elipses
);
- $fixedwordchars=array(
+ $fixedwordchars = array(
"'",
"'",
'"',
@@ -434,7 +434,7 @@ class rcube_washtml
'&mdash;',
'...'
);
- $html = str_replace($badwordchars,$fixedwordchars, $html);
+ $html = str_replace($badwordchars, $fixedwordchars, $html);
// PCRE errors handling (#1486856), should we use something like for every preg_* use?
if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
@@ -462,6 +462,9 @@ class rcube_washtml
// Don't remove MSOutlook (<!-->) conditional comments (#1489004)
$html = preg_replace('/<!--[^->\[\n]+>/', '', $html);
+ // fix broken nested lists
+ self::fix_broken_lists($html);
+
// turn relative into absolute urls
$html = self::resolve_base($html);
@@ -500,5 +503,77 @@ class rcube_washtml
return $body;
}
-}
+ /**
+ * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768)
+ */
+ public static function fix_broken_lists(&$html)
+ {
+ // do two rounds, one for <ol>, one for <ul>
+ foreach (array('ol', 'ul') as $tag) {
+ $pos = 0;
+ while (($pos = stripos($html, '<' . $tag, $pos)) !== false) {
+ $pos++;
+
+ // make sure this is an ol/ul tag
+ if (!in_array($html[$pos+2], array(' ', '>'))) {
+ continue;
+ }
+
+ $p = $pos;
+ $in_li = false;
+ $li_pos = 0;
+
+ while (($p = strpos($html, '<', $p)) !== false) {
+ $tt = strtolower(substr($html, $p, 4));
+
+ // li open tag
+ if ($tt == '<li>' || $tt == '<li ') {
+ $in_li = true;
+ $p += 4;
+ }
+ // li close tag
+ else if ($tt == '</li' && in_array($html[$p+4], array(' ', '>'))) {
+ $li_pos = $p;
+ $p += 4;
+ $in_li = false;
+ }
+ // ul/ol closing tag
+ else if ($tt == '</' . $tag && in_array($html[$p+4], array(' ', '>'))) {
+ break;
+ }
+ // nested ol/ul element out of li
+ else if (!$in_li && $li_pos && ($tt == '<ol>' || $tt == '<ol ' || $tt == '<ul>' || $tt == '<ul ')) {
+ // find closing tag of this ul/ol element
+ $element = substr($tt, 1, 2);
+ $cpos = $p;
+ do {
+ $tpos = stripos($html, '<' . $element, $cpos+1);
+ $cpos = stripos($html, '</' . $element, $cpos+1);
+ }
+ while ($tpos !== false && $cpos !== false && $cpos > $tpos);
+
+ // not found, this is invalid HTML, skip it
+ if ($cpos === false) {
+ break;
+ }
+
+ // get element content
+ $end = strpos($html, '>', $cpos);
+ $len = $end - $p + 1;
+ $element = substr($html, $p, $len);
+
+ // move element to the end of the last li
+ $html = substr_replace($html, '', $p, $len);
+ $html = substr_replace($html, $element, $li_pos, 0);
+
+ $p = $end;
+ }
+ else {
+ $p++;
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/tests/Framework/Washtml.php b/tests/Framework/Washtml.php
index 0d050ff30..7485d4383 100644
--- a/tests/Framework/Washtml.php
+++ b/tests/Framework/Washtml.php
@@ -81,4 +81,47 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$this->assertRegExp('|</a>|', $washed, "Invalid closing tag (#1489446)");
}
+ /**
+ * Test fixing of invalid lists nesting (#1488768)
+ */
+ function test_lists()
+ {
+ $data = array(
+ array(
+ "<ol><li>First</li><li>Second</li><ul><li>First sub</li></ul><li>Third</li></ol>",
+ "<ol><li>First</li><li>Second<ul><li>First sub</li></ul></li><li>Third</li></ol>"
+ ),
+ array(
+ "<ol><li>First<ul><li>First sub</li></ul></li></ol>",
+ "<ol><li>First<ul><li>First sub</li></ul></li></ol>",
+ ),
+ array(
+ "<ol><li>First<ol><li>First sub</li></ol></li></ol>",
+ "<ol><li>First<ol><li>First sub</li></ol></li></ol>",
+ ),
+ array(
+ "<ul><li>First</li><ul><li>First sub</li><ul><li>sub sub</li></ul></ul><li></li></ul>",
+ "<ul><li>First<ul><li>First sub<ul><li>sub sub</li></ul></li></ul></li><li></li></ul>",
+ ),
+ array(
+ "<ul><li>First</li><li>second</li><ul><ul><li>sub sub</li></ul></ul></ul>",
+ "<ul><li>First</li><li>second<ul><ul><li>sub sub</li></ul></ul></li></ul>",
+ ),
+ array(
+ "<ol><ol><ol></ol></ol></ol>",
+ "<ol><ol><ol></ol></ol></ol>",
+ ),
+ array(
+ "<div><ol><ol><ol></ol></ol></ol></div>",
+ "<div><ol><ol><ol></ol></ol></ol></div>",
+ ),
+ );
+
+ foreach ($data as $element) {
+ rcube_washtml::fix_broken_lists($element[0]);
+
+ $this->assertSame($element[1], $element[0], "Broken nested lists (#1488768)");
+ }
+ }
+
}