diff options
author | Aleksander Machniak <alec@alec.pl> | 2013-12-28 19:14:51 +0100 |
---|---|---|
committer | Aleksander Machniak <alec@alec.pl> | 2013-12-28 19:14:51 +0100 |
commit | c7250749ab01721b55a26d6badf4dcdbbdaf7309 (patch) | |
tree | 7e1c44bf470294545f73ad3ad7c52108631afa17 | |
parent | e1bb65c6d7ed3aa7bd4be48b37f4f47931ef203d (diff) |
Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | program/lib/Roundcube/rcube_washtml.php | 83 | ||||
-rw-r--r-- | tests/Framework/Washtml.php | 43 |
3 files changed, 123 insertions, 4 deletions
@@ -1,6 +1,7 @@ CHANGELOG Roundcube Webmail =========================== +- Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768) - Display different icons when Trash folder is empty or full (#1485775) - Remember last position of more headers switch (#1488323) - Fix so message flags modified by another client are applied on the list on refresh (#1485186) diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php index 9cf3c6222..5a5b3dc55 100644 --- a/program/lib/Roundcube/rcube_washtml.php +++ b/program/lib/Roundcube/rcube_washtml.php @@ -418,7 +418,7 @@ class rcube_washtml $html = preg_replace($html_search, $html_replace, trim($html)); //-> Replace all of those weird MS Word quotes and other high characters - $badwordchars=array( + $badwordchars = array( "\xe2\x80\x98", // left single quote "\xe2\x80\x99", // right single quote "\xe2\x80\x9c", // left double quote @@ -426,7 +426,7 @@ class rcube_washtml "\xe2\x80\x94", // em dash "\xe2\x80\xa6" // elipses ); - $fixedwordchars=array( + $fixedwordchars = array( "'", "'", '"', @@ -434,7 +434,7 @@ class rcube_washtml '—', '...' ); - $html = str_replace($badwordchars,$fixedwordchars, $html); + $html = str_replace($badwordchars, $fixedwordchars, $html); // PCRE errors handling (#1486856), should we use something like for every preg_* use? if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) { @@ -462,6 +462,9 @@ class rcube_washtml // Don't remove MSOutlook (<!-->) conditional comments (#1489004) $html = preg_replace('/<!--[^->\[\n]+>/', '', $html); + // fix broken nested lists + self::fix_broken_lists($html); + // turn relative into absolute urls $html = self::resolve_base($html); @@ -500,5 +503,77 @@ class rcube_washtml return $body; } -} + /** + * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768) + */ + public static function fix_broken_lists(&$html) + { + // do two rounds, one for <ol>, one for <ul> + foreach (array('ol', 'ul') as $tag) { + $pos = 0; + while (($pos = stripos($html, '<' . $tag, $pos)) !== false) { + $pos++; + + // make sure this is an ol/ul tag + if (!in_array($html[$pos+2], array(' ', '>'))) { + continue; + } + + $p = $pos; + $in_li = false; + $li_pos = 0; + + while (($p = strpos($html, '<', $p)) !== false) { + $tt = strtolower(substr($html, $p, 4)); + + // li open tag + if ($tt == '<li>' || $tt == '<li ') { + $in_li = true; + $p += 4; + } + // li close tag + else if ($tt == '</li' && in_array($html[$p+4], array(' ', '>'))) { + $li_pos = $p; + $p += 4; + $in_li = false; + } + // ul/ol closing tag + else if ($tt == '</' . $tag && in_array($html[$p+4], array(' ', '>'))) { + break; + } + // nested ol/ul element out of li + else if (!$in_li && $li_pos && ($tt == '<ol>' || $tt == '<ol ' || $tt == '<ul>' || $tt == '<ul ')) { + // find closing tag of this ul/ol element + $element = substr($tt, 1, 2); + $cpos = $p; + do { + $tpos = stripos($html, '<' . $element, $cpos+1); + $cpos = stripos($html, '</' . $element, $cpos+1); + } + while ($tpos !== false && $cpos !== false && $cpos > $tpos); + + // not found, this is invalid HTML, skip it + if ($cpos === false) { + break; + } + + // get element content + $end = strpos($html, '>', $cpos); + $len = $end - $p + 1; + $element = substr($html, $p, $len); + + // move element to the end of the last li + $html = substr_replace($html, '', $p, $len); + $html = substr_replace($html, $element, $li_pos, 0); + + $p = $end; + } + else { + $p++; + } + } + } + } + } +} diff --git a/tests/Framework/Washtml.php b/tests/Framework/Washtml.php index 0d050ff30..7485d4383 100644 --- a/tests/Framework/Washtml.php +++ b/tests/Framework/Washtml.php @@ -81,4 +81,47 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase $this->assertRegExp('|</a>|', $washed, "Invalid closing tag (#1489446)"); } + /** + * Test fixing of invalid lists nesting (#1488768) + */ + function test_lists() + { + $data = array( + array( + "<ol><li>First</li><li>Second</li><ul><li>First sub</li></ul><li>Third</li></ol>", + "<ol><li>First</li><li>Second<ul><li>First sub</li></ul></li><li>Third</li></ol>" + ), + array( + "<ol><li>First<ul><li>First sub</li></ul></li></ol>", + "<ol><li>First<ul><li>First sub</li></ul></li></ol>", + ), + array( + "<ol><li>First<ol><li>First sub</li></ol></li></ol>", + "<ol><li>First<ol><li>First sub</li></ol></li></ol>", + ), + array( + "<ul><li>First</li><ul><li>First sub</li><ul><li>sub sub</li></ul></ul><li></li></ul>", + "<ul><li>First<ul><li>First sub<ul><li>sub sub</li></ul></li></ul></li><li></li></ul>", + ), + array( + "<ul><li>First</li><li>second</li><ul><ul><li>sub sub</li></ul></ul></ul>", + "<ul><li>First</li><li>second<ul><ul><li>sub sub</li></ul></ul></li></ul>", + ), + array( + "<ol><ol><ol></ol></ol></ol>", + "<ol><ol><ol></ol></ol></ol>", + ), + array( + "<div><ol><ol><ol></ol></ol></ol></div>", + "<div><ol><ol><ol></ol></ol></ol></div>", + ), + ); + + foreach ($data as $element) { + rcube_washtml::fix_broken_lists($element[0]); + + $this->assertSame($element[1], $element[0], "Broken nested lists (#1488768)"); + } + } + } |