diff options
author | Aleksander Machniak <alec@alec.pl> | 2013-12-28 19:14:51 +0100 |
---|---|---|
committer | Aleksander Machniak <alec@alec.pl> | 2013-12-28 19:14:51 +0100 |
commit | c7250749ab01721b55a26d6badf4dcdbbdaf7309 (patch) | |
tree | 7e1c44bf470294545f73ad3ad7c52108631afa17 /program/lib/Roundcube/rcube_washtml.php | |
parent | e1bb65c6d7ed3aa7bd4be48b37f4f47931ef203d (diff) |
Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)
Diffstat (limited to 'program/lib/Roundcube/rcube_washtml.php')
-rw-r--r-- | program/lib/Roundcube/rcube_washtml.php | 83 |
1 files changed, 79 insertions, 4 deletions
diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php index 9cf3c6222..5a5b3dc55 100644 --- a/program/lib/Roundcube/rcube_washtml.php +++ b/program/lib/Roundcube/rcube_washtml.php @@ -418,7 +418,7 @@ class rcube_washtml $html = preg_replace($html_search, $html_replace, trim($html)); //-> Replace all of those weird MS Word quotes and other high characters - $badwordchars=array( + $badwordchars = array( "\xe2\x80\x98", // left single quote "\xe2\x80\x99", // right single quote "\xe2\x80\x9c", // left double quote @@ -426,7 +426,7 @@ class rcube_washtml "\xe2\x80\x94", // em dash "\xe2\x80\xa6" // elipses ); - $fixedwordchars=array( + $fixedwordchars = array( "'", "'", '"', @@ -434,7 +434,7 @@ class rcube_washtml '—', '...' ); - $html = str_replace($badwordchars,$fixedwordchars, $html); + $html = str_replace($badwordchars, $fixedwordchars, $html); // PCRE errors handling (#1486856), should we use something like for every preg_* use? if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) { @@ -462,6 +462,9 @@ class rcube_washtml // Don't remove MSOutlook (<!-->) conditional comments (#1489004) $html = preg_replace('/<!--[^->\[\n]+>/', '', $html); + // fix broken nested lists + self::fix_broken_lists($html); + // turn relative into absolute urls $html = self::resolve_base($html); @@ -500,5 +503,77 @@ class rcube_washtml return $body; } -} + /** + * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768) + */ + public static function fix_broken_lists(&$html) + { + // do two rounds, one for <ol>, one for <ul> + foreach (array('ol', 'ul') as $tag) { + $pos = 0; + while (($pos = stripos($html, '<' . $tag, $pos)) !== false) { + $pos++; + + // make sure this is an ol/ul tag + if (!in_array($html[$pos+2], array(' ', '>'))) { + continue; + } + + $p = $pos; + $in_li = false; + $li_pos = 0; + + while (($p = strpos($html, '<', $p)) !== false) { + $tt = strtolower(substr($html, $p, 4)); + + // li open tag + if ($tt == '<li>' || $tt == '<li ') { + $in_li = true; + $p += 4; + } + // li close tag + else if ($tt == '</li' && in_array($html[$p+4], array(' ', '>'))) { + $li_pos = $p; + $p += 4; + $in_li = false; + } + // ul/ol closing tag + else if ($tt == '</' . $tag && in_array($html[$p+4], array(' ', '>'))) { + break; + } + // nested ol/ul element out of li + else if (!$in_li && $li_pos && ($tt == '<ol>' || $tt == '<ol ' || $tt == '<ul>' || $tt == '<ul ')) { + // find closing tag of this ul/ol element + $element = substr($tt, 1, 2); + $cpos = $p; + do { + $tpos = stripos($html, '<' . $element, $cpos+1); + $cpos = stripos($html, '</' . $element, $cpos+1); + } + while ($tpos !== false && $cpos !== false && $cpos > $tpos); + + // not found, this is invalid HTML, skip it + if ($cpos === false) { + break; + } + + // get element content + $end = strpos($html, '>', $cpos); + $len = $end - $p + 1; + $element = substr($html, $p, $len); + + // move element to the end of the last li + $html = substr_replace($html, '', $p, $len); + $html = substr_replace($html, $element, $li_pos, 0); + + $p = $end; + } + else { + $p++; + } + } + } + } + } +} |