diff options
Diffstat (limited to 'program/lib/Roundcube')
| -rw-r--r-- | program/lib/Roundcube/rcube_washtml.php | 83 | 
1 files changed, 79 insertions, 4 deletions
diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php index 9cf3c6222..5a5b3dc55 100644 --- a/program/lib/Roundcube/rcube_washtml.php +++ b/program/lib/Roundcube/rcube_washtml.php @@ -418,7 +418,7 @@ class rcube_washtml          $html = preg_replace($html_search, $html_replace, trim($html));          //-> Replace all of those weird MS Word quotes and other high characters -        $badwordchars=array( +        $badwordchars = array(              "\xe2\x80\x98", // left single quote              "\xe2\x80\x99", // right single quote              "\xe2\x80\x9c", // left double quote @@ -426,7 +426,7 @@ class rcube_washtml              "\xe2\x80\x94", // em dash              "\xe2\x80\xa6" // elipses          ); -        $fixedwordchars=array( +        $fixedwordchars = array(              "'",              "'",              '"', @@ -434,7 +434,7 @@ class rcube_washtml              '—',              '...'          ); -        $html = str_replace($badwordchars,$fixedwordchars, $html); +        $html = str_replace($badwordchars, $fixedwordchars, $html);          // PCRE errors handling (#1486856), should we use something like for every preg_* use?          
/**
 * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768)
 *
 * A nested <ol>/<ul> element that appears directly inside a list, after
 * a closed <li> item instead of inside one, is moved to the end of that
 * preceding <li> element. Several such elements in a row are appended in
 * their original document order.
 *
 * The content is modified in place; nothing is returned. Input that is not
 * a non-empty string is left untouched.
 *
 * @param string &$html HTML content (passed by reference)
 */
public static function fix_broken_lists(&$html)
{
    // nothing to scan, e.g. null handed over after a failed preg_replace()
    if (!is_string($html) || $html === '') {
        return;
    }

    // characters that may legally follow a tag name we are matching
    $delimiters = array(' ', '>');

    // do two rounds, one for <ol>, one for <ul>
    foreach (array('ol', 'ul') as $tag) {
        $pos = 0;
        while (($pos = stripos($html, '<' . $tag, $pos)) !== false) {
            // step past '<' so the next outer search does not find this tag again
            $pos++;

            // make sure this is an ol/ul tag (and not e.g. a longer tag name);
            // substr() is used instead of a string offset, because it does not
            // raise a warning when the markup is truncated right after the name
            if (!in_array(substr($html, $pos + 2, 1), $delimiters, true)) {
                continue;
            }

            $p      = $pos;
            $in_li  = false;
            $li_pos = 0; // offset of the most recent </li>, 0 = none seen yet

            while (($p = strpos($html, '<', $p)) !== false) {
                $tt = strtolower(substr($html, $p, 4));

                // li open tag
                if ($tt === '<li>' || $tt === '<li ') {
                    $in_li = true;
                    $p += 4;
                }
                // li close tag
                else if ($tt === '</li' && in_array(substr($html, $p + 4, 1), $delimiters, true)) {
                    $li_pos = $p;
                    $in_li  = false;
                    $p += 4;
                }
                // ul/ol closing tag
                else if ($tt === '</' . $tag && in_array(substr($html, $p + 4, 1), $delimiters, true)) {
                    break;
                }
                // nested ol/ul element out of li
                else if (!$in_li && $li_pos
                    && ($tt === '<ol>' || $tt === '<ol ' || $tt === '<ul>' || $tt === '<ul ')
                ) {
                    // find closing tag of this ul/ol element: keep advancing
                    // while another opening tag of the same kind precedes
                    // the closing tag that was found
                    $element = substr($tt, 1, 2);
                    $cpos    = $p;
                    do {
                        $tpos = stripos($html, '<' . $element, $cpos + 1);
                        $cpos = stripos($html, '</' . $element, $cpos + 1);
                    }
                    while ($tpos !== false && $cpos !== false && $cpos > $tpos);

                    // not found, this is invalid HTML, skip it
                    if ($cpos === false) {
                        break;
                    }

                    // an unterminated closing tag is invalid HTML as well;
                    // without this check the length below would be negative
                    $end = strpos($html, '>', $cpos);
                    if ($end === false) {
                        break;
                    }

                    // get element content
                    $len     = $end - $p + 1;
                    $element = substr($html, $p, $len);

                    // move element to the end of the last li; the removal is
                    // done first, it is located after $li_pos so that offset
                    // stays valid for the insertion
                    $html = substr_replace($html, '', $p, $len);
                    $html = substr_replace($html, $element, $li_pos, 0);

                    // the </li> has been shifted right by the inserted element,
                    // follow it so the next stray list lands after this one
                    $li_pos += $len;

                    $p = $end;
                }
                else {
                    $p++;
                }
            }
        }
    }
}
}
