diff options
author | Thomas Bruederli <thomas@roundcube.net> | 2013-01-25 23:46:06 +0100 |
---|---|---|
committer | Thomas Bruederli <thomas@roundcube.net> | 2013-01-25 23:46:34 +0100 |
commit | bb6f4b2b5d0676ef0ed90f8050ad28e46f2dce35 (patch) | |
tree | 721b68c97b061b9429e3beb22e73052dd70a1cfa | |
parent | bd0daee7fd8d3e58433b0c394b1bc58b8b52e8e3 (diff) |
Refactored blockquote quoting routine in html2text conversion: it now correctly converts multiple and/or nested blockquotes
-rw-r--r-- | program/lib/Roundcube/rcube_html2text.php | 87 | ||||
-rw-r--r-- | tests/Framework/Html2text.php | 19 |
2 files changed, 64 insertions, 42 deletions
diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php index 0b172ebfa..3d32fe766 100644 --- a/program/lib/Roundcube/rcube_html2text.php +++ b/program/lib/Roundcube/rcube_html2text.php @@ -571,55 +571,58 @@ class rcube_html2text */ protected function _convert_blockquotes(&$text) { - if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { - $level = 0; - $diff = 0; - foreach ($matches[0] as $m) { - if ($m[0][0] == '<' && $m[0][1] == '/') { + $level = 0; + $offset = 0; + while (($start = strpos($text, '<blockquote', $offset)) !== false) { + $offset = $start + 12; + do { + $end = strpos($text, '</blockquote>', $offset); + $next = strpos($text, '<blockquote', $offset); + + // nested <blockquote>, skip + if ($next !== false && $next < $end) { + $offset = $next + 12; + $level++; + } + // nested </blockquote> tag + if ($end !== false && $level > 0) { + $offset = $end + 12; $level--; - if ($level < 0) { - $level = 0; // malformed HTML: go to next blockquote - } - else if ($level > 0) { - // skip inner blockquote - } - else { - $end = $m[1]; - $len = $end - $taglen - $start; - // Get blockquote content - $body = substr($text, $start + $taglen - $diff, $len); - - // Set text width - $p_width = $this->width; - if ($this->width > 0) $this->width -= 2; - // Convert blockquote content - $body = trim($body); - $this->_converter($body); - // Add citation markers and create PRE block - $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body)); - $body = '<pre>' . htmlspecialchars($body) . '</pre>'; - // Re-set text width - $this->width = $p_width; - // Replace content - $text = substr($text, 0, $start - $diff) - . $body . 
substr($text, $end + strlen($m[0]) - $diff); - - $diff = $len + $taglen + strlen($m[0]) - strlen($body); - unset($body); - } } - else { - if ($level == 0) { - $start = $m[1]; - $taglen = strlen($m[0]); - } - $level ++; + // found matching end tag + else if ($end !== false && $level == 0) { + $taglen = strpos($text, '>', $start) - $start; + $startpos = $start + $taglen + 1; + + // get blockquote content + $body = trim(substr($text, $startpos, $end - $startpos)); + + // replace content with inner blockquotes + $this->_converter($body); + + // Add citation markers and create <pre> block + $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body)); + $body = '<pre>' . htmlspecialchars($body) . '</pre>'; + + $text = substr($text, 0, $start) . $body . "\n" . substr($text, $end + 13); + $offset = 0; + break; } - } + } while ($end || $next); } } /** + * Callback function to correctly add citation markers for blockquote contents + */ + public function blockquote_citation_ballback($m) + { + $line = ltrim($m[2]); + $space = $line[0] == '>' ? '' : ' '; + return $m[1] . '>' . $space . $line; + } + + /** * Callback function for preg_replace_callback use. 
* * @param array PREG matches diff --git a/tests/Framework/Html2text.php b/tests/Framework/Html2text.php index 1d8963878..3e0df48d9 100644 --- a/tests/Framework/Html2text.php +++ b/tests/Framework/Html2text.php @@ -56,4 +56,23 @@ class rc_html2text extends PHPUnit_Framework_TestCase $this->assertEquals($out, $res, $title); } + + /** + * + */ + function test_multiple_blockquotes() + { + $html = <<<EOF +<br>Begin<br><blockquote>OUTER BEGIN<blockquote>INNER 1<br></blockquote><div><br></div><div>Par 1</div> +<blockquote>INNER 2</blockquote><div><br></div><div>Par 2</div> +<div><br></div><div>Par 3</div><div><br></div> +<blockquote>INNER 3</blockquote>OUTER END</blockquote> +EOF; + $ht = new rcube_html2text($html, false, false); + $res = $ht->get_text(); + + $this->assertContains('>> INNER 1', $res, 'Quote inner'); + $this->assertContains('>> INNER 3', $res, 'Quote inner'); + $this->assertContains('> OUTER END', $res, 'Quote outer'); + } } |