From a2c2cb64e1d5860987d5674f6b81efd8e02af7b7 Mon Sep 17 00:00:00 2001 From: Thomas Bruederli Date: Fri, 25 Jan 2013 23:46:06 +0100 Subject: Refactored blockquote quotion routine in html2text conversion: it now correctly converts multiple and/or nested blockquotes --- program/lib/Roundcube/rcube_html2text.php | 87 ++++++++++++++++--------------- tests/Framework/Html2text.php | 19 +++++++ 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php index 0b172ebfa..3d32fe766 100644 --- a/program/lib/Roundcube/rcube_html2text.php +++ b/program/lib/Roundcube/rcube_html2text.php @@ -571,54 +571,57 @@ class rcube_html2text */ protected function _convert_blockquotes(&$text) { - if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { - $level = 0; - $diff = 0; - foreach ($matches[0] as $m) { - if ($m[0][0] == '<' && $m[0][1] == '/') { + $level = 0; + $offset = 0; + while (($start = strpos($text, '', $offset); + $next = strpos($text, ', skip + if ($next !== false && $next < $end) { + $offset = $next + 12; + $level++; + } + // nested tag + if ($end !== false && $level > 0) { + $offset = $end + 12; $level--; - if ($level < 0) { - $level = 0; // malformed HTML: go to next blockquote - } - else if ($level > 0) { - // skip inner blockquote - } - else { - $end = $m[1]; - $len = $end - $taglen - $start; - // Get blockquote content - $body = substr($text, $start + $taglen - $diff, $len); - - // Set text width - $p_width = $this->width; - if ($this->width > 0) $this->width -= 2; - // Convert blockquote content - $body = trim($body); - $this->_converter($body); - // Add citation markers and create PRE block - $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body)); - $body = '
' . htmlspecialchars($body) . '
'; - // Re-set text width - $this->width = $p_width; - // Replace content - $text = substr($text, 0, $start - $diff) - . $body . substr($text, $end + strlen($m[0]) - $diff); - - $diff = $len + $taglen + strlen($m[0]) - strlen($body); - unset($body); - } } - else { - if ($level == 0) { - $start = $m[1]; - $taglen = strlen($m[0]); - } - $level ++; + // found matching end tag + else if ($end !== false && $level == 0) { + $taglen = strpos($text, '>', $start) - $start; + $startpos = $start + $taglen + 1; + + // get blockquote content + $body = trim(substr($text, $startpos, $end - $startpos)); + + // replace content with inner blockquotes + $this->_converter($body); + + // Add citation markers and create
 block
+                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body));
+                    $body = '
' . htmlspecialchars($body) . '
'; + + $text = substr($text, 0, $start) . $body . "\n" . substr($text, $end + 13); + $offset = 0; + break; } - } + } while ($end || $next); } } + /** + * Callback function to correctly add citation markers for blockquote contents + */ + public function blockquote_citation_ballback($m) + { + $line = ltrim($m[2]); + $space = $line[0] == '>' ? '' : ' '; + return $m[1] . '>' . $space . $line; + } + /** * Callback function for preg_replace_callback use. * diff --git a/tests/Framework/Html2text.php b/tests/Framework/Html2text.php index 1d8963878..3e0df48d9 100644 --- a/tests/Framework/Html2text.php +++ b/tests/Framework/Html2text.php @@ -56,4 +56,23 @@ class rc_html2text extends PHPUnit_Framework_TestCase $this->assertEquals($out, $res, $title); } + + /** + * + */ + function test_multiple_blockquotes() + { + $html = <<Begin
OUTER BEGIN
INNER 1

Par 1
+
INNER 2

Par 2
+

Par 3

+
INNER 3
OUTER END
+EOF; + $ht = new rcube_html2text($html, false, false); + $res = $ht->get_text(); + + $this->assertContains('>> INNER 1', $res, 'Quote inner'); + $this->assertContains('>> INNER 3', $res, 'Quote inner'); + $this->assertContains('> OUTER END', $res, 'Quote outer'); + } } -- cgit v1.2.3