From f3599505f1f1f7d231ad10cd771ed271f4fbdafa Mon Sep 17 00:00:00 2001 From: alecpl Date: Wed, 8 Feb 2012 11:58:33 +0000 Subject: - Improved r5861 change: Content converted to upper case can contain HTML tags, handle them properly --- program/lib/html2text.php | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/program/lib/html2text.php b/program/lib/html2text.php index 0171f4bbb..22bf373bd 100644 --- a/program/lib/html2text.php +++ b/program/lib/html2text.php @@ -249,11 +249,11 @@ class html2text * @access public */ var $callback_search = array( + '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', + // '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H3 '/<(b)[^>]*>(.*?)<\/b>/i', // '/<(strong)[^>]*>(.*?)<\/strong>/i', // - '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', - // '/<(th)[^>]*>(.*?)<\/th>/i', // and ); @@ -675,11 +675,11 @@ class html2text switch($matches[1]) { case 'b': case 'strong': - return $this->_strtoupper($matches[2]); + return $this->_toupper($matches[2]); case 'th': - return $this->_strtoupper("\t\t". $matches[2] ."\n"); + return $this->_toupper("\t\t". $matches[2] ."\n"); case 'h': - return $this->_strtoupper("\n\n". $matches[2] ."\n\n"); + return $this->_toupper("\n\n". $matches[2] ."\n\n"); case 'a': // Remove spaces in URL (#1487805) $url = str_replace(' ', '', $matches[3]); @@ -699,10 +699,31 @@ class html2text } /** - * Strtoupper multibyte wrapper function with HTML entities handling + * Strtoupper function with HTML tags and entities handling. 
+ * + * @param string $str Text to convert (may contain HTML tags) + * @return string Converted text, with HTML tags left unchanged + */ + private function _toupper($str) + { + // the string can contain HTML tags: split it into tag and text chunks (limit -1 = no limit) + $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); + + // upper-case only the text between HTML tags, never the tags themselves + foreach ($chunks as $idx => $chunk) { + if ($chunk[0] !== '<') { + $chunks[$idx] = $this->_strtoupper($chunk); + } + } + + return implode($chunks); + } + + /** + * Strtoupper multibyte wrapper function with HTML entities handling. * - * @param string $str Text to convert - * @return string Converted text + * @param string $str Text to convert + * @return string Converted text */ private function _strtoupper($str) { -- cgit v1.2.3