diff options
author | alecpl <alec@alec.pl> | 2012-02-08 11:58:33 +0000 |
---|---|---|
committer | alecpl <alec@alec.pl> | 2012-02-08 11:58:33 +0000 |
commit | f3599505f1f1f7d231ad10cd771ed271f4fbdafa (patch) | |
tree | 70457c64161f6e7069de15b9a40198d76ac0c03f | |
parent | 67e5925897144effb8f4a9404bd2c5b25aa1aa4f (diff) |
- Improved the r5861 change: content converted to upper case can contain HTML tags; handle them properly
-rw-r--r-- | program/lib/html2text.php | 37 |
1 file changed, 29 insertions, 8 deletions
diff --git a/program/lib/html2text.php b/program/lib/html2text.php index 0171f4bbb..22bf373bd 100644 --- a/program/lib/html2text.php +++ b/program/lib/html2text.php @@ -249,11 +249,11 @@ class html2text * @access public */ var $callback_search = array( + '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', + // <a href=""> '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H3 '/<(b)[^>]*>(.*?)<\/b>/i', // <b> '/<(strong)[^>]*>(.*?)<\/strong>/i', // <strong> - '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', - // <a href=""> '/<(th)[^>]*>(.*?)<\/th>/i', // <th> and </th> ); @@ -675,11 +675,11 @@ class html2text switch($matches[1]) { case 'b': case 'strong': - return $this->_strtoupper($matches[2]); + return $this->_toupper($matches[2]); case 'th': - return $this->_strtoupper("\t\t". $matches[2] ."\n"); + return $this->_toupper("\t\t". $matches[2] ."\n"); case 'h': - return $this->_strtoupper("\n\n". $matches[2] ."\n\n"); + return $this->_toupper("\n\n". $matches[2] ."\n\n"); case 'a': // Remove spaces in URL (#1487805) $url = str_replace(' ', '', $matches[3]); @@ -699,10 +699,31 @@ class html2text } /** - * Strtoupper multibyte wrapper function with HTML entities handling + * Strtoupper function with HTML tags and entities handling. + * + * @param string $str Text to convert + * @return string Converted text + */ + private function _toupper($str) + { + // string can contain HTML tags + $chunks = preg_split('/(<[^>]*>)/', $str, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); + + // convert to upper case only the text between HTML tags + foreach ($chunks as $idx => $chunk) { + if ($chunk[0] != '<') { + $chunks[$idx] = $this->_strtoupper($chunk); + } + } + + return implode($chunks); + } + + /** + * Strtoupper multibyte wrapper function with HTML entities handling. * - * @param string $str Text to convert - * @return string Converted text + * @param string $str Text to convert + * @return string Converted text */ private function _strtoupper($str) { |