diff options
Diffstat (limited to 'program/lib/html2text.php')
-rw-r--r-- | program/lib/html2text.php | 69 |
1 files changed, 55 insertions, 14 deletions
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index 938dfecfe..d298ee2e4 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -149,25 +149,18 @@ class html2text
         '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
         '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
         //'/<!-- .* -->/',                       // Comments -- which strip_tags might have problem a with
-        '/<h[123][^>]*>(.*?)<\/h[123]>/ie',      // H1 - H3
-        '/<h[456][^>]*>(.*?)<\/h[456]>/ie',      // H4 - H6
         '/<p[^>]*>/i',                           // <P>
         '/<br[^>]*>/i',                          // <br>
-        '/<b[^>]*>(.*?)<\/b>/ie',                // <b>
-        '/<strong[^>]*>(.*?)<\/strong>/ie',      // <strong>
         '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
         '/<em[^>]*>(.*?)<\/em>/i',               // <em>
         '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
         '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
         '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
         '/<li[^>]*>/i',                          // <li>
-        '/<a [^>]*href=("|\')([^"\']+)\1[^>]*>(.*?)<\/a>/ie',
-                                                 // <a href="">
         '/<hr[^>]*>/i',                          // <hr>
         '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
         '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
         '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
-        '/<th[^>]*>(.*?)<\/th>/ie',              // <th> and </th>
         '/&(nbsp|#160);/i',                      // Non-breaking space
         '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
                                                  // Double quotes
@@ -201,25 +194,18 @@ class html2text
         '',                                     // <script>s -- which strip_tags supposedly has problems with
         '',                                     // <style>s -- which strip_tags supposedly has problems with
         //'',                                   // Comments -- which strip_tags might have problem a with
-        "strtoupper(\"\n\n\\1\n\n\")",          // H1 - H3
-        "ucwords(\"\n\n\\1\n\")",               // H4 - H6
         "\n\n",                                 // <P>
         "\n",                                   // <br>
-        'strtoupper("\\1")',                    // <b>
-        'strtoupper("\\1")',                    // <strong>
         '_\\1_',                                // <i>
         '_\\1_',                                // <em>
         "\n\n",                                 // <ul> and </ul>
         "\n\n",                                 // <ol> and </ol>
         "\t* \\1\n",                            // <li> and </li>
         "\n\t* ",                               // <li>
-        '$this->_build_link_list("\\2", "\\3")',
-                                                // <a href="">
         "\n-------------------------\n",        // <hr>
         "\n\n",                                 // <table> and </table>
         "\n",                                   // <tr> and </tr>
         "\t\t\\1\n",                            // <td> and </td>
-        "strtoupper(\"\t\t\\1\n\")",            // <th> and </th>
         ' ',                                    // Non-breaking space
         '"',                                    // Double quotes
         "'",                                    // Single quotes
@@ -238,6 +224,22 @@ class html2text
         ' '                                     // Runs of spaces, post-handling
     );
 
+    /**
+     * List of preg* regular expression patterns to search for
+     * and replace using callback function (group 1 is always the tag name).
+     *
+     * @var array $callback_search
+     * @access public
+     */
+    var $callback_search = array(
+        '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H6
+        '/<(b)[^>]*>(.*?)<\/b>/i',                 // <b>
+        '/<(strong)[^>]*>(.*?)<\/strong>/i',       // <strong>
+        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
+                                                   // <a href="">
+        '/<(th)[^>]*>(.*?)<\/th>/i',               // <th> and </th>
+    );
+
     /**
      *  List of preg* regular expression patterns to search for in PRE body,
      *  used in conjunction with $pre_replace.
@@ -471,6 +473,7 @@ class html2text
 
         // Run our defined search-and-replace
         $text = preg_replace($this->search, $this->replace, $text);
+        $text = preg_replace_callback($this->callback_search, array($this, '_preg_callback'), $text);
 
         // Strip any other HTML tags
         $text = strip_tags($text, $this->allowed_tags);
@@ -549,6 +552,44 @@ class html2text
             $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text, 1);
         }
     }
+
+    /**
+     * Callback function for preg_replace_callback use; dispatches on the tag name in group 1.
+     *
+     * @param  array PREG matches as produced by the $callback_search patterns
+     * @return string Plain-text replacement for the matched element
+     * @access private
+     */
+    function _preg_callback($matches)
+    {
+        switch(strtolower($matches[1])) // patterns use /i, so the captured tag may be upper-case
+        {
+            case 'b':
+            case 'strong':
+                return $this->_strtoupper($matches[2]);
+            case 'th': // table header cell: same layout as <td>, but upper-cased
+                return $this->_strtoupper("\t\t". $matches[2] ."\n");
+            case 'h':
+                return $this->_strtoupper("\n\n". $matches[2] ."\n\n");
+            case 'a': // group 2 is the quote character, 3 the href, 4 the link text
+                return $this->_build_link_list($matches[3], $matches[4]);
+        }
+    }
+
+    /**
+     * Strtoupper multibyte wrapper function; falls back to strtoupper() without mbstring
+     *
+     * @param  string Text to convert
+     * @return string Upper-cased text
+     * @access private
+     */
+    function _strtoupper($str)
+    {
+        if (function_exists('mb_strtoupper'))
+            return mb_strtoupper($str);
+        else
+            return strtoupper($str);
+    }
 }
 
 ?>