From 4e17e6c9dbac8991ee8b302cb2581241247dc8bc Mon Sep 17 00:00:00 2001 From: thomascube Date: Sun, 25 Sep 2005 14:18:03 +0000 Subject: Initial revision --- program/lib/html2text.inc | 440 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 440 insertions(+) create mode 100644 program/lib/html2text.inc (limited to 'program/lib/html2text.inc') diff --git a/program/lib/html2text.inc b/program/lib/html2text.inc new file mode 100644 index 000000000..82a254e56 --- /dev/null +++ b/program/lib/html2text.inc @@ -0,0 +1,440 @@ + * +* All rights reserved. * +* * +* This script is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* The GNU General Public License can be found at * +* http://www.gnu.org/copyleft/gpl.html. * +* * +* This script is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* Author(s): Jon Abernathy * +* * +* Last modified: 04/06/05 * +* Modified: 2004/05/19 (tbr) * +* * +*************************************************************************/ + + +/** +* Takes HTML and converts it to formatted, plain text. +* +* Thanks to Alexander Krug (http://www.krugar.de/) for pointing out and +* correcting an error in the regexp search array. Fixed 7/30/03. +* +* Updated set_html() function's file reading mechanism, 9/25/03. +* +* Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding +* several more HTML entity codes to the $search and $replace arrays. +* Updated 11/7/03. +* +* Thanks to Darius Kasperavicius (http://www.dar.dar.lt/) for +* suggesting the addition of $allowed_tags and its supporting function +* (which I slightly modified). Updated 3/12/04. 
+* +* Thanks to Justin Dearing for pointing out that a replacement for the +* <TH> tag was missing, and suggesting an appropriate fix. +* Updated 8/25/04. +* +* Thanks to Mathieu Collas (http://www.myefarm.com/) for finding a +* display/formatting bug in the _build_link_list() function: email +* readers would show the left bracket and number ("[1") as part of the +* rendered email address. +* Updated 12/16/04. +* +* Thanks to Wojciech Bajon (http://histeria.pl/) for submitting code +* to handle relative links, which I hadn't considered. I modified his +* code a bit to handle normal HTTP links and MAILTO links. Also for +* suggesting three additional HTML entity codes to search for. +* Updated 03/02/05. +* +* Thanks to Jacob Chandler for pointing out another link condition +* for the _build_link_list() function: "https". +* Updated 04/06/05. +* +* @author Jon Abernathy +* @version 0.6.1 +* @since PHP 4.0.2 +*/ +class html2text +{ + + /** + * Contains the HTML content to convert. + * + * @var string $html + * @access public + */ + var $html; + + /** + * Contains the converted, formatted text. + * + * @var string $text + * @access public + */ + var $text; + + /** + * Maximum width of the formatted text, in columns. + * + * @var integer $width + * @access public + */ + var $width = 70; + + /** + * List of preg* regular expression patterns to search for, + * used in conjunction with $replace. + * + * @var array $search + * @access public + * @see $replace + */ + var $search = array( + "/\r/", // Non-legal carriage return + "/[\n\t]+/", // Newlines and tabs + '/]*>.*?<\/script>/i', //