summary | refs | log | tree | commit | diff
path: root/program/lib
diff options
context:
space:
mode:
Diffstat (limited to 'program/lib')
-rw-r--r-- program/lib/html2text.php 30
-rw-r--r-- program/lib/washtml.php 8
2 files changed, 21 insertions, 17 deletions
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index 9de2e961e..28c5ae059 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -89,7 +89,7 @@
* out that extra spaces should be compressed--a problem addressed with
* Marcus Bointon's fixes but that I had not yet incorporated.
*
- * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
+ * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
* suggesting a valuable fix with <a> tag handling.
*
* Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
@@ -200,7 +200,7 @@ class html2text
var $ent_search = array(
'/&(nbsp|#160);/i', // Non-breaking space
'/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
- // Double quotes
+ // Double quotes
'/&(apos|rsquo|lsquo|#8216|#8217);/i', // Single quotes
'/&gt;/i', // Greater-than
'/&lt;/i', // Less-than
@@ -437,11 +437,11 @@ class html2text
function set_base_url( $url = '' )
{
if ( empty($url) ) {
- if ( !empty($_SERVER['HTTP_HOST']) ) {
- $this->url = 'http://' . $_SERVER['HTTP_HOST'];
- } else {
- $this->url = '';
- }
+ if ( !empty($_SERVER['HTTP_HOST']) ) {
+ $this->url = 'http://' . $_SERVER['HTTP_HOST'];
+ } else {
+ $this->url = '';
+ }
} else {
// Strip any trailing slashes for consistency (relative
// URLs may already start with a slash like "/file.html")
@@ -515,7 +515,7 @@ class html2text
$text = preg_replace($this->ent_search, $this->ent_replace, $text);
// Replace known html entities
- $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
+ $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -535,7 +535,7 @@ class html2text
// for PHP versions >= 4.0.2. Default width is 75
// If width is 0 or less, don't wrap the text.
if ( $this->width > 0 ) {
- $text = wordwrap($text, $this->width);
+ $text = wordwrap($text, $this->width);
}
}
@@ -554,16 +554,16 @@ class html2text
*/
function _build_link_list( $link, $display )
{
- if (!$this->_do_links || empty($link)) {
- return $display;
- }
+ if (!$this->_do_links || empty($link)) {
+ return $display;
+ }
// Ignored link types
- if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
- return $display;
+ if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
+ return $display;
}
- if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
+ if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
$url = $link;
}
else {
diff --git a/program/lib/washtml.php b/program/lib/washtml.php
index c12315fec..98ae5ed5a 100644
--- a/program/lib/washtml.php
+++ b/program/lib/washtml.php
@@ -214,8 +214,11 @@ class washtml
$key = strtolower($key);
$value = $node->getAttribute($key);
if (isset($this->_html_attribs[$key]) ||
- ($key == 'href' && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value)))
+ ($key == 'href' && !preg_match('!^javascript!i', $value)
+ && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value))
+ ) {
$t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+ }
else if ($key == 'style' && ($style = $this->wash_style($value))) {
$quot = strpos($style, '"') !== false ? "'" : '"';
$t .= ' style=' . $quot . $style . $quot;
@@ -237,7 +240,8 @@ class washtml
else if (preg_match('/^data:.+/i', $value)) { // RFC2397
$t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
}
- } else
+ }
+ else
$washed .= ($washed?' ':'') . $key;
}
return $t . ($washed && $this->config['show_washed']?' x-washed="'.$washed.'"':'');