summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author alecpl <alec@alec.pl> 2008-06-03 17:07:53 +0000
committer alecpl <alec@alec.pl> 2008-06-03 17:07:53 +0000
commit 68217c548a024f3f7d7d667ce4b825cf61ae46a9 (patch)
tree 75b2e9b75c70531f3215a3a4bea1bed8ba9ef144
parent 02548b976c092f0e2b8680627adbc04303ff5d67 (diff)
- add encoding conversion before HTML parsing
-rw-r--r-- program/lib/washtml.php 1
1 file changed, 1 insertion, 0 deletions
diff --git a/program/lib/washtml.php b/program/lib/washtml.php
index 82ccc0cd5..f45fd458a 100644
--- a/program/lib/washtml.php
+++ b/program/lib/washtml.php
@@ -187,6 +187,7 @@ class washtml
//Charset seems to be ignored (probably if defined in the HTML document)
$node = new DOMDocument('1.0', $config['charset']);
$full = true;
+ $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
@$node->loadHTML($html);
return self::dumpHtml($node, $config, $full);
}