From 68217c548a024f3f7d7d667ce4b825cf61ae46a9 Mon Sep 17 00:00:00 2001
From: alecpl
Date: Tue, 3 Jun 2008 17:07:53 +0000
Subject: -add convert encoding before html parsing

---
 program/lib/washtml.php | 1 +
 1 file changed, 1 insertion(+)

diff --git a/program/lib/washtml.php b/program/lib/washtml.php
index 82ccc0cd5..f45fd458a 100644
--- a/program/lib/washtml.php
+++ b/program/lib/washtml.php
@@ -187,6 +187,7 @@ class washtml
     //Charset seems to be ignored (probably if defined in the HTML document)
     $node = new DOMDocument('1.0', $config['charset']);
     $full = true;
+    $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
     @$node->loadHTML($html);
     return self::dumpHtml($node, $config, $full);
   }
-- 
cgit v1.2.3