diff options
author | Paweł Słowik <pawel.slowik@iq.pl> | 2012-09-13 14:24:01 +0200 |
---|---|---|
committer | Paweł Słowik <pawel.slowik@iq.pl> | 2012-09-13 14:24:01 +0200 |
commit | 2cdaa79dce689b2dc9ef5c7bf3dcbd9446d86c21 (patch) | |
tree | 7482d3bd9a71a9913f4e28392b09e11c519987de /program/include/rcube_charset.php | |
parent | 92a030d928246cfc5f3c0b1f2538dc1bfb4777e3 (diff) | |
parent | d7439260770eb1f70cdc5abf5df13e6c62ff3991 (diff) |
Merge branch 'master' of https://github.com/roundcube/roundcubemail
Diffstat (limited to 'program/include/rcube_charset.php')
-rw-r--r-- | program/include/rcube_charset.php | 25 |
1 files changed, 19 insertions, 6 deletions
```diff
diff --git a/program/include/rcube_charset.php b/program/include/rcube_charset.php
index 1740a6096..35c69729b 100644
--- a/program/include/rcube_charset.php
+++ b/program/include/rcube_charset.php
@@ -86,7 +86,7 @@ class rcube_charset
      * Sometimes charset string is malformed, there are also charset aliases
      * but we need strict names for charset conversion (specially utf8 class)
      *
-     * @param string Input charset name
+     * @param string $input Input charset name
      *
      * @return string The validated charset name
      */
@@ -176,9 +176,10 @@ class rcube_charset
     {
         static $iconv_options = null;
         static $mbstring_list = null;
+        static $mbstring_sch = null;
         static $conv = null;
 
-        $to = empty($to) ? strtoupper(RCMAIL_CHARSET) : self::parse_charset($to);
+        $to = empty($to) ? strtoupper(RCMAIL_CHARSET) : $to;
         $from = self::parse_charset($from);
 
         // It is a common case when UTF-16 charset is used with US-ASCII content (#1488654)
@@ -221,6 +222,7 @@ class rcube_charset
 
         if ($mbstring_list === null) {
             if (extension_loaded('mbstring')) {
+                $mbstring_sch = mb_substitute_character();
                 $mbstring_list = mb_list_encodings();
                 $mbstring_list = array_map('strtoupper', $mbstring_list);
             }
@@ -229,14 +231,25 @@ class rcube_charset
         // convert charset using mbstring module
         if ($mbstring_list !== null) {
             $aliases['WINDOWS-1257'] = 'ISO-8859-13';
+            // it happens that mbstring supports ASCII but not US-ASCII
+            if (($from == 'US-ASCII' || $to == 'US-ASCII') && !in_array('US-ASCII', $mbstring_list)) {
+                $aliases['US-ASCII'] = 'ASCII';
+            }
 
             $mb_from = $aliases[$from] ? $aliases[$from] : $from;
             $mb_to = $aliases[$to] ? $aliases[$to] : $to;
 
             // return if encoding found, string matches encoding and convert succeeded
             if (in_array($mb_from, $mbstring_list) && in_array($mb_to, $mbstring_list)) {
-                if (mb_check_encoding($str, $mb_from) && ($out = mb_convert_encoding($str, $mb_to, $mb_from))) {
-                    return $out;
+                if (mb_check_encoding($str, $mb_from)) {
+                    // Do the same as //IGNORE with iconv
+                    mb_substitute_character('none');
+                    $out = mb_convert_encoding($str, $mb_to, $mb_from);
+                    mb_substitute_character($mbstring_sch);
+
+                    if ($out !== false) {
+                        return $out;
+                    }
                 }
             }
         }
@@ -646,14 +659,14 @@ class rcube_charset
             return $failover;
         }
 
-        // FIXME: the order is important, because sometimes 
+        // FIXME: the order is important, because sometimes
         // iso string is detected as euc-jp and etc.
         $enc = array(
             'UTF-8', 'SJIS', 'BIG5', 'GB2312',
             'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
             'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
             'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
-            'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R', 
+            'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R',
             'ISO-2022-KR', 'ISO-2022-JP'
         );
 
```