summary refs log tree commit diff
path: root/program/include/rcube_charset.php
diff options
context:
space:
mode:
author Thomas Bruederli <thomas@roundcube.net> 2012-09-17 22:04:16 +0200
committer Thomas Bruederli <thomas@roundcube.net> 2012-09-17 22:04:16 +0200
commit 8f098e8dead85b6512ac72b2d805314baec72a2f (patch)
tree 96b89dbcc2a58dab0f2dd1183beef3036c13287f /program/include/rcube_charset.php
parent 99d9f50a0000447d0a752e6c43716237dc0da176 (diff)
parent 6898b420ed4eb2bd2d1933758cde27bdd305d72a (diff)
Merge branch 'master' of github.com:roundcube/roundcubemail
Diffstat (limited to 'program/include/rcube_charset.php')
-rw-r--r-- program/include/rcube_charset.php 25
1 files changed, 19 insertions, 6 deletions
diff --git a/program/include/rcube_charset.php b/program/include/rcube_charset.php
index 1740a6096..35c69729b 100644
--- a/program/include/rcube_charset.php
+++ b/program/include/rcube_charset.php
@@ -86,7 +86,7 @@ class rcube_charset
* Sometimes charset string is malformed, there are also charset aliases
* but we need strict names for charset conversion (specially utf8 class)
*
- * @param string Input charset name
+ * @param string $input Input charset name
*
* @return string The validated charset name
*/
@@ -176,9 +176,10 @@ class rcube_charset
{
static $iconv_options = null;
static $mbstring_list = null;
+ static $mbstring_sch = null;
static $conv = null;
- $to = empty($to) ? strtoupper(RCMAIL_CHARSET) : self::parse_charset($to);
+ $to = empty($to) ? strtoupper(RCMAIL_CHARSET) : $to;
$from = self::parse_charset($from);
// It is a common case when UTF-16 charset is used with US-ASCII content (#1488654)
@@ -221,6 +222,7 @@ class rcube_charset
if ($mbstring_list === null) {
if (extension_loaded('mbstring')) {
+ $mbstring_sch = mb_substitute_character();
$mbstring_list = mb_list_encodings();
$mbstring_list = array_map('strtoupper', $mbstring_list);
}
@@ -229,14 +231,25 @@ class rcube_charset
// convert charset using mbstring module
if ($mbstring_list !== null) {
$aliases['WINDOWS-1257'] = 'ISO-8859-13';
+ // it happens that mbstring supports ASCII but not US-ASCII
+ if (($from == 'US-ASCII' || $to == 'US-ASCII') && !in_array('US-ASCII', $mbstring_list)) {
+ $aliases['US-ASCII'] = 'ASCII';
+ }
$mb_from = $aliases[$from] ? $aliases[$from] : $from;
$mb_to = $aliases[$to] ? $aliases[$to] : $to;
// return if encoding found, string matches encoding and convert succeeded
if (in_array($mb_from, $mbstring_list) && in_array($mb_to, $mbstring_list)) {
- if (mb_check_encoding($str, $mb_from) && ($out = mb_convert_encoding($str, $mb_to, $mb_from))) {
- return $out;
+ if (mb_check_encoding($str, $mb_from)) {
+ // Do the same as //IGNORE with iconv
+ mb_substitute_character('none');
+ $out = mb_convert_encoding($str, $mb_to, $mb_from);
+ mb_substitute_character($mbstring_sch);
+
+ if ($out !== false) {
+ return $out;
+ }
}
}
}
@@ -646,14 +659,14 @@ class rcube_charset
return $failover;
}
- // FIXME: the order is important, because sometimes
+ // FIXME: the order is important, because sometimes
// iso string is detected as euc-jp and etc.
$enc = array(
'UTF-8', 'SJIS', 'BIG5', 'GB2312',
'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
- 'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R',
+ 'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R',
'ISO-2022-KR', 'ISO-2022-JP'
);