diff options
author | alecpl <alec@alec.pl> | 2009-05-19 19:26:47 +0000 |
---|---|---|
committer | alecpl <alec@alec.pl> | 2009-05-19 19:26:47 +0000 |
commit | dbe44cd105d936fb82159eb1a8447db074ff5428 (patch) | |
tree | a50856bfbf5a1a4b1375a31990def5ddbf16e940 /program | |
parent | 3e8d8975a9a2988aaa8413dc440617c01c404c5a (diff) |
- Better support for malformed character names (#1485758)
Diffstat (limited to 'program')
-rw-r--r-- | program/include/main.inc | 72 |
1 files changed, 51 insertions, 21 deletions
```diff
diff --git a/program/include/main.inc b/program/include/main.inc
index 80b9e61b7..f111ac9a6 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -182,30 +182,15 @@ function rcube_charset_convert($str, $from, $to=NULL)
   static $mbstring_list = null;
   static $convert_warning = false;
 
-  $from = strtoupper($from);
-  $to = $to==NULL ? strtoupper(RCMAIL_CHARSET) : strtoupper($to);
-  $error = false; $conv = null;
+  $error = false;
+  $conv = null;
 
-  # RFC1642
-  if ($from == 'UNICODE-1-1-UTF-7')
-    $from = 'UTF-7';
-  if ($to == 'UNICODE-1-1-UTF-7')
-    $to = 'UTF-7';
+  $to = empty($to) ? $to = strtoupper(RCMAIL_CHARSET) : rcube_parse_charset($to);
+  $from = rcube_parse_charset($from);
 
   if ($from == $to || empty($str) || empty($from))
     return $str;
 
-  $aliases = array(
-    'US-ASCII' => 'ISO-8859-1',
-    'ANSI_X3.110-1983' => 'ISO-8859-1',
-    'ANSI_X3.4-1968' => 'ISO-8859-1',
-    'UNKNOWN-8BIT' => 'ISO-8859-15',
-    'X-UNKNOWN' => 'ISO-8859-15',
-    'X-USER-DEFINED' => 'ISO-8859-15',
-    'ISO-8859-8-I' => 'ISO-8859-8',
-    'KS_C_5601-1987' => 'EUC-KR',
-  );
-
   // convert charset using iconv module
   if (function_exists('iconv') && $from != 'UTF-7' && $to != 'UTF-7') {
     $aliases['GB2312'] = 'GB18030';
@@ -227,7 +212,7 @@ function rcube_charset_convert($str, $from, $to=NULL)
       $mbstring_list = mb_list_encodings();
       $mbstring_list = array_map('strtoupper', $mbstring_list);
     }
-    
+
     $mb_from = $aliases[$from] ? $aliases[$from] : $from;
     $mb_to = $aliases[$to] ? $aliases[$to] : $to;
 
@@ -235,6 +220,9 @@ function rcube_charset_convert($str, $from, $to=NULL)
     if (in_array($mb_from, $mbstring_list) && in_array($mb_to, $mbstring_list)) {
       if (mb_check_encoding($str, $mb_from) && ($out = mb_convert_encoding($str, $mb_to, $mb_from)))
         return $out;
+      else
+        // return here, encoding supported, but string is invalid
+        return $str;
     }
   }
 
@@ -280,7 +268,7 @@ function rcube_charset_convert($str, $from, $to=NULL)
       'code' => 500,
       'type' => 'php',
       'file' => __FILE__,
-      'message' => "Could not convert string from $from to $to. Make sure iconv is installed or lib/utf8.class is available"
+      'message' => "Could not convert string from $from to $to. Make sure iconv/mbstring is installed or lib/utf8.class is available."
       ), true, false);
 
     $convert_warning = true;
@@ -292,6 +280,48 @@ function rcube_charset_convert($str, $from, $to=NULL)
 
 
 /**
+ * Parse and validate charset name string (see #1485758).
+ * Sometimes charset string is malformed, there are also charset aliases
+ * but we need strict names for charset conversion (specially utf8 class)
+ *
+ * @param string Input charset name
+ * @return The validated charset name
+ */
+function rcube_parse_charset($charset)
+  {
+  $charset = strtoupper($charset);
+
+  # RFC1642
+  $charset = str_replace('UNICODE-1-1-', '', $charset);
+
+  $aliases = array(
+    'USASCII' => 'ISO-8859-1',
+    'ANSIX31101983' => 'ISO-8859-1',
+    'ANSIX341968' => 'ISO-8859-1',
+    'UNKNOWN8BIT' => 'ISO-8859-15',
+    'XUNKNOWN' => 'ISO-8859-15',
+    'XUSERDEFINED' => 'ISO-8859-15',
+    'ISO88598I' => 'ISO-8859-8',
+    'KSC56011987' => 'EUC-KR',
+    'UNICODE' => 'UTF-8',
+  );
+
+  $str = preg_replace('/[^a-z0-9]/i', '', $charset);
+
+  if (isset($aliases[$str]))
+    return $aliases[$str];
+
+  if (preg_match('/UTF(7|8|16|32)(BE|LE)*/', $str, $m))
+    return 'UTF-' . $m[1] . $m[2];
+
+  if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m))
+    return 'ISO-8859-' . ($m[1] ? $m[1] : 1);
+
+  return $charset;
+  }
+
+
+/**
  * Replacing specials characters to a specific encoding type
  *
  * @param string Input string
```