diff options
author | thomascube <thomas@roundcube.net> | 2012-01-16 15:14:41 +0000 |
---|---|---|
committer | thomascube <thomas@roundcube.net> | 2012-01-16 15:14:41 +0000 |
commit | c321a955a7b0f6d6b13ffaebf040a6c7091037ae (patch) | |
tree | 60c257d29a726d9bdda7fb75a198342aaef315fa /program/include/rcube_shared.inc | |
parent | 8764b6ecf0c8d1b0646915a8139cdf6bbac2ca14 (diff) |
Merged devel-framework branch (r5746:5779) back into trunk
Diffstat (limited to 'program/include/rcube_shared.inc')
-rw-r--r-- | program/include/rcube_shared.inc | 117 |
1 files changed, 0 insertions, 117 deletions
diff --git a/program/include/rcube_shared.inc b/program/include/rcube_shared.inc index 6767c93e7..936e8959a 100644 --- a/program/include/rcube_shared.inc +++ b/program/include/rcube_shared.inc @@ -419,123 +419,6 @@ function rc_image_content_type($data) /** - * A method to guess encoding of a string. - * - * @param string $string String. - * @param string $failover Default result for failover. - * - * @return string - */ -function rc_detect_encoding($string, $failover='') -{ - if (!function_exists('mb_detect_encoding')) { - return $failover; - } - - // FIXME: the order is important, because sometimes - // iso string is detected as euc-jp and etc. - $enc = array( - 'UTF-8', 'SJIS', 'BIG5', 'GB2312', - 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', - 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', - 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16', - 'WINDOWS-1252', 'WINDOWS-1251', 'EUC-JP', 'EUC-TW', 'KOI8-R', - 'ISO-2022-KR', 'ISO-2022-JP' - ); - - $result = mb_detect_encoding($string, join(',', $enc)); - - return $result ? $result : $failover; -} - -/** - * Removes non-unicode characters from input - * - * @param mixed $input String or array. - * @return string - */ -function rc_utf8_clean($input) -{ - // handle input of type array - if (is_array($input)) { - foreach ($input as $idx => $val) - $input[$idx] = rc_utf8_clean($val); - return $input; - } - - if (!is_string($input) || $input == '') - return $input; - - // iconv/mbstring are much faster (especially with long strings) - if (function_exists('mb_convert_encoding') && ($res = mb_convert_encoding($input, 'UTF-8', 'UTF-8')) !== false) - return $res; - - if (function_exists('iconv') && ($res = @iconv('UTF-8', 'UTF-8//IGNORE', $input)) !== false) - return $res; - - $regexp = '/^('. -// '[\x00-\x7F]'. // UTF8-1 - '|[\xC2-\xDF][\x80-\xBF]'. // UTF8-2 - '|\xE0[\xA0-\xBF][\x80-\xBF]'. // UTF8-3 - '|[\xE1-\xEC][\x80-\xBF][\x80-\xBF]'. // UTF8-3 - '|\xED[\x80-\x9F][\x80-\xBF]'. // UTF8-3 - '|[\xEE-\xEF][\x80-\xBF][\x80-\xBF]'. // UTF8-3 - '|\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]'. // UTF8-4 - '|[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]'.// UTF8-4 - '|\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]'. // UTF8-4 - ')$/'; - - $seq = ''; - $out = ''; - - for ($i = 0, $len = strlen($input); $i < $len; $i++) { - $chr = $input[$i]; - $ord = ord($chr); - // 1-byte character - if ($ord <= 0x7F) { - if ($seq) - $out .= preg_match($regexp, $seq) ? $seq : ''; - $seq = ''; - $out .= $chr; - // first (or second) byte of multibyte sequence - } else if ($ord >= 0xC0) { - if (strlen($seq)>1) { - $out .= preg_match($regexp, $seq) ? $seq : ''; - $seq = ''; - } else if ($seq && ord($seq) < 0xC0) { - $seq = ''; - } - $seq .= $chr; - // next byte of multibyte sequence - } else if ($seq) { - $seq .= $chr; - } - } - - if ($seq) - $out .= preg_match($regexp, $seq) ? $seq : ''; - - return $out; -} - - -/** - * Convert a variable into a javascript object notation - * - * @param mixed Input value - * @return string Serialized JSON string - */ -function json_serialize($input) -{ - $input = rc_utf8_clean($input); - - // sometimes even using rc_utf8_clean() the input contains invalid UTF-8 sequences - // that's why we have @ here - return @json_encode($input); -} - - -/** * Explode quoted string * * @param string Delimiter expression string for preg_match() |