From ce72e0125cf4188b8e7018672739641f30ba88fe Mon Sep 17 00:00:00 2001
From: alecpl
Date: Thu, 9 Jul 2009 21:02:34 +0000
Subject: - simplify 'utf8' class use, make rcube_charset_convert() 5x faster on systems without mbstring and iconv installed

---
Review notes (corrections applied on top of the original commit; the blob
hashes on the "index" lines still refer to the original commit):

 * main.inc, rcube_charset_convert(): when encoding from UTF-8 to $to, the
   cached converter now switches to the target charset ($to, not $from) and
   calls utf8ToStr() instead of strToUtf8().
 * utf8.class.php, loadCharset(): the alias table is read through
   $this->aliases; a bare $aliases is an undefined local there, so the
   'KOI8-R' alias was never applied.
 * utf8.class.php, loadCharset(): $this->utfMap is rebuilt on every
   successful call (as before this commit), so switching back to a charset
   whose map is already cached no longer leaves the reverse map of another
   charset in place.
 * Line breaks and indentation were reconstructed; apply with
   whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

 program/include/main.inc   | 60 ++++++++++++++++-----------------
 program/lib/utf8.class.php | 82 +++++++++++++++++++++-------------------------
 2 files changed, 66 insertions(+), 76 deletions(-)

diff --git a/program/include/main.inc b/program/include/main.inc
index ac2906d4a..296e13fad 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -183,9 +183,9 @@ function rcube_charset_convert($str, $from, $to=NULL)
   static $mbstring_loaded = null;
   static $mbstring_list = null;
   static $convert_warning = false;
-
+  static $conv = null;
+
   $error = false;
-  $conv = null;
 
   $to = empty($to) ? $to = strtoupper(RCMAIL_CHARSET) : rcube_parse_charset($to);
   $from = rcube_parse_charset($from);
@@ -223,34 +223,29 @@ function rcube_charset_convert($str, $from, $to=NULL)
     }
   }
 
-  # try to convert with custom classes
-  if (class_exists('utf8'))
-    $conv = new utf8();
-
-  // convert string to UTF-8
+  // convert charset using bundled classes/functions
   if ($to == 'UTF-8') {
     if ($from == 'UTF7-IMAP') {
       if ($_str = utf7_to_utf8($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if ($from == 'UTF-7') {
       if ($_str = rcube_utf7_to_utf8($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if (($from == 'ISO-8859-1') && function_exists('utf8_encode')) {
-      $str = utf8_encode($str);
+      return utf8_encode($str);
     }
-    else if ($from != 'UTF-8' && $conv) {
-      $from = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $from);
-      $conv->loadCharset($from);
-      $str = $conv->strToUtf8($str);
+    else if (class_exists('utf8')) {
+      if (!$conv)
+        $conv = new utf8($from);
+      else
+        $conv->loadCharset($from);
+
+      if($_str = $conv->strToUtf8($str))
+        return $_str;
     }
-    else if ($from != 'UTF-8')
-      $error = true;
+    $error = true;
   }
 
   // encode string for output
@@ -258,36 +253,37 @@ function rcube_charset_convert($str, $from, $to=NULL)
     // @TODO: we need a function for UTF-7 (RFC2152) conversion
     if ($to == 'UTF7-IMAP' || $to == 'UTF-7') {
       if ($_str = utf8_to_utf7($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if ($to == 'ISO-8859-1' && function_exists('utf8_decode')) {
       return utf8_decode($str);
     }
-    else if ($to != 'UTF-8' && $conv) {
-      $to = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $to);
-      $conv->loadCharset($to);
-      return $conv->utf8ToStr($str);
-    }
-    else if ($to != 'UTF-8') {
-      $error = true;
+    else if (class_exists('utf8')) {
+      if (!$conv)
+        $conv = new utf8($to);
+      else
+        $conv->loadCharset($to);
+
+      if ($_str = $conv->utf8ToStr($str))
+        return $_str;
     }
+    $error = true;
   }
 
   // report error
-  if ($error && !$convert_warning){
+  if ($error && !$convert_warning) {
     raise_error(array(
       'code' => 500,
       'type' => 'php',
       'file' => __FILE__,
+      'line' => __LINE__,
       'message' => "Could not convert string from $from to $to. Make sure iconv/mbstring is installed or lib/utf8.class is available."
       ), true, false);
 
     $convert_warning = true;
   }
 
-  // return UTF-8 string
+  // return UTF-8 or original string
   return $str;
   }
 
diff --git a/program/lib/utf8.class.php b/program/lib/utf8.class.php
index 2bbe63663..9f718d52d 100644
--- a/program/lib/utf8.class.php
+++ b/program/lib/utf8.class.php
@@ -37,59 +37,48 @@ Note:
 // Charset maps
 // Adapted to fit RoundCube
 define("UTF8_MAP_DIR", "program/lib/encoding");
-$utf8_maps = array(
-  "CP1250" => UTF8_MAP_DIR . "/CP1250.map",
-  "CP1251" => UTF8_MAP_DIR . "/CP1251.map",
-  "CP1252" => UTF8_MAP_DIR . "/CP1252.map",
-  "CP1253" => UTF8_MAP_DIR . "/CP1253.map",
-  "CP1254" => UTF8_MAP_DIR . "/CP1254.map",
-  "CP1255" => UTF8_MAP_DIR . "/CP1255.map",
-  "CP1256" => UTF8_MAP_DIR . "/CP1256.map",
-  "CP1257" => UTF8_MAP_DIR . "/CP1257.map",
-  "CP1258" => UTF8_MAP_DIR . "/CP1258.map",
-  "ISO-8859-1" => UTF8_MAP_DIR . "/ISO-8859-1.map",
-  "ISO-8859-2" => UTF8_MAP_DIR . "/ISO-8859-2.map",
-  "ISO-8859-3" => UTF8_MAP_DIR . "/ISO-8859-3.map",
-  "ISO-8859-4" => UTF8_MAP_DIR . "/ISO-8859-4.map",
-  "ISO-8859-5" => UTF8_MAP_DIR . "/ISO-8859-5.map",
-  "ISO-8859-6" => UTF8_MAP_DIR . "/ISO-8859-6.map",
-  "ISO-8859-7" => UTF8_MAP_DIR . "/ISO-8859-7.map",
-  "ISO-8859-8" => UTF8_MAP_DIR . "/ISO-8859-8.map",
-  "ISO-8859-9" => UTF8_MAP_DIR . "/ISO-8859-9.map",
-  "KOI8-R" => UTF8_MAP_DIR . "/KOI8R.map",
-  "KOI8R" => UTF8_MAP_DIR . "/KOI8R.map"
-  );
 
 //Error constants
-define("ERR_OPEN_MAP_FILE","ERR_OPEN_MAP_FILE");
+define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE");
 
 //Class definition
-Class utf8{
+Class utf8 {
 
   var $charset = "ISO-8859-1";
   var $ascMap = array();
   var $utfMap = array();
+  var $aliases = array(
+    'KOI8-R' => 'KOI8R'
+  );
+  var $error = null;
 
-  function __construct($charset="ISO-8859-1"){
+  function __construct($charset="ISO-8859-1") {
     $this->loadCharset($charset);
   }
 
   //Load charset
-  function loadCharset($charset){
-    global $utf8_maps;
+  function loadCharset($charset) {
+
+    $charset = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $charset);
+    if (isset($this->aliases[$charset]))
+      $charset = $this->aliases[$charset];
+
+    $this->charset = $charset;
 
-    if (!is_file($utf8_maps[$charset]))
+    if (empty($this->ascMap[$charset]))
     {
-      $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
-      return;
+      $file = UTF8_MAP_DIR.'/'.$charset.'.map';
+
+      if (!is_file($file)) {
+        $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
+        return;
     }
 
-    if (empty($this->ascMap[$charset]))
-    {
-      $lines = file_get_contents($utf8_maps[$charset]);
+      $lines = file_get_contents($file);
       $lines = preg_replace("/#.*$/m","",$lines);
       $lines = preg_replace("/\n\n/","",$lines);
       $lines = explode("\n",$lines);
+
       foreach($lines as $line){
         $parts = explode('0x',$line);
         if(count($parts)==3){
@@ -98,37 +87,42 @@ Class utf8{
           $this->ascMap[$charset][$asc]=$utf;
         }
       }
     }
-
-    $this->charset = $charset;
-    $this->utfMap = array_flip($this->ascMap[$charset]);
+
+    $this->utfMap = array_flip($this->ascMap[$charset]);
   }
 
   //Error handler
   function onError($err_code,$err_text){
-    //print($err_code . " : " . $err_text . "\n");
-    raise_error(array('code' => 500,
-                      'type' => 'php',
-                      'file' => __FILE__,
-                      'message' => $err_text), TRUE, FALSE);
+    $this->error = $err_text;
+    return null;
   }
 
   //Translate string ($str) to UTF-8 from given charset
   function strToUtf8($str){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
     $chars = unpack('C*', $str);
     $cnt = count($chars);
-    for($i=1;$i<=$cnt;$i++) $this->_charToUtf8($chars[$i]);
+    for($i=1; $i<=$cnt; $i++)
+      $this->_charToUtf8($chars[$i]);
+
     return implode("",$chars);
   }
 
   //Translate UTF-8 string to single byte string in the given charset
   function utf8ToStr($utf){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
     $chars = unpack('C*', $utf);
     $cnt = count($chars);
     $res = ""; //No simple way to do it in place... concatenate char by char
-    for ($i=1;$i<=$cnt;$i++){
+
+    for ($i=1; $i<=$cnt; $i++)
       $res .= $this->_utf8ToChar($chars, $i);
-    }
+
     return $res;
   }
 
-- 
cgit v1.2.3