diff options
Diffstat (limited to 'program/lib/utf8.class.php')
-rw-r--r-- | program/lib/utf8.class.php | 171 |
1 files changed, 0 insertions, 171 deletions
diff --git a/program/lib/utf8.class.php b/program/lib/utf8.class.php deleted file mode 100644 index 0446159c7..000000000 --- a/program/lib/utf8.class.php +++ /dev/null @@ -1,171 +0,0 @@ -<?php -/* -utf8 1.0 -Copyright: Left ---------------------------------------------------------------------------------- -Version: 1.0 -Date: 23 November 2004 ---------------------------------------------------------------------------------- -Author: Alexander Minkovsky (a_minkovsky@hotmail.com) ---------------------------------------------------------------------------------- -License: Choose the more appropriated for You - I don't care. ---------------------------------------------------------------------------------- -Description: - Class provides functionality to convert single byte strings, such as CP1251 - ti UTF-8 multibyte format and vice versa. - Class loads a concrete charset map, for example CP1251. - (Refer to ftp://ftp.unicode.org/Public/MAPPINGS/ for map files) - Directory containing MAP files is predefined as constant. - Each charset is also predefined as constant pointing to the MAP file. ---------------------------------------------------------------------------------- -Example usage: - Pass the desired charset in the class constructor: - $utfConverter = new utf8(CP1251); //defaults to CP1250. - or load the charset MAP using loadCharset method like this: - $utfConverter->loadCharset(CP1252); - Then call - $res = $utfConverter->strToUtf8($str); - or - $res = $utfConverter->utf8ToStr($utf); - to get the needed encoding. ---------------------------------------------------------------------------------- -Note: - Rewrite or Override the onError method if needed. It's the error handler used from everywhere and takes 2 parameters: - err_code and err_text. By default it just prints out a message about the error. -*/ - -// Charset maps -// Adapted to fit Roundcube -define("UTF8_MAP_DIR", "program/lib/encoding"); - -//Error constants -define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE"); - -//Class definition -Class utf8 { - - var $charset = "ISO-8859-1"; - var $ascMap = array(); - var $utfMap = array(); - var $aliases = array( - 'KOI8-R' => 'KOI8R' - ); - var $error = null; - - function __construct($charset="ISO-8859-1") { - $this->loadCharset($charset); - } - - //Load charset - function loadCharset($charset) { - - $charset = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $charset); - if (isset($this->aliases[$charset])) - $charset = $this->aliases[$charset]; - - $this->charset = $charset; - - if (empty($this->ascMap[$charset])) - { - $file = UTF8_MAP_DIR.'/'.$charset.'.map'; - - if (!is_file($file)) { - $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset"); - return; - } - - $lines = file_get_contents($file); - $lines = preg_replace("/#.*$/m","",$lines); - $lines = preg_replace("/\n\n/","",$lines); - $lines = explode("\n",$lines); - - foreach($lines as $line){ - $parts = explode('0x',$line); - if(count($parts)==3){ - $asc=hexdec(substr($parts[1],0,2)); - $utf=hexdec(substr($parts[2],0,4)); - $this->ascMap[$charset][$asc]=$utf; - } - } - - $this->utfMap = array_flip($this->ascMap[$charset]); - } - } - - //Error handler - function onError($err_code,$err_text){ - $this->error = $err_text; - return null; - } - - //Translate string ($str) to UTF-8 from given charset - function strToUtf8($str){ - if (empty($this->ascMap[$this->charset])) - return null; - - $chars = unpack('C*', $str); - $cnt = count($chars); - for($i=1; $i<=$cnt; $i++) - $this->_charToUtf8($chars[$i]); - - return implode("",$chars); - } - - //Translate UTF-8 string to single byte string in the given charset - function utf8ToStr($utf){ - if (empty($this->ascMap[$this->charset])) - return null; - - $chars = unpack('C*', $utf); - $cnt = count($chars); - $res = ""; //No simple way to do it in place... concatenate char by char - - for ($i=1; $i<=$cnt; $i++) - $res .= $this->_utf8ToChar($chars, $i); - - return $res; - } - - //Char to UTF-8 sequence - function _charToUtf8(&$char){ - $c = (int)$this->ascMap[$this->charset][$char]; - if ($c < 0x80){ - $char = chr($c); - } - else if($c<0x800) // 2 bytes - $char = (chr(0xC0 | $c>>6) . chr(0x80 | $c & 0x3F)); - else if($c<0x10000) // 3 bytes - $char = (chr(0xE0 | $c>>12) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F)); - else if($c<0x200000) // 4 bytes - $char = (chr(0xF0 | $c>>18) . chr(0x80 | $c>>12 & 0x3F) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F)); - } - - //UTF-8 sequence to single byte character - function _utf8ToChar(&$chars, &$idx){ - if(($chars[$idx] >= 240) && ($chars[$idx] <= 255)){ // 4 bytes - $utf = (intval($chars[$idx]-240) << 18) + - (intval($chars[++$idx]-128) << 12) + - (intval($chars[++$idx]-128) << 6) + - (intval($chars[++$idx]-128) << 0); - } - else if (($chars[$idx] >= 224) && ($chars[$idx] <= 239)){ // 3 bytes - $utf = (intval($chars[$idx]-224) << 12) + - (intval($chars[++$idx]-128) << 6) + - (intval($chars[++$idx]-128) << 0); - } - else if (($chars[$idx] >= 192) && ($chars[$idx] <= 223)){ // 2 bytes - $utf = (intval($chars[$idx]-192) << 6) + - (intval($chars[++$idx]-128) << 0); - } - else{ // 1 byte - $utf = $chars[$idx]; - } - if(array_key_exists($utf,$this->utfMap)) - return chr($this->utfMap[$utf]); - else - return "?"; - } - -} - -?> |