Initial revision

author: thomascube <thomas@roundcube.net> 2005-09-25 14:18:03 +0000
committer: thomascube <thomas@roundcube.net> 2005-09-25 14:18:03 +0000
commit: 4e17e6c9dbac8991ee8b302cb2581241247dc8bc (patch)
tree: d877546f6bd334b041734498e81f6299e005b01c /program/lib/utf8.inc
1 files changed, 102 insertions, 0 deletions
diff --git a/program/lib/utf8.inc b/program/lib/utf8.inc
new file mode 100644
index 000000000..72a96b4e9
--- /dev/null
+++ b/program/lib/utf8.inc
@@ -0,0 +1,102 @@
+<?php
+/////////////////////////////
+//	utf8.inc
+//	(C)2002 Ryo Chijiiwa <Ryo@IlohaMail.org>
+//		
+//		Description:
+//				UTF-8 handling functions
+//
+//	This file is part of IlohaMail. IlohaMail is free software released 
+//	under the GPL license.  See enclosed file COPYING for details, or 
+//	see http://www.fsf.org/copyleft/gpl.html
+////////////////////////////
+
+/**
+* Converts a UTF-8 encoded byte string into a string of decimal numeric
+* character references ("unicode entities").
+*
+* Every character of the input - including plain ASCII - is emitted as an
+* entity of the form &#nnn; : single-byte (ASCII) characters are zero-padded
+* to 3 digits, multi-byte characters are zero-padded to at least 5 digits.
+*
+* Bytes that do not form a well-formed UTF-8 sequence (stray continuation
+* bytes, invalid lead bytes, truncated or overlong sequences, encoded
+* surrogates, values above U+10FFFF) are each replaced by &#65533;
+* (U+FFFD REPLACEMENT CHARACTER) and decoding resumes at the next byte.
+*
+* Based on code by ronen at greyzone dot com, taken from a php.net comment:
+*	http://www.php.net/manual/en/function.utf8-decode.php
+*
+* @param $source string encoded using utf-8 [STRING]
+* @return string of unicode entities [STRING]
+* @access public
+*/
+function utf8ToUnicodeEntities ($source) {
+	// work on a byte string so that offsets below are always valid
+	$source = (string) $source;
+	$len = strlen ($source);
+	$pos = 0;
+	$encodedString = '';
+
+	while ($pos < $len) {
+		$lead = ord ($source[$pos]);
+
+		// 7-bit ASCII: one byte, value is the code point itself
+		if ($lead < 0x80) {
+			$encodedString .= sprintf ('&#%03d;', $lead);
+			$pos++;
+			continue;
+		}
+
+		// classify the lead byte: total sequence length, payload bits carried
+		// by the lead byte, and the smallest code point that legitimately
+		// needs this many bytes (anything smaller is an overlong encoding)
+		$seqLen = 0;
+		$code = 0;
+		$min = 0;
+		if (($lead >= 0xC2) && ($lead <= 0xDF)) {
+			// 0xC0 and 0xC1 are excluded: they can only start overlong forms
+			$seqLen = 2;
+			$code = $lead & 0x1F;
+			$min = 0x80;
+		}
+		else if (($lead >= 0xE0) && ($lead <= 0xEF)) {
+			$seqLen = 3;
+			$code = $lead & 0x0F;
+			$min = 0x800;
+		}
+		else if (($lead >= 0xF0) && ($lead <= 0xF4)) {
+			// leads above 0xF4 would encode values beyond U+10FFFF
+			$seqLen = 4;
+			$code = $lead & 0x07;
+			$min = 0x10000;
+		}
+
+		// the sequence must fit in the remaining input ...
+		$valid = ($seqLen > 0) && ($pos + $seqLen <= $len);
+
+		// ... and every trailing byte must be a continuation byte (10xxxxxx)
+		for ($i = 1; $valid && ($i < $seqLen); $i++) {
+			$cont = ord ($source[$pos + $i]);
+			if (($cont & 0xC0) != 0x80) {
+				$valid = false;
+			}
+			else {
+				$code = ($code << 6) | ($cont & 0x3F);
+			}
+		}
+
+		// reject overlong forms, UTF-16 surrogates and out-of-range values
+		if ($valid && (($code < $min) || ($code > 0x10FFFF) || (($code >= 0xD800) && ($code <= 0xDFFF)))) {
+			$valid = false;
+		}
+
+		if ($valid) {
+			$encodedString .= sprintf ('&#%05d;', $code);
+			$pos += $seqLen;
+		}
+		else {
+			// substitute U+FFFD for the offending byte and resynchronise
+			$encodedString .= '&#65533;';
+			$pos++;
+		}
+	}
+
+	return $encodedString;
+}
+
+?>
+\ No newline at end of file
author	thomascube <thomas@roundcube.net>	2005-09-25 14:18:03 +0000
committer	thomascube <thomas@roundcube.net>	2005-09-25 14:18:03 +0000
commit	4e17e6c9dbac8991ee8b302cb2581241247dc8bc (patch)
tree	d877546f6bd334b041734498e81f6299e005b01c /program/lib/utf8.inc