From 12dac4911b91e80cf1d8c85ee8ad1ef191b630cb Mon Sep 17 00:00:00 2001
From: thomascube
Date: Tue, 12 Apr 2011 18:01:49 +0000
Subject: Handle unicode strings when normalizing for search (#1487866)

---
 program/include/rcube_addressbook.php | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'program/include/rcube_addressbook.php')

diff --git a/program/include/rcube_addressbook.php b/program/include/rcube_addressbook.php
index 9e8254ca4..8ec0abbf2 100644
--- a/program/include/rcube_addressbook.php
+++ b/program/include/rcube_addressbook.php
@@ -397,14 +397,24 @@ abstract class rcube_addressbook
      */
     protected static function normalize_string($str)
     {
-        $norm = strtolower(strtr(utf8_decode($str),
-            'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
-            'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy'));
-
-        return preg_replace(
-            array('/[\s;\+\-\/]+/i', '/(\d)\s+(\d)/', '/\s\w{1,3}\s/'),
+        // split by words
+        $arr = explode(" ", preg_replace(
+            array('/[\s;\+\-\/]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/'),
             array(' ', '\\1\\2', ' '),
-            $norm);
+            $str));
+
+        foreach ($arr as $i => $part) {
+            if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
+                $arr[$i] = strtr(strtolower(strtr(utf8_decode($part),
+                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
+                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
+                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+            }
+            else
+                $arr[$i] = strtolower($part);
+        }
+
+        return join(" ", $arr);
     }
 }
 
--
cgit v1.2.3