From ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli
Date: Tue, 15 May 2012 13:48:13 +0200
Subject: Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general purpose

---
Review notes (ignored by git-am):
 * The deprecated rcube_addressbook::normalize_string() wrapper now forwards to
   rcube_utils; it was misspelled "rcbe_utils", which is a fatal
   "class not found" error as soon as the wrapper is called.
 * In rcube_utils::normalize_string() the accent map and the sharp-s key are
   passed through utf8_decode(): they are compared against utf8_decode($part)
   (Latin-1 bytes), and strtr() maps byte-by-byte, so UTF-8 literals never
   match.
 * The accent literals on the removed lines were lost in extraction
   (presumably Latin-1 bytes in the old file); they are shown transliterated.
 * Source indentation inside the hunks is reconstructed; hunk line counts are
   unchanged.

 program/include/rcube_addressbook.php | 21 ++-------------------
 program/include/rcube_contacts.php    |  8 ++++----
 program/include/rcube_utils.php       | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 23 deletions(-)

(limited to 'program')

diff --git a/program/include/rcube_addressbook.php b/program/include/rcube_addressbook.php
index 2f264f974..8e104ddee 100644
--- a/program/include/rcube_addressbook.php
+++ b/program/include/rcube_addressbook.php
@@ -447,30 +447,13 @@ abstract class rcube_addressbook
      *
      * @param string Input string (UTF-8)
      * @return string Normalized string
+     * @deprecated since 0.9-beta, use rcube_utils::normalize_string() instead
      */
     protected static function normalize_string($str)
     {
-        // split by words
-        $arr = explode(" ", preg_replace(
-            array('/[\s;\+\-\/]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/'),
-            array(' ', '\\1\\2', ' '),
-            $str));
-
-        foreach ($arr as $i => $part) {
-            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
-                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
-                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
-                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
-                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
-            }
-            else
-                $arr[$i] = mb_strtolower($part);
-        }
-
-        return join(" ", $arr);
+        return rcube_utils::normalize_string($str);
     }
 
-
     /**
      * Compose a valid display name from the given structured contact data
      *
diff --git a/program/include/rcube_contacts.php b/program/include/rcube_contacts.php
index 8834a7dbc..4ebe1b15e 100644
--- a/program/include/rcube_contacts.php
+++ b/program/include/rcube_contacts.php
@@ -313,7 +313,7 @@ class rcube_contacts extends rcube_addressbook
             // fulltext search in all fields
             else if ($col == '*') {
                 $words = array();
-                foreach (explode($WS, self::normalize_string($value)) as $word) {
+                foreach (explode($WS, rcube_utils::normalize_string($value)) as $word) {
                     switch ($mode) {
                     case 1: // strict
                         $words[] = '(' . $this->db->ilike('words', $word . '%')
@@ -352,7 +352,7 @@ class rcube_contacts extends rcube_addressbook
                 // vCard field
                 else {
                     if (in_array($col, $this->fulltext_cols)) {
-                        foreach (explode(" ", self::normalize_string($val)) as $word) {
+                        foreach (rcube_utils::normalize_string($val, true) as $word) {
                             switch ($mode) {
                             case 1: // strict
                                 $words[] = '(' . $this->db->ilike('words', $word . $WS . '%')
@@ -728,9 +728,9 @@ class rcube_contacts extends rcube_addressbook
                 if (isset($value))
                     $vcard->set($field, $value, $section);
                 if ($fulltext && is_array($value))
-                    $words .= ' ' . self::normalize_string(join(" ", $value));
+                    $words .= ' ' . rcube_utils::normalize_string(join(" ", $value));
                 else if ($fulltext && strlen($value) >= 3)
-                    $words .= ' ' . self::normalize_string($value);
+                    $words .= ' ' . rcube_utils::normalize_string($value);
             }
         }
         $out['vcard'] = $vcard->export(false);
diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537fd..c6d4805c8 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@ class rcube_utils
         return $at ? $user . '@' . $domain : $domain;
     }
 
+    /**
+     * Split the given string into word tokens
+     *
+     * @param string Input to tokenize
+     * @return array List of tokens
+     */
+    public static function tokenize_string($str)
+    {
+        return explode(" ", preg_replace(
+            array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
+            array(' ', '\\1\\2', ' '),
+            $str));
+    }
+
+    /**
+     * Normalize the given string for fulltext search.
+     * Currently only optimized for Latin-1 characters; to be extended
+     *
+     * @param string Input string (UTF-8)
+     * @param boolean True to return list of words as array
+     * @return mixed Normalized string or a list of normalized tokens
+     */
+    public static function normalize_string($str, $as_array = false)
+    {
+        // split by words
+        $arr = self::tokenize_string($str);
+
+        foreach ($arr as $i => $part) {
+            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ? (maps below must be Latin-1 bytes too)
+                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
+                    utf8_decode('ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ'),
+                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
+                    array(utf8_decode('ß') => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+            }
+            else
+                $arr[$i] = mb_strtolower($part);
+        }
+
+        return $as_array ? $arr : join(" ", $arr);
+    }
+
 }
-- 
cgit v1.2.3