Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general purpose

author: Thomas Bruederli <thomas@roundcube.net> 2012-05-15 13:48:13 +0200
committer: Thomas Bruederli <thomas@roundcube.net> 2012-05-15 13:48:13 +0200
commit: ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 (patch)
tree: cc28991a9f6aa3b3f55034f3d9ea4d0c97438a43
parent: 92be3ee4a471406bc75b345760584cc317b289fe (diff)
3 files changed, 47 insertions, 23 deletions
diff --git a/program/include/rcube_addressbook.php b/program/include/rcube_addressbook.php
index 2f264f974..8e104ddee 100644
--- a/program/include/rcube_addressbook.php
+++ b/program/include/rcube_addressbook.php
@@ -447,30 +447,13 @@ abstract class rcube_addressbook
      *
      * @param string Input string (UTF-8)
      * @return string Normalized string
+     * @deprecated since 0.9-beta
      */
     protected static function normalize_string($str)
     {
-        // split by words
-        $arr = explode(" ", preg_replace(
-            array('/[\s;\+\-\/]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/'),
-            array(' ', '\\1\\2', ' '),
-            $str));
-
-        foreach ($arr as $i => $part) {
-            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
-                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
-                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
-                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
-                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
-            }
-            else
-                $arr[$i] = mb_strtolower($part);
-        }
-
-        return join(" ", $arr);
+        return rcube_utils::normalize_string($str);
     }
 
-
     /**
      * Compose a valid display name from the given structured contact data
      *
diff --git a/program/include/rcube_contacts.php b/program/include/rcube_contacts.php
index 8834a7dbc..4ebe1b15e 100644
--- a/program/include/rcube_contacts.php
+++ b/program/include/rcube_contacts.php
@@ -313,7 +313,7 @@ class rcube_contacts extends rcube_addressbook
             // fulltext search in all fields
             else if ($col == '*') {
                 $words = array();
-                foreach (explode($WS, self::normalize_string($value)) as $word) {
+                foreach (explode($WS, rcube_utils::normalize_string($value)) as $word) {
                     switch ($mode) {
                     case 1: // strict
                         $words[] = '(' . $this->db->ilike('words', $word . '%')
@@ -352,7 +352,7 @@ class rcube_contacts extends rcube_addressbook
                 // vCard field
                 else {
                     if (in_array($col, $this->fulltext_cols)) {
-                        foreach (explode(" ", self::normalize_string($val)) as $word) {
+                        foreach (rcube_utils::normalize_string($val, true) as $word) {
                             switch ($mode) {
                             case 1: // strict
                                 $words[] = '(' . $this->db->ilike('words', $word . $WS . '%')
@@ -728,9 +728,9 @@ class rcube_contacts extends rcube_addressbook
                 if (isset($value))
                     $vcard->set($field, $value, $section);
                 if ($fulltext && is_array($value))
-                    $words .= ' ' . self::normalize_string(join(" ", $value));
+                    $words .= ' ' . rcube_utils::normalize_string(join(" ", $value));
                 else if ($fulltext && strlen($value) >= 3)
-                    $words .= ' ' . self::normalize_string($value);
+                    $words .= ' ' . rcube_utils::normalize_string($value);
             }
         }
         $out['vcard'] = $vcard->export(false);
diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537fd..c6d4805c8 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@ class rcube_utils
         return $at ? $user . '@' . $domain : $domain;
     }
 
+    /**
+     * Split the given string into word tokens
+     *
+     * @param string Input to tokenize
+     * @return array List of tokens
+     */
+    public static function tokenize_string($str)
+    {
+        return explode(" ", preg_replace(
+            array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
+            array(' ', '\\1\\2', ' '),
+            $str));
+    }
+
+    /**
+     * Normalize the given string for fulltext search.
+     * Currently only optimized for Latin-1 characters; to be extended
+     *
+     * @param string  Input string (UTF-8)
+     * @param boolean True to return list of words as array
+     * @return mixed  Normalized string or a list of normalized tokens
+     */
+    public static function normalize_string($str, $as_array = false)
+    {
+        // split by words
+        $arr = self::tokenize_string($str);
+
+        foreach ($arr as $i => $part) {
+            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
+                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
+                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
+                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
+                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+            }
+            else
+                $arr[$i] = mb_strtolower($part);
+        }
+
+        return $as_array ? $arr : join(" ", $arr);
+    }
+
 }
author	Thomas Bruederli <thomas@roundcube.net>	2012-05-15 13:48:13 +0200
committer	Thomas Bruederli <thomas@roundcube.net>	2012-05-15 13:48:13 +0200
commit	ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 (patch)
tree	cc28991a9f6aa3b3f55034f3d9ea4d0c97438a43
parent	92be3ee4a471406bc75b345760584cc317b289fe (diff)