1 files changed, 41 insertions, 0 deletions
diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537fd..c6d4805c8 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@ class rcube_utils
         return $at ? $user . '@' . $domain : $domain;
     }
 
+    /**
+     * Split the given string into word tokens: separators are collapsed, digit groups are
+     * joined and every word of 1-3 characters enclosed by whitespace is dropped.
+     * @param string Input to tokenize
+     * @return array List of non-empty tokens (none when preg_replace() fails)
+     */
+    public static function tokenize_string($str)
+    {
+        // the lookahead leaves the trailing space unconsumed, so runs of short words are all dropped
+        $expr = array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}(?=\s)/u');
+        $str  = (string) preg_replace($expr, array(' ', '\\1\\2', ''), $str);
+        return preg_split('/ +/', $str, -1, PREG_SPLIT_NO_EMPTY);
+    }
+
+    /**
+     * Normalize the given string for fulltext search: every token is lower-cased and,
+     * if it holds only Latin-1 characters, accents and ae/oe/ue digraphs are folded.
+     *
+     * @param string  Input string (UTF-8)
+     * @param boolean True to return list of words as array
+     * @return mixed  Normalized string or a list of normalized tokens
+     */
+    public static function normalize_string($str, $as_array = false)
+    {
+        // folding works on UTF-8 directly, so it needs no utf8_decode()/utf8_encode() round trip
+        static $accents = array(
+            'ä' => 'a', 'â' => 'a', 'à' => 'a', 'å' => 'a', 'á' => 'a', 'ã' => 'a', 'ç' => 'c',
+            'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'è' => 'e', 'ï' => 'i', 'î' => 'i', 'ì' => 'i', 'í' => 'i',
+            'ö' => 'o', 'ô' => 'o', 'ò' => 'o', 'ø' => 'o', 'ó' => 'o', 'õ' => 'o', 'ñ' => 'n',
+            'ü' => 'u', 'û' => 'u', 'ù' => 'u', 'ú' => 'u', 'ÿ' => 'y', 'ý' => 'y',
+        );
+        static $digraphs = array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u');
+        $arr = self::tokenize_string($str);
+        foreach ($arr as $i => $part) {
+            $latin1  = preg_match('/[^\x{00}-\x{FF}]/u', $part) === 0;  // is latin-1 ?
+            $part    = mb_strtolower($part, 'UTF-8');
+            $arr[$i] = $latin1 ? strtr(strtr($part, $accents), $digraphs) : $part;
+        }
+        return $as_array ? $arr : join(" ", $arr);
+    }
+
 }