summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thomas Bruederli <thomas@roundcube.net> 2012-05-15 13:48:13 +0200
committer: Thomas Bruederli <thomas@roundcube.net> 2012-05-15 13:48:13 +0200
commit: ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 (patch)
tree: cc28991a9f6aa3b3f55034f3d9ea4d0c97438a43
parent: 92be3ee4a471406bc75b345760584cc317b289fe (diff)
Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general purpose
-rw-r--r-- program/include/rcube_addressbook.php | 21
-rw-r--r-- program/include/rcube_contacts.php | 8
-rw-r--r-- program/include/rcube_utils.php | 41
3 files changed, 47 insertions, 23 deletions
diff --git a/program/include/rcube_addressbook.php b/program/include/rcube_addressbook.php
index 2f264f974..8e104ddee 100644
--- a/program/include/rcube_addressbook.php
+++ b/program/include/rcube_addressbook.php
@@ -447,30 +447,13 @@ abstract class rcube_addressbook
*
* @param string Input string (UTF-8)
* @return string Normalized string
+ * @deprecated since 0.9-beta, use rcube_utils::normalize_string() instead
*/
protected static function normalize_string($str)
{
- // split by words
- $arr = explode(" ", preg_replace(
- array('/[\s;\+\-\/]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/'),
- array(' ', '\\1\\2', ' '),
- $str));
-
- foreach ($arr as $i => $part) {
- if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
- $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
- '',
- 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
- array('' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
- }
- else
- $arr[$i] = mb_strtolower($part);
- }
-
- return join(" ", $arr);
+ return rcube_utils::normalize_string($str); // thin wrapper: forwards to the shared implementation in rcube_utils
}
-
/**
* Compose a valid display name from the given structured contact data
*
diff --git a/program/include/rcube_contacts.php b/program/include/rcube_contacts.php
index 8834a7dbc..4ebe1b15e 100644
--- a/program/include/rcube_contacts.php
+++ b/program/include/rcube_contacts.php
@@ -313,7 +313,7 @@ class rcube_contacts extends rcube_addressbook
// fulltext search in all fields
else if ($col == '*') {
$words = array();
- foreach (explode($WS, self::normalize_string($value)) as $word) {
+ foreach (explode($WS, rcube_utils::normalize_string($value)) as $word) {
switch ($mode) {
case 1: // strict
$words[] = '(' . $this->db->ilike('words', $word . '%')
@@ -352,7 +352,7 @@ class rcube_contacts extends rcube_addressbook
// vCard field
else {
if (in_array($col, $this->fulltext_cols)) {
- foreach (explode(" ", self::normalize_string($val)) as $word) {
+ foreach (rcube_utils::normalize_string($val, true) as $word) {
switch ($mode) {
case 1: // strict
$words[] = '(' . $this->db->ilike('words', $word . $WS . '%')
@@ -728,9 +728,9 @@ class rcube_contacts extends rcube_addressbook
if (isset($value))
$vcard->set($field, $value, $section);
if ($fulltext && is_array($value))
- $words .= ' ' . self::normalize_string(join(" ", $value));
+ $words .= ' ' . rcube_utils::normalize_string(join(" ", $value));
else if ($fulltext && strlen($value) >= 3)
- $words .= ' ' . self::normalize_string($value);
+ $words .= ' ' . rcube_utils::normalize_string($value);
}
}
$out['vcard'] = $vcard->export(false);
diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537fd..c6d4805c8 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@ class rcube_utils
return $at ? $user . '@' . $domain : $domain;
}
+ /**
+ * Split the given string into word tokens: separators (whitespace ; / + -) become one space, digit groups are joined, 1-3 char words between spaces are dropped
+ *
+ * @param string $str Input to tokenize (UTF-8; all patterns run in UTF-8 mode so multibyte characters are never split)
+ * @return array List of tokens
+ */
+ public static function tokenize_string($str)
+ {
+ return explode(" ", preg_replace(
+ array('/[\s;\/+-]+/ui', '/(\d)[-.\s]+(\d)/u', '/\s\w{1,3}\s/u'),
+ array(' ', '\\1\\2', ' '),
+ $str));
+ }
+
+ /**
+ * Normalize the given string for fulltext search: tokenize, lower-case and fold accented Latin-1 letters.
+ * Words that do not round-trip through Latin-1 are only lower-cased with mb_strtolower(); to be extended
+ *
+ * @param string $str Input string (UTF-8)
+ * @param boolean $as_array True to return list of words as array
+ * @return mixed Normalized string (words joined by a single space) or a list of normalized tokens
+ */
+ public static function normalize_string($str, $as_array = false)
+ {
+ // split by words
+ $arr = self::tokenize_string($str);
+ // strtr() maps byte by byte, so the UTF-8 literals below are converted to Latin-1 to match the decoded word
+ foreach ($arr as $i => $part) {
+ if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
+ $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
+ utf8_decode('ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ'),
+ 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
+ array(utf8_decode('ß') => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+ }
+ else
+ $arr[$i] = mb_strtolower($part);
+ }
+
+ return $as_array ? $arr : join(" ", $arr);
+ }
+
}