summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksander Machniak <alec@alec.pl>2014-05-27 11:39:31 +0200
committerAleksander Machniak <alec@alec.pl>2014-05-27 14:59:03 +0200
commitdbd5c184714a14b9944d4f4654fca6c94b81d0de (patch)
tree38fea99b8069eec0356e5723b55e8ae1bb285047
parent4f3f85da39b1ec0ad092a1f7e6a27277588cc689 (diff)
Fix broken normalize_string(), add support for ISO-8859-2
-rw-r--r--program/lib/Roundcube/rcube_utils.php34
-rw-r--r--tests/Framework/Utils.php3
2 files changed, 27 insertions, 10 deletions
diff --git a/program/lib/Roundcube/rcube_utils.php b/program/lib/Roundcube/rcube_utils.php
index c2009cee0..00999ba50 100644
--- a/program/lib/Roundcube/rcube_utils.php
+++ b/program/lib/Roundcube/rcube_utils.php
@@ -928,7 +928,7 @@ class rcube_utils
/**
* Normalize the given string for fulltext search.
- * Currently only optimized for Latin-1 characters; to be extended
+ * Currently only optimized for ISO-8859-1 and ISO-8859-2 characters; to be extended
*
* @param string Input string (UTF-8)
* @param boolean True to return list of words as array
@@ -949,15 +949,32 @@ class rcube_utils
// split by words
$arr = self::tokenize_string($str);
+ // detect character set
+ if (utf8_encode(utf8_decode($str)) == $str) {
+ // ISO-8859-1 (or ASCII)
+ preg_match_all('/./u', 'äâàåáãæçéêëèïîìíñöôòøõóüûùúýÿ', $keys);
+ preg_match_all('/./', 'aaaaaaaceeeeiiiinoooooouuuuyy', $values);
+
+ $mapping = array_combine($keys[0], $values[0]);
+ $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+ }
+ else if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-2'), 'ISO-8859-2', 'UTF-8') == $str) {
+ // ISO-8859-2
+ preg_match_all('/./u', 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżý', $keys);
+ preg_match_all('/./', 'aaaaccceeeeiilllnnooorrsssttuuuuzzzy', $values);
+
+ $mapping = array_combine($keys[0], $values[0]);
+ $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+ }
+
foreach ($arr as $i => $part) {
- if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
- $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
- 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
- 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
- array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+ $part = mb_strtolower($part);
+
+ if (!empty($mapping)) {
+ $part = strtr($part, $mapping);
}
- else
- $arr[$i] = mb_strtolower($part);
+
+ $arr[$i] = $part;
}
return $as_array ? $arr : join(" ", $arr);
@@ -1039,7 +1056,6 @@ class rcube_utils
}
}
-
/**
* Find out if the string content means true or false
*
diff --git a/tests/Framework/Utils.php b/tests/Framework/Utils.php
index 82b8ebb73..560a8bde7 100644
--- a/tests/Framework/Utils.php
+++ b/tests/Framework/Utils.php
@@ -327,7 +327,8 @@ class Framework_Utils extends PHPUnit_Framework_TestCase
$test = array(
'' => '',
'abc def' => 'abc def',
- 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ' => 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy',
+ 'ÇçäâàåæéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ' => 'ccaaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy',
+ 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżýĄŚŻŹĆ' => 'aaaaccceeeeiilllnnooorrsssttuuuuzzzyaszzc',
'ß' => 'ss',
'ae' => 'a',
'oe' => 'o',