From 3e2637351da9559a4aa420004ac90e9fe30477ef Mon Sep 17 00:00:00 2001 From: thomascube Date: Mon, 14 Feb 2011 20:46:48 +0000 Subject: Fulltext search over contact fields. Attention: DATABASE SCHEMA CHANGED\! --- CHANGELOG | 5 ++++ SQL/mssql.initial.sql | 3 +- SQL/mssql.upgrade.sql | 6 +++- SQL/mysql.initial.sql | 1 + SQL/mysql.update.sql | 3 ++ SQL/postgres.initial.sql | 5 ++-- SQL/postgres.update.sql | 6 ++++ SQL/sqlite.initial.sql | 3 +- SQL/sqlite.update.sql | 39 +++++++++++++++++++++++++ bin/indexcontacts.sh | 54 +++++++++++++++++++++++++++++++++++ program/include/rcube_addressbook.php | 21 ++++++++++++++ program/include/rcube_contacts.php | 29 ++++++++++++++----- program/include/rcube_ldap.php | 11 +++++++ program/include/rcube_vcard.php | 12 +++++--- program/steps/addressbook/search.inc | 2 +- 15 files changed, 183 insertions(+), 17 deletions(-) create mode 100755 bin/indexcontacts.sh diff --git a/CHANGELOG b/CHANGELOG index 2ff5c59c8..abc0a0708 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,12 @@ CHANGELOG Roundcube Webmail =========================== +- Fulltext search over (almost) all data for contacts +- Extend address book with rich contact information - Support strftime format in date_today option + +RELEASE 0.5.1 +------------- - Fix handling of attachments with invalid content type (#1487767) - Add workaround for DBMail's bug http://www.dbmail.org/mantis/view.php?id=881 (#1487766) - Use IMAP's ID extension (RFC2971) to print more info into debug log diff --git a/SQL/mssql.initial.sql b/SQL/mssql.initial.sql index 823d1b3ae..4e31c07d0 100644 --- a/SQL/mssql.initial.sql +++ b/SQL/mssql.initial.sql @@ -16,7 +16,8 @@ CREATE TABLE [dbo].[contacts] ( [email] [varchar] (255) COLLATE Latin1_General_CI_AI NOT NULL , [firstname] [varchar] (128) COLLATE Latin1_General_CI_AI NOT NULL , [surname] [varchar] (128) COLLATE Latin1_General_CI_AI NOT NULL , - [vcard] [text] COLLATE Latin1_General_CI_AI NULL + [vcard] [text] COLLATE Latin1_General_CI_AI NULL , + [words] [text] COLLATE Latin1_General_CI_AI NULL ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] GO diff --git a/SQL/mssql.upgrade.sql b/SQL/mssql.upgrade.sql index 4072c25ff..3d067924c 100644 --- a/SQL/mssql.upgrade.sql +++ b/SQL/mssql.upgrade.sql @@ -96,4 +96,8 @@ CREATE UNIQUE INDEX [IX_users_username] ON [dbo].[users]([username],[mail_host]) GO ALTER TABLE [dbo].[contacts] ALTER COLUMN [email] [varchar] (255) COLLATE Latin1_General_CI_AI NOT NULL GO - + +-- Updates from version 0.5.x + +ALTER TABLE [dbo].[contacts] ADD [words] [text] COLLATE Latin1_General_CI_AI NULL +GO diff --git a/SQL/mysql.initial.sql b/SQL/mysql.initial.sql index 6e2c24731..accf212d4 100644 --- a/SQL/mysql.initial.sql +++ b/SQL/mysql.initial.sql @@ -87,6 +87,7 @@ CREATE TABLE `contacts` ( `firstname` varchar(128) NOT NULL DEFAULT '', `surname` varchar(128) NOT NULL DEFAULT '', `vcard` text NULL, + `words` text NULL, `user_id` int(10) UNSIGNED NOT NULL DEFAULT '0', PRIMARY KEY(`contact_id`), CONSTRAINT `user_id_fk_contacts` FOREIGN KEY (`user_id`) diff --git a/SQL/mysql.update.sql b/SQL/mysql.update.sql index aaab43fb2..f6c8d7869 100644 --- a/SQL/mysql.update.sql +++ b/SQL/mysql.update.sql @@ -133,3 +133,6 @@ ALTER TABLE `contacts` MODIFY `email` varchar(255) NOT NULL; TRUNCATE TABLE `messages`; +-- Updates from version 0.5.* + +ALTER TABLE `contacts` ADD `words` TEXT NULL AFTER `vcard`; diff --git a/SQL/postgres.initial.sql b/SQL/postgres.initial.sql index 089cae036..f52ebd572 100644 --- a/SQL/postgres.initial.sql +++ b/SQL/postgres.initial.sql @@ -103,14 +103,15 @@ CREATE SEQUENCE contact_ids CREATE TABLE contacts ( contact_id integer DEFAULT nextval('contact_ids'::text) PRIMARY KEY, user_id integer NOT NULL - REFERENCES users (user_id) ON DELETE CASCADE ON UPDATE CASCADE, + REFERENCES users (user_id) ON DELETE CASCADE ON UPDATE CASCADE, changed timestamp with time zone DEFAULT now() NOT NULL, del smallint DEFAULT 0 NOT NULL, name varchar(128) DEFAULT '' NOT NULL, email varchar(255) DEFAULT '' NOT NULL, firstname varchar(128) DEFAULT '' NOT NULL, surname varchar(128) DEFAULT '' NOT NULL, - vcard text + vcard text, + words text ); CREATE INDEX contacts_user_id_idx ON contacts (user_id, email); diff --git a/SQL/postgres.update.sql b/SQL/postgres.update.sql index 2f4498d36..6c3838868 100644 --- a/SQL/postgres.update.sql +++ b/SQL/postgres.update.sql @@ -89,3 +89,9 @@ ALTER TABLE users ADD UNIQUE (username, mail_host); ALTER TABLE contacts ALTER email TYPE varchar(255); TRUNCATE messages; + + +-- Updates from version 0.5.x + +ALTER TABLE contacts ADD words TEXT NULL; + diff --git a/SQL/sqlite.initial.sql b/SQL/sqlite.initial.sql index 875b3cba0..3aacccb95 100644 --- a/SQL/sqlite.initial.sql +++ b/SQL/sqlite.initial.sql @@ -31,7 +31,8 @@ CREATE TABLE contacts ( email varchar(255) NOT NULL default '', firstname varchar(128) NOT NULL default '', surname varchar(128) NOT NULL default '', - vcard text NOT NULL default '' + vcard text NOT NULL default '', + words text NOT NULL default '' ); CREATE INDEX ix_contacts_user_id ON contacts(user_id, email); diff --git a/SQL/sqlite.update.sql b/SQL/sqlite.update.sql index 6f2acf913..46373d8b0 100644 --- a/SQL/sqlite.update.sql +++ b/SQL/sqlite.update.sql @@ -182,3 +182,42 @@ DROP TABLE contacts_tmp; DELETE FROM messages; + +-- Updates from version 0.5.x + +CREATE TABLE contacts_tmp ( + contact_id integer NOT NULL PRIMARY KEY, + user_id integer NOT NULL default '0', + changed datetime NOT NULL default '0000-00-00 00:00:00', + del tinyint NOT NULL default '0', + name varchar(128) NOT NULL default '', + email varchar(255) NOT NULL default '', + firstname varchar(128) NOT NULL default '', + surname varchar(128) NOT NULL default '', + vcard text NOT NULL default '' +); + +INSERT INTO contacts_tmp (contact_id, user_id, changed, del, name, email, firstname, surname, vcard) + SELECT contact_id, user_id, changed, del, name, email, firstname, surname, vcard FROM contacts; + +DROP TABLE contacts; +CREATE TABLE contacts ( + contact_id integer NOT NULL PRIMARY KEY, + user_id integer NOT NULL default '0', + changed datetime NOT NULL default '0000-00-00 00:00:00', + del tinyint NOT NULL default '0', + name varchar(128) NOT NULL default '', + email varchar(255) NOT NULL default '', + firstname varchar(128) NOT NULL default '', + surname varchar(128) NOT NULL default '', + vcard text NOT NULL default '', + words text NOT NULL default '' +); + +INSERT INTO contacts (contact_id, user_id, changed, del, name, email, firstname, surname, vcard) + SELECT contact_id, user_id, changed, del, name, email, firstname, surname, vcard FROM contacts_tmp; + +CREATE INDEX ix_contacts_user_id ON contacts(user_id, email); +DROP TABLE contacts_tmp; + +DELETE FROM messages; diff --git a/bin/indexcontacts.sh b/bin/indexcontacts.sh new file mode 100755 index 000000000..ac20e0441 --- /dev/null +++ b/bin/indexcontacts.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env php + | + +-----------------------------------------------------------------------+ + + $Id$ + +*/ + +define('INSTALL_PATH', realpath(dirname(__FILE__) . '/..') . '/' ); + +require_once INSTALL_PATH.'program/include/clisetup.php'; + + +// connect to DB +$RCMAIL = rcmail::get_instance(); + +$db = $RCMAIL->get_dbh(); +$db->db_connect('w'); + +if (!$db->is_connected() || $db->is_error()) + die("No DB connection\n"); + +// iterate over all users +$sql_result = $db->query("SELECT user_id FROM " . $RCMAIL->config->get('db_table_users', 'users')." WHERE 1"); +while ($sql_result && ($sql_arr = $db->fetch_assoc($sql_result))) { + echo "Indexing contacts for user " . $sql_arr['user_id'] . "..."; + + $contacts = new rcube_contacts($db, $sql_arr['user_id']); + $contacts->set_pagesize(9999); + + $result = $contacts->list_records(); + while ($result->count && ($row = $result->next())) { + unset($row['words']); + $contacts->update($row['ID'], $row); + } + + echo "done.\n"; +} + +?> diff --git a/program/include/rcube_addressbook.php b/program/include/rcube_addressbook.php index 648ef838e..d1b0f6281 100644 --- a/program/include/rcube_addressbook.php +++ b/program/include/rcube_addressbook.php @@ -32,6 +32,7 @@ abstract class rcube_addressbook const ERROR_NO_CONNECTION = 2; const ERROR_INCOMPLETE = 3; const ERROR_SAVING = 4; + const ERROR_SEARCH = 5; /** public properties (mandatory) */ public $primary_key; @@ -384,6 +385,26 @@ abstract class rcube_addressbook return $out; } + + + /** + * Normalize the given string for fulltext search. + * Currently only optimized for Latin-1 characters; to be extended + * + * @param string Input string (UTF-8) + * @return string Normalized string + */ + protected static function normalize_string($str) + { + $norm = strtolower(strtr(utf8_decode($str), + 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ', + 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')); + + return preg_replace( + array('/[\s;\+\-\/]+/i', '/(\d)\s+(\d)/', '/\s\w{1,3}\s/'), + array(' ', '\\1\\2', ' '), + $norm); + } } diff --git a/program/include/rcube_contacts.php b/program/include/rcube_contacts.php index 9ad4f17bb..8c9810fa9 100644 --- a/program/include/rcube_contacts.php +++ b/program/include/rcube_contacts.php @@ -5,7 +5,7 @@ | program/include/rcube_contacts.php | | | | This file is part of the Roundcube Webmail client | - | Copyright (C) 2006-2010, The Roundcube Dev Team | + | Copyright (C) 2006-2011, The Roundcube Dev Team | | Licensed under the GNU GPL | | | | PURPOSE: | @@ -41,10 +41,11 @@ class rcube_contacts extends rcube_addressbook private $user_id = 0; private $filter = null; private $result = null; - private $search_fields; - private $search_string; private $cache; - private $table_cols = array('name', 'email', 'firstname', 'surname', 'vcard'); + private $table_cols = array('name', 'email', 'firstname', 'surname'); + private $fulltext_cols = array('name', 'firstname', 'surname', 'middlename', 'nickname', + 'jobtitle', 'organization', 'department', 'maidenname', 'email', 'phone', + 'address', 'street', 'locality', 'zipcode', 'region', 'country', 'website', 'im', 'notes'); // public properties public $primary_key = 'contact_id'; @@ -115,8 +116,6 @@ class rcube_contacts extends rcube_addressbook { $this->result = null; $this->filter = null; - $this->search_fields = null; - $this->search_string = null; $this->cache = null; } @@ -253,8 +252,15 @@ class rcube_contacts extends rcube_addressbook $ids = $this->db->array2list($ids, 'integer'); $where[] = 'c.' . $this->primary_key.' IN ('.$ids.')'; } - else if ($strict) + else if ($strict) { $where[] = $this->db->quoteIdentifier($col).' = '.$this->db->quote($value); + } + else if ($col == '*') { + $words = array(); + foreach(explode(" ", self::normalize_string($value)) as $word) + $words[] = $this->db->ilike('words', '%'.$word.'%'); + $where[] = '(' . join(' AND ', $words) . ')'; + } else $where[] = $this->db->ilike($col, '%'.$value.'%'); } @@ -528,15 +534,21 @@ class rcube_contacts extends rcube_addressbook private function convert_save_data($save_data, $record = array()) { $out = array(); + $words = ''; // copy values into vcard object $vcard = new rcube_vcard($record['vcard'] ? $record['vcard'] : $save_data['vcard']); $vcard->reset(); foreach ($save_data as $key => $values) { list($field, $section) = explode(':', $key); + $fulltext = in_array($field, $this->fulltext_cols); foreach ((array)$values as $value) { if (isset($value)) $vcard->set($field, $value, $section); + if ($fulltext && is_array($value)) + $words .= ' ' . self::normalize_string(join(" ", $value)); + else if ($fulltext && strlen($value) >= 3) + $words .= ' ' . self::normalize_string($value); } } $out['vcard'] = $vcard->export(); @@ -552,6 +564,9 @@ class rcube_contacts extends rcube_addressbook // save all e-mails in database column $out['email'] = join(", ", $vcard->email); + // join words for fulltext search + $out['words'] = join(" ", array_unique(explode(" ", $words))); + return $out; } diff --git a/program/include/rcube_ldap.php b/program/include/rcube_ldap.php index 9c9973f25..3cb47480a 100644 --- a/program/include/rcube_ldap.php +++ b/program/include/rcube_ldap.php @@ -407,6 +407,17 @@ class rcube_ldap extends rcube_addressbook $filter = '(|'; $wc = !$strict && $this->prop['fuzzy_search'] ? '*' : ''; + if ($fields != '*') + { + // search_fields are required for fulltext search + if (!$this->prop['search_fields']) + { + $this->set_error(self::ERROR_SEARCH, 'nofulltextsearch'); + $this->result = new rcube_result_set(); + return $this->result; + } + } + if (is_array($this->prop['search_fields'])) { foreach ($this->prop['search_fields'] as $k => $field) diff --git a/program/include/rcube_vcard.php b/program/include/rcube_vcard.php index 82538379f..40544be90 100644 --- a/program/include/rcube_vcard.php +++ b/program/include/rcube_vcard.php @@ -135,11 +135,15 @@ class rcube_vcard $typemap = $this->typemap; // copy name fields to output array - foreach (array('firstname','surname','middlename','nickname','organization') as $col) - $out[$col] = $this->$col; + foreach (array('firstname','surname','middlename','nickname','organization') as $col) { + if (strlen($this->$col)) + $out[$col] = $this->$col; + } - $out['prefix'] = $this->raw['N'][0][3]; - $out['suffix'] = $this->raw['N'][0][4]; + if ($this->raw['N'][0][3]) + $out['prefix'] = $this->raw['N'][0][3]; + if ($this->raw['N'][0][4]) + $out['suffix'] = $this->raw['N'][0][4]; // convert from raw vcard data into associative data for Roundcube foreach (array_flip($this->fieldmap) as $tag => $col) { diff --git a/program/steps/addressbook/search.inc b/program/steps/addressbook/search.inc index 0b4397f6d..7d6775507 100644 --- a/program/steps/addressbook/search.inc +++ b/program/steps/addressbook/search.inc @@ -26,7 +26,7 @@ $search = trim(get_input_value('_q', RCUBE_INPUT_GET)); $search_request = md5('addr'.$search); // get contacts for this user -$result = $CONTACTS->search(array('name','email'), $search); +$result = $CONTACTS->search('*', $search); // save search settings in session $_SESSION['search'][$search_request] = $CONTACTS->get_search_set(); -- cgit v1.2.3