diff options
Diffstat (limited to 'program/lib/Roundcube/rcube_spellchecker.php')
| -rw-r--r-- | program/lib/Roundcube/rcube_spellchecker.php | 621 | 
1 files changed, 621 insertions, 0 deletions
| diff --git a/program/lib/Roundcube/rcube_spellchecker.php b/program/lib/Roundcube/rcube_spellchecker.php new file mode 100644 index 000000000..fce2cac75 --- /dev/null +++ b/program/lib/Roundcube/rcube_spellchecker.php @@ -0,0 +1,621 @@ +<?php + +/* + +-----------------------------------------------------------------------+ + | program/include/rcube_spellchecker.php                                | + |                                                                       | + | This file is part of the Roundcube Webmail client                     | + | Copyright (C) 2011, Kolab Systems AG                                  | + | Copyright (C) 2008-2011, The Roundcube Dev Team                       | + |                                                                       | + | Licensed under the GNU General Public License version 3 or            | + | any later version with exceptions for skins & plugins.                | + | See the README file for a full license statement.                     | + |                                                                       | + | PURPOSE:                                                              | + |   Spellchecking using different backends                              | + |                                                                       | + +-----------------------------------------------------------------------+ + | Author: Aleksander Machniak <machniak@kolabsys.com>                   | + | Author: Thomas Bruederli <roundcube@gmail.com>                        | + +-----------------------------------------------------------------------+ +*/ + + +/** + * Helper class for spellchecking with Googielspell and PSpell support. + * + * @package    Framework + * @subpackage Utils + */ +class rcube_spellchecker +{ +    private $matches = array(); +    private $engine; +    private $lang; +    private $rc; +    private $error; +    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/'; +    private $options = array(); +    private $dict; +    private $have_dict; + + +    // default settings +    const GOOGLE_HOST = 'ssl://www.google.com'; +    const GOOGLE_PORT = 443; +    const MAX_SUGGESTIONS = 10; + + +    /** +     * Constructor +     * +     * @param string $lang Language code +     */ +    function __construct($lang = 'en') +    { +        $this->rc     = rcube::get_instance(); +        $this->engine = $this->rc->config->get('spellcheck_engine', 'googie'); +        $this->lang   = $lang ? $lang : 'en'; + +        $this->options = array( +            'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'), +            'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'), +            'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'), +            'dictionary'  => $this->rc->config->get('spellcheck_dictionary'), +        ); +    } + + +    /** +     * Set content and check spelling +     * +     * @param string $text    Text content for spellchecking +     * @param bool   $is_html Enables HTML-to-Text conversion +     * +     * @return bool True when no mispelling found, otherwise false +     */ +    function check($text, $is_html = false) +    { +        // convert to plain text +        if ($is_html) { +            $this->content = $this->html2text($text); +        } +        else { +            $this->content = $text; +        } + +        if ($this->engine == 'pspell') { +            $this->matches = $this->_pspell_check($this->content); +        } +        else { +            $this->matches = $this->_googie_check($this->content); +        } + +        return $this->found() == 0; +    } + + +    /** +     * Number of mispellings found (after check) +     * +     * @return int Number of mispellings +     */ +    function found() +    { +        return count($this->matches); +    } + + +    /** +     * Returns suggestions for the specified word +     * +     * @param string $word The word +     * +     * @return array Suggestions list +     */ +    function get_suggestions($word) +    { +        if ($this->engine == 'pspell') { +            return $this->_pspell_suggestions($word); +        } + +        return $this->_googie_suggestions($word); +    } + + +    /** +     * Returns misspelled words +     * +     * @param string $text The content for spellchecking. If empty content +     *                     used for check() method will be used. +     * +     * @return array List of misspelled words +     */ +    function get_words($text = null, $is_html=false) +    { +        if ($this->engine == 'pspell') { +            return $this->_pspell_words($text, $is_html); +        } + +        return $this->_googie_words($text, $is_html); +    } + + +    /** +     * Returns checking result in XML (Googiespell) format +     * +     * @return string XML content +     */ +    function get_xml() +    { +        // send output +        $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; + +        foreach ($this->matches as $item) { +            $out .= '<c o="'.$item[1].'" l="'.$item[2].'">'; +            $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; +            $out .= '</c>'; +        } + +        $out .= '</spellresult>'; + +        return $out; +    } + + +    /** +     * Returns checking result (misspelled words with suggestions) +     * +     * @return array Spellchecking result. An array indexed by word. +     */ +    function get() +    { +        $result = array(); + +        foreach ($this->matches as $item) { +            if ($this->engine == 'pspell') { +                $word = $item[0]; +            } +            else { +                $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET); +            } +            $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; +        } + +        return $result; +    } + + +    /** +     * Returns error message +     * +     * @return string Error message +     */ +    function error() +    { +        return $this->error; +    } + + +    /** +     * Checks the text using pspell +     * +     * @param string $text Text content for spellchecking +     */ +    private function _pspell_check($text) +    { +        // init spellchecker +        $this->_pspell_init(); + +        if (!$this->plink) { +            return array(); +        } + +        // tokenize +        $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + +        $diff       = 0; +        $matches    = array(); + +        foreach ($text as $w) { +            $word = trim($w[0]); +            $pos  = $w[1] - $diff; +            $len  = mb_strlen($word); + +            // skip exceptions +            if ($this->is_exception($word)) { +            } +            else if (!pspell_check($this->plink, $word)) { +                $suggestions = pspell_suggest($this->plink, $word); + +                if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { +                    $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); +                } + +                $matches[] = array($word, $pos, $len, null, $suggestions); +            } + +            $diff += (strlen($word) - $len); +        } + +        return $matches; +    } + + +    /** +     * Returns the misspelled words +     */ +    private function _pspell_words($text = null, $is_html=false) +    { +        $result = array(); + +        if ($text) { +            // init spellchecker +            $this->_pspell_init(); + +            if (!$this->plink) { +                return array(); +            } + +            // With PSpell we don't need to get suggestions to return misspelled words +            if ($is_html) { +                $text = $this->html2text($text); +            } + +            $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + +            foreach ($text as $w) { +                $word = trim($w[0]); + +                // skip exceptions +                if ($this->is_exception($word)) { +                    continue; +                } + +                if (!pspell_check($this->plink, $word)) { +                    $result[] = $word; +                } +            } + +            return $result; +        } + +        foreach ($this->matches as $m) { +            $result[] = $m[0]; +        } + +        return $result; +    } + + +    /** +     * Returns suggestions for misspelled word +     */ +    private function _pspell_suggestions($word) +    { +        // init spellchecker +        $this->_pspell_init(); + +        if (!$this->plink) { +            return array(); +        } + +        $suggestions = pspell_suggest($this->plink, $word); + +        if (sizeof($suggestions) > self::MAX_SUGGESTIONS) +            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); + +        return is_array($suggestions) ? $suggestions : array(); +    } + + +    /** +     * Initializes PSpell dictionary +     */ +    private function _pspell_init() +    { +        if (!$this->plink) { +            if (!extension_loaded('pspell')) { +                $this->error = "Pspell extension not available"; +                rcube::raise_error(array( +                    'code' => 500, 'type' => 'php', +                    'file' => __FILE__, 'line' => __LINE__, +                    'message' => $this->error), true, false); + +                return; +            } + +            $this->plink = pspell_new($this->lang, null, null, RCUBE_CHARSET, PSPELL_FAST); +        } + +        if (!$this->plink) { +            $this->error = "Unable to load Pspell engine for selected language"; +        } +    } + + +    private function _googie_check($text) +    { +        // spell check uri is configured +        $url = $this->rc->config->get('spellcheck_uri'); + +        if ($url) { +            $a_uri = parse_url($url); +            $ssl   = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl'); +            $port  = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); +            $host  = ($ssl ? 'ssl://' : '') . $a_uri['host']; +            $path  = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang; +        } +        else { +            $host = self::GOOGLE_HOST; +            $port = self::GOOGLE_PORT; +            $path = '/tbproxy/spell?lang=' . $this->lang; +        } + +        // Google has some problem with spaces, use \n instead +        $gtext = str_replace(' ', "\n", $text); + +        $gtext = '<?xml version="1.0" encoding="utf-8" ?>' +            .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">' +            .'<text>' . $gtext . '</text>' +            .'</spellrequest>'; + +        $store = ''; +        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) { +            $out = "POST $path HTTP/1.0\r\n"; +            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; +            $out .= "Content-Length: " . strlen($gtext) . "\r\n"; +            $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; +            $out .= "Connection: Close\r\n\r\n"; +            $out .= $gtext; +            fwrite($fp, $out); + +            while (!feof($fp)) +                $store .= fgets($fp, 128); +            fclose($fp); +        } + +        if (!$store) { +            $this->error = "Empty result from spelling engine"; +        } + +        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); + +        // skip exceptions (if appropriate options are enabled) +        if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums']) +            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary']) +        ) { +            foreach ($matches as $idx => $m) { +                $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); +                // skip  exceptions +                if ($this->is_exception($word)) { +                    unset($matches[$idx]); +                } +            } +        } + +        return $matches; +    } + + +    private function _googie_words($text = null, $is_html=false) +    { +        if ($text) { +            if ($is_html) { +                $text = $this->html2text($text); +            } + +            $matches = $this->_googie_check($text); +        } +        else { +            $matches = $this->matches; +            $text    = $this->content; +        } + +        $result = array(); + +        foreach ($matches as $m) { +            $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); +        } + +        return $result; +    } + + +    private function _googie_suggestions($word) +    { +        if ($word) { +            $matches = $this->_googie_check($word); +        } +        else { +            $matches = $this->matches; +        } + +        if ($matches[0][4]) { +            $suggestions = explode("\t", $matches[0][4]); +            if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { +                $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); +            } + +            return $suggestions; +        } + +        return array(); +    } + + +    private function html2text($text) +    { +        $h2t = new html2text($text, false, true, 0); +        return $h2t->get_text(); +    } + + +    /** +     * Check if the specified word is an exception accoring to  +     * spellcheck options. +     * +     * @param string  $word  The word +     * +     * @return bool True if the word is an exception, False otherwise +     */ +    public function is_exception($word) +    { +        // Contain only symbols (e.g. "+9,0", "2:2") +        if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word)) +            return true; + +        // Contain symbols (e.g. "g@@gle"), all symbols excluding separators +        if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) +            return true; + +        // Contain numbers (e.g. "g00g13") +        if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) +            return true; + +        // Blocked caps (e.g. "GOOGLE") +        if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word)) +            return true; + +        // Use exceptions from dictionary +        if (!empty($this->options['dictionary'])) { +            $this->load_dict(); + +            // @TODO: should dictionary be case-insensitive? +            if (!empty($this->dict) && in_array($word, $this->dict)) +                return true; +        } + +        return false; +    } + + +    /** +     * Add a word to dictionary +     * +     * @param string  $word  The word to add +     */ +    public function add_word($word) +    { +        $this->load_dict(); + +        foreach (explode(' ', $word) as $word) { +            // sanity check +            if (strlen($word) < 512) { +                $this->dict[] = $word; +                $valid = true; +            } +        } + +        if ($valid) { +            $this->dict = array_unique($this->dict); +            $this->update_dict(); +        } +    } + + +    /** +     * Remove a word from dictionary +     * +     * @param string  $word  The word to remove +     */ +    public function remove_word($word) +    { +        $this->load_dict(); + +        if (($key = array_search($word, $this->dict)) !== false) { +            unset($this->dict[$key]); +            $this->update_dict(); +        } +    } + + +    /** +     * Update dictionary row in DB +     */ +    private function update_dict() +    { +        if (strcasecmp($this->options['dictionary'], 'shared') != 0) { +            $userid = $this->rc->get_user_id(); +        } + +        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array( +            'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict)); + +        if (!empty($plugin['abort'])) { +            return; +        } + +        if ($this->have_dict) { +            if (!empty($this->dict)) { +                $this->rc->db->query( +                    "UPDATE ".$this->rc->db->table_name('dictionary') +                    ." SET data = ?" +                    ." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") +                        ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", +                    implode(' ', $plugin['dictionary']), $plugin['language']); +            } +            // don't store empty dict +            else { +                $this->rc->db->query( +                    "DELETE FROM " . $this->rc->db->table_name('dictionary') +                    ." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") +                        ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", +                    $plugin['language']); +            } +        } +        else if (!empty($this->dict)) { +            $this->rc->db->query( +                "INSERT INTO " .$this->rc->db->table_name('dictionary') +                ." (user_id, " . $this->rc->db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)", +                $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary'])); +        } +    } + + +    /** +     * Get dictionary from DB +     */ +    private function load_dict() +    { +        if (is_array($this->dict)) { +            return $this->dict; +        } + +        if (strcasecmp($this->options['dictionary'], 'shared') != 0) { +            $userid = $this->rc->get_user_id(); +        } + +        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array( +            'userid' => $userid, 'language' => $this->lang, 'dictionary' => array())); + +        if (empty($plugin['abort'])) { +            $dict = array(); +            $this->rc->db->query( +                "SELECT data FROM ".$this->rc->db->table_name('dictionary') +                ." WHERE user_id ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") +                    ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", +                $plugin['language']); + +            if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) { +                $this->have_dict = true; +                if (!empty($sql_arr['data'])) { +                    $dict = explode(' ', $sql_arr['data']); +                } +            } + +            $plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict); +        } + +        if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) { +            $this->dict = $plugin['dictionary']; +        } +        else { +            $this->dict = array(); +        } + +        return $this->dict; +    } + +} | 
