diff options
author | Hugues Hiegel <root@paranoid> | 2014-08-05 16:44:07 +0200 |
---|---|---|
committer | Hugues Hiegel <root@paranoid> | 2014-08-05 16:44:07 +0200 |
commit | f91f8533678c388b879d9e999a6bcc9e22ad7e19 (patch) | |
tree | c0099e25a932399d6cd1066607a42e7fe220b977 /program/lib/Roundcube/rcube_spellchecker.php | |
parent | ec116d33c22b371328c8557158736e3d2ee479a8 (diff) |
lot of stuff
Diffstat (limited to 'program/lib/Roundcube/rcube_spellchecker.php')
-rw-r--r-- | program/lib/Roundcube/rcube_spellchecker.php | 343 |
1 files changed, 270 insertions, 73 deletions
diff --git a/program/lib/Roundcube/rcube_spellchecker.php b/program/lib/Roundcube/rcube_spellchecker.php index 3182ff378..672515204 100644 --- a/program/lib/Roundcube/rcube_spellchecker.php +++ b/program/lib/Roundcube/rcube_spellchecker.php @@ -3,8 +3,8 @@ /* +-----------------------------------------------------------------------+ | This file is part of the Roundcube Webmail client | - | Copyright (C) 2011-2013, Kolab Systems AG | - | Copyright (C) 2008-2013, The Roundcube Dev Team | + | Copyright (C) 2011, Kolab Systems AG | + | Copyright (C) 2008-2011, The Roundcube Dev Team | | | | Licensed under the GNU General Public License version 3 or | | any later version with exceptions for skins & plugins. | @@ -28,15 +28,21 @@ class rcube_spellchecker { private $matches = array(); private $engine; - private $backend; private $lang; private $rc; private $error; + private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/'; private $options = array(); private $dict; private $have_dict; + // default settings + const GOOGLE_HOST = 'ssl://spell.roundcube.net'; + const GOOGLE_PORT = 443; + const MAX_SUGGESTIONS = 10; + + /** * Constructor * @@ -54,63 +60,8 @@ class rcube_spellchecker 'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'), 'dictionary' => $this->rc->config->get('spellcheck_dictionary'), ); - - $cls = 'rcube_spellcheck_' . $this->engine; - if (class_exists($cls)) { - $this->backend = new $cls($this, $this->lang); - $this->backend->options = $this->options; - } - else { - $this->error = "Unknown spellcheck engine '$this->engine'"; - } } - /** - * Return a list of supported languages - */ - function languages() - { - // trust configuration - $configured = $this->rc->config->get('spellcheck_languages'); - if (!empty($configured) && is_array($configured) && !$configured[0]) { - return $configured; - } - else if (!empty($configured)) { - $langs = (array)$configured; - } - else if ($this->backend) { - $langs = $this->backend->languages(); - } - - // load index - @include(RCUBE_LOCALIZATION_DIR . 'index.inc'); - - // add correct labels - $languages = array(); - foreach ($langs as $lang) { - $langc = strtolower(substr($lang, 0, 2)); - $alias = $rcube_language_aliases[$langc]; - if (!$alias) { - $alias = $langc.'_'.strtoupper($langc); - } - if ($rcube_languages[$lang]) { - $languages[$lang] = $rcube_languages[$lang]; - } - else if ($rcube_languages[$alias]) { - $languages[$lang] = $rcube_languages[$alias]; - } - else { - $languages[$lang] = ucfirst($lang); - } - } - - // remove possible duplicates (#1489395) - $languages = array_unique($languages); - - asort($languages); - - return $languages; - } /** * Set content and check spelling @@ -130,8 +81,11 @@ class rcube_spellchecker $this->content = $text; } - if ($this->backend) { - $this->matches = $this->backend->check($this->content); + if ($this->engine == 'pspell') { + $this->matches = $this->_pspell_check($this->content); + } + else { + $this->matches = $this->_googie_check($this->content); } return $this->found() == 0; @@ -158,11 +112,11 @@ class rcube_spellchecker */ function get_suggestions($word) { - if ($this->backend) { - return $this->backend->get_suggestions($word); + if ($this->engine == 'pspell') { + return $this->_pspell_suggestions($word); } - return array(); + return $this->_googie_suggestions($word); } @@ -176,15 +130,11 @@ class rcube_spellchecker */ function get_words($text = null, $is_html=false) { - if ($is_html) { - $text = $this->html2text($text); - } - - if ($this->backend) { - return $this->backend->get_words($text); + if ($this->engine == 'pspell') { + return $this->_pspell_words($text, $is_html); } - return array(); + return $this->_googie_words($text, $is_html); } @@ -198,7 +148,7 @@ class rcube_spellchecker // send output $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; - foreach ((array)$this->matches as $item) { + foreach ($this->matches as $item) { $out .= '<c o="'.$item[1].'" l="'.$item[2].'">'; $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; $out .= '</c>'; @@ -219,7 +169,7 @@ class rcube_spellchecker { $result = array(); - foreach ((array)$this->matches as $item) { + foreach ($this->matches as $item) { if ($this->engine == 'pspell') { $word = $item[0]; } @@ -240,7 +190,254 @@ class rcube_spellchecker */ function error() { - return $this->error ? $this->error : ($this->backend ? $this->backend->error() : false); + return $this->error; + } + + + /** + * Checks the text using pspell + * + * @param string $text Text content for spellchecking + */ + private function _pspell_check($text) + { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + // tokenize + $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + + $diff = 0; + $matches = array(); + + foreach ($text as $w) { + $word = trim($w[0]); + $pos = $w[1] - $diff; + $len = mb_strlen($word); + + // skip exceptions + if ($this->is_exception($word)) { + } + else if (!pspell_check($this->plink, $word)) { + $suggestions = pspell_suggest($this->plink, $word); + + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { + $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); + } + + $matches[] = array($word, $pos, $len, null, $suggestions); + } + + $diff += (strlen($word) - $len); + } + + return $matches; + } + + + /** + * Returns the misspelled words + */ + private function _pspell_words($text = null, $is_html=false) + { + $result = array(); + + if ($text) { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + // With PSpell we don't need to get suggestions to return misspelled words + if ($is_html) { + $text = $this->html2text($text); + } + + $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + + foreach ($text as $w) { + $word = trim($w[0]); + + // skip exceptions + if ($this->is_exception($word)) { + continue; + } + + if (!pspell_check($this->plink, $word)) { + $result[] = $word; + } + } + + return $result; + } + + foreach ($this->matches as $m) { + $result[] = $m[0]; + } + + return $result; + } + + + /** + * Returns suggestions for misspelled word + */ + private function _pspell_suggestions($word) + { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + $suggestions = pspell_suggest($this->plink, $word); + + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) + $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); + + return is_array($suggestions) ? $suggestions : array(); + } + + + /** + * Initializes PSpell dictionary + */ + private function _pspell_init() + { + if (!$this->plink) { + if (!extension_loaded('pspell')) { + $this->error = "Pspell extension not available"; + rcube::raise_error(array( + 'code' => 500, 'type' => 'php', + 'file' => __FILE__, 'line' => __LINE__, + 'message' => $this->error), true, false); + + return; + } + + $this->plink = pspell_new($this->lang, null, null, RCUBE_CHARSET, PSPELL_FAST); + } + + if (!$this->plink) { + $this->error = "Unable to load Pspell engine for selected language"; + } + } + + + private function _googie_check($text) + { + // spell check uri is configured + $url = $this->rc->config->get('spellcheck_uri'); + + if ($url) { + $a_uri = parse_url($url); + $ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl'); + $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); + $host = ($ssl ? 'ssl://' : '') . $a_uri['host']; + $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang; + } + else { + $host = self::GOOGLE_HOST; + $port = self::GOOGLE_PORT; + $path = '/tbproxy/spell?lang=' . $this->lang; + } + + // Google has some problem with spaces, use \n instead + $gtext = str_replace(' ', "\n", $text); + + $gtext = '<?xml version="1.0" encoding="utf-8" ?>' + .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">' + .'<text>' . htmlspecialchars($gtext) . '</text>' + .'</spellrequest>'; + + $store = ''; + if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) { + $out = "POST $path HTTP/1.0\r\n"; + $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; + $out .= "Content-Length: " . strlen($gtext) . "\r\n"; + $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; + $out .= "Connection: Close\r\n\r\n"; + $out .= $gtext; + fwrite($fp, $out); + + while (!feof($fp)) + $store .= fgets($fp, 128); + fclose($fp); + } + + if (!$store) { + $this->error = "Empty result from spelling engine"; + } + + preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); + + // skip exceptions (if appropriate options are enabled) + if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums']) + || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary']) + ) { + foreach ($matches as $idx => $m) { + $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); + // skip exceptions + if ($this->is_exception($word)) { + unset($matches[$idx]); + } + } + } + + return $matches; + } + + + private function _googie_words($text = null, $is_html=false) + { + if ($text) { + if ($is_html) { + $text = $this->html2text($text); + } + + $matches = $this->_googie_check($text); + } + else { + $matches = $this->matches; + $text = $this->content; + } + + $result = array(); + + foreach ($matches as $m) { + $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); + } + + return $result; + } + + + private function _googie_suggestions($word) + { + if ($word) { + $matches = $this->_googie_check($word); + } + else { + $matches = $this->matches; + } + + if ($matches[0][4]) { + $suggestions = explode("\t", $matches[0][4]); + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { + $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); + } + + return $suggestions; + } + + return array(); } |