summaryrefslogtreecommitdiff
path: root/program/lib/Roundcube/rcube_spellchecker.php
diff options
context:
space:
mode:
authorHugues Hiegel <root@paranoid>2014-08-05 16:44:07 +0200
committerHugues Hiegel <root@paranoid>2014-08-05 16:44:07 +0200
commitf91f8533678c388b879d9e999a6bcc9e22ad7e19 (patch)
treec0099e25a932399d6cd1066607a42e7fe220b977 /program/lib/Roundcube/rcube_spellchecker.php
parentec116d33c22b371328c8557158736e3d2ee479a8 (diff)
lot of stuff
Diffstat (limited to 'program/lib/Roundcube/rcube_spellchecker.php')
-rw-r--r--program/lib/Roundcube/rcube_spellchecker.php343
1 files changed, 270 insertions, 73 deletions
diff --git a/program/lib/Roundcube/rcube_spellchecker.php b/program/lib/Roundcube/rcube_spellchecker.php
index 3182ff378..672515204 100644
--- a/program/lib/Roundcube/rcube_spellchecker.php
+++ b/program/lib/Roundcube/rcube_spellchecker.php
@@ -3,8 +3,8 @@
/*
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
- | Copyright (C) 2011-2013, Kolab Systems AG |
- | Copyright (C) 2008-2013, The Roundcube Dev Team |
+ | Copyright (C) 2011, Kolab Systems AG |
+ | Copyright (C) 2008-2011, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
@@ -28,15 +28,21 @@ class rcube_spellchecker
{
private $matches = array();
private $engine;
- private $backend;
private $lang;
private $rc;
private $error;
+ private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/';
private $options = array();
private $dict;
private $have_dict;
+ // default settings
+ const GOOGLE_HOST = 'ssl://spell.roundcube.net';
+ const GOOGLE_PORT = 443;
+ const MAX_SUGGESTIONS = 10;
+
+
/**
* Constructor
*
@@ -54,63 +60,8 @@ class rcube_spellchecker
'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
'dictionary' => $this->rc->config->get('spellcheck_dictionary'),
);
-
- $cls = 'rcube_spellcheck_' . $this->engine;
- if (class_exists($cls)) {
- $this->backend = new $cls($this, $this->lang);
- $this->backend->options = $this->options;
- }
- else {
- $this->error = "Unknown spellcheck engine '$this->engine'";
- }
}
- /**
- * Return a list of supported languages
- */
- function languages()
- {
- // trust configuration
- $configured = $this->rc->config->get('spellcheck_languages');
- if (!empty($configured) && is_array($configured) && !$configured[0]) {
- return $configured;
- }
- else if (!empty($configured)) {
- $langs = (array)$configured;
- }
- else if ($this->backend) {
- $langs = $this->backend->languages();
- }
-
- // load index
- @include(RCUBE_LOCALIZATION_DIR . 'index.inc');
-
- // add correct labels
- $languages = array();
- foreach ($langs as $lang) {
- $langc = strtolower(substr($lang, 0, 2));
- $alias = $rcube_language_aliases[$langc];
- if (!$alias) {
- $alias = $langc.'_'.strtoupper($langc);
- }
- if ($rcube_languages[$lang]) {
- $languages[$lang] = $rcube_languages[$lang];
- }
- else if ($rcube_languages[$alias]) {
- $languages[$lang] = $rcube_languages[$alias];
- }
- else {
- $languages[$lang] = ucfirst($lang);
- }
- }
-
- // remove possible duplicates (#1489395)
- $languages = array_unique($languages);
-
- asort($languages);
-
- return $languages;
- }
/**
* Set content and check spelling
@@ -130,8 +81,11 @@ class rcube_spellchecker
$this->content = $text;
}
- if ($this->backend) {
- $this->matches = $this->backend->check($this->content);
+ if ($this->engine == 'pspell') {
+ $this->matches = $this->_pspell_check($this->content);
+ }
+ else {
+ $this->matches = $this->_googie_check($this->content);
}
return $this->found() == 0;
@@ -158,11 +112,11 @@ class rcube_spellchecker
*/
function get_suggestions($word)
{
- if ($this->backend) {
- return $this->backend->get_suggestions($word);
+ if ($this->engine == 'pspell') {
+ return $this->_pspell_suggestions($word);
}
- return array();
+ return $this->_googie_suggestions($word);
}
@@ -176,15 +130,11 @@ class rcube_spellchecker
*/
function get_words($text = null, $is_html=false)
{
- if ($is_html) {
- $text = $this->html2text($text);
- }
-
- if ($this->backend) {
- return $this->backend->get_words($text);
+ if ($this->engine == 'pspell') {
+ return $this->_pspell_words($text, $is_html);
}
- return array();
+ return $this->_googie_words($text, $is_html);
}
@@ -198,7 +148,7 @@ class rcube_spellchecker
// send output
$out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
- foreach ((array)$this->matches as $item) {
+ foreach ($this->matches as $item) {
$out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
$out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
$out .= '</c>';
@@ -219,7 +169,7 @@ class rcube_spellchecker
{
$result = array();
- foreach ((array)$this->matches as $item) {
+ foreach ($this->matches as $item) {
if ($this->engine == 'pspell') {
$word = $item[0];
}
@@ -240,7 +190,254 @@ class rcube_spellchecker
*/
function error()
{
- return $this->error ? $this->error : ($this->backend ? $this->backend->error() : false);
+ return $this->error;
+ }
+
+
+    /**
+     * Checks the text using pspell
+     *
+     * @param string $text Text content for spellchecking
+     *
+     * @return array List of matches; each match is
+     *               array(word, offset, length, null, suggestions)
+     */
+    private function _pspell_check($text)
+    {
+        // init spellchecker
+        $this->_pspell_init();
+
+        if (!$this->plink) {
+            return array();
+        }
+
+        // tokenize; -1 means "no limit" (passing NULL for the integer
+        // limit argument is deprecated since PHP 8.1)
+        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
+
+        // preg_split() returns false on failure, nothing to check then
+        if (!is_array($tokens)) {
+            return array();
+        }
+
+        $diff    = 0;
+        $matches = array();
+
+        foreach ($tokens as $w) {
+            $word = trim($w[0]);
+            // $w[1] is a byte offset; $diff holds the bytes-minus-characters
+            // surplus of all preceding words, so subtracting it gives an
+            // offset counted in characters (assumes separators are
+            // single-byte -- TODO confirm)
+            $pos  = $w[1] - $diff;
+            $len  = mb_strlen($word);
+
+            // exceptions are skipped, everything else is verified with pspell
+            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
+                $suggestions = pspell_suggest($this->plink, $word);
+
+                // pspell_suggest() may return a non-array value on failure
+                if (!is_array($suggestions)) {
+                    $suggestions = array();
+                }
+
+                $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
+
+                $matches[] = array($word, $pos, $len, null, $suggestions);
+            }
+
+            $diff += (strlen($word) - $len);
+        }
+
+        return $matches;
+    }
+
+
+    /**
+     * Returns the misspelled words
+     *
+     * @param string $text    Text to check; when empty, the words of the
+     *                        matches already stored in this object are
+     *                        returned instead
+     * @param bool   $is_html When true, $text is passed through html2text()
+     *                        before it is checked
+     *
+     * @return array List of misspelled words
+     */
+    private function _pspell_words($text = null, $is_html=false)
+    {
+        $result = array();
+
+        // no text given: report the words of the stored matches
+        if (!$text) {
+            foreach ($this->matches as $m) {
+                $result[] = $m[0];
+            }
+
+            return $result;
+        }
+
+        // init spellchecker
+        $this->_pspell_init();
+
+        if (!$this->plink) {
+            return array();
+        }
+
+        // With PSpell we don't need to get suggestions to return misspelled words
+        if ($is_html) {
+            $text = $this->html2text($text);
+        }
+
+        // Only the words are needed here, so offsets are not captured.
+        // -1 means "no limit" (passing NULL for the integer limit argument
+        // is deprecated since PHP 8.1)
+        $words = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY);
+
+        // preg_split() returns false on failure, nothing to check then
+        if (!is_array($words)) {
+            return $result;
+        }
+
+        foreach ($words as $w) {
+            $word = trim($w);
+
+            // exceptions are skipped, everything else is verified with pspell
+            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
+                $result[] = $word;
+            }
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Returns suggestions for misspelled word
+     *
+     * @param string $word Misspelled word
+     *
+     * @return array Suggestions, at most MAX_SUGGESTIONS of them; empty when
+     *               the pspell link is not available or no list was returned
+     */
+    private function _pspell_suggestions($word)
+    {
+        // init spellchecker
+        $this->_pspell_init();
+
+        if (!$this->plink) {
+            return array();
+        }
+
+        $suggestions = pspell_suggest($this->plink, $word);
+
+        // Validate the result before measuring it: pspell_suggest() may
+        // return a non-array value, and counting that is an error in PHP 8
+        if (!is_array($suggestions)) {
+            return array();
+        }
+
+        return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
+    }
+
+
+    /**
+     * Initializes PSpell dictionary
+     *
+     * Creates the pspell link for the current language and stores it in
+     * $this->plink. Does nothing when the link already exists. On failure
+     * $this->error is set and $this->plink stays empty.
+     */
+    private function _pspell_init()
+    {
+        // link already created by an earlier call
+        if ($this->plink) {
+            return;
+        }
+
+        if (!extension_loaded('pspell')) {
+            $this->error = "Pspell extension not available";
+            rcube::raise_error(array(
+                'code' => 500, 'type' => 'php',
+                'file' => __FILE__, 'line' => __LINE__,
+                'message' => $this->error), true, false);
+
+            return;
+        }
+
+        // spelling and jargon are left at their documented default (empty
+        // string); passing NULL for string arguments is deprecated since PHP 8.1
+        $this->plink = pspell_new($this->lang, '', '', RCUBE_CHARSET, PSPELL_FAST);
+
+        if (!$this->plink) {
+            $this->error = "Unable to load Pspell engine for selected language";
+        }
+    }
+
+
+    /**
+     * Checks the text using a remote spelling service
+     *
+     * The request goes to the URI configured as 'spellcheck_uri' (with the
+     * language code appended), or to the default host when none is set.
+     *
+     * @param string $text Text content for spellchecking
+     *
+     * @return array List of matches as captured from the response; each
+     *               match is array(full element, offset, length,
+     *               "s" attribute, tab-separated suggestions)
+     */
+    private function _googie_check($text)
+    {
+        // spell check uri is configured
+        $url = $this->rc->config->get('spellcheck_uri');
+
+        if ($url) {
+            $a_uri = parse_url($url);
+
+            // parse_url() returns false for malformed URLs and leaves out
+            // components that are not present, so provide empty defaults
+            if (!is_array($a_uri)) {
+                $a_uri = array();
+            }
+            $a_uri += array('scheme' => '', 'host' => '', 'port' => 0, 'path' => '', 'query' => '');
+
+            $ssl  = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl');
+            $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80);
+            $host = ($ssl ? 'ssl://' : '') . $a_uri['host'];
+            $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang;
+        }
+        else {
+            $host = self::GOOGLE_HOST;
+            $port = self::GOOGLE_PORT;
+            $path = '/tbproxy/spell?lang=' . $this->lang;
+        }
+
+        // Google has some problem with spaces, use \n instead
+        $gtext = str_replace(' ', "\n", $text);
+
+        $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
+            .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
+            .'<text>' . htmlspecialchars($gtext) . '</text>'
+            .'</spellrequest>';
+
+        $store = '';
+        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
+            $out = "POST $path HTTP/1.0\r\n";
+            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
+            $out .= "Content-Length: " . strlen($gtext) . "\r\n";
+            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
+            $out .= "Connection: Close\r\n\r\n";
+            $out .= $gtext;
+            fwrite($fp, $out);
+
+            while (!feof($fp)) {
+                $line = fgets($fp, 128);
+
+                // stop on read errors/timeouts instead of looping forever
+                if ($line === false) {
+                    break;
+                }
+
+                $store .= $line;
+            }
+            fclose($fp);
+        }
+
+        if (!$store) {
+            $this->error = "Empty result from spelling engine";
+        }
+
+        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
+
+        // skip exceptions (if appropriate options are enabled)
+        if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
+            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
+        ) {
+            foreach ($matches as $idx => $m) {
+                $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);
+                // skip exceptions
+                if ($this->is_exception($word)) {
+                    unset($matches[$idx]);
+                }
+            }
+
+            // close the gaps left by unset() so that the list starts at
+            // index 0 again
+            $matches = array_values($matches);
+        }
+
+        return $matches;
+    }
+
+
+    /**
+     * Returns the misspelled words found by the remote spelling service
+     *
+     * @param string $text    Text to check; when empty, the stored content
+     *                        and matches of this object are used instead
+     * @param bool   $is_html When true, $text is passed through html2text()
+     *                        before it is checked
+     *
+     * @return array List of misspelled words
+     */
+    private function _googie_words($text = null, $is_html=false)
+    {
+        if (!$text) {
+            // nothing to check: fall back to the stored state
+            $text  = $this->content;
+            $found = $this->matches;
+        }
+        else {
+            if ($is_html) {
+                $text = $this->html2text($text);
+            }
+
+            $found = $this->_googie_check($text);
+        }
+
+        // every match carries offset ([1]) and length ([2]) of the word
+        $words = array();
+
+        foreach ($found as $match) {
+            $words[] = mb_substr($text, $match[1], $match[2], RCUBE_CHARSET);
+        }
+
+        return $words;
+    }
+
+
+    /**
+     * Returns suggestions for a word using the remote spelling service
+     *
+     * @param string $word Word to look up; when empty, the matches already
+     *                     stored in this object are used instead
+     *
+     * @return array Suggestions, at most MAX_SUGGESTIONS of them
+     */
+    private function _googie_suggestions($word)
+    {
+        $matches = $word ? $this->_googie_check($word) : $this->matches;
+
+        // Use the first remaining match whatever its key is; index 0 is
+        // not guaranteed to exist in the list
+        $first = is_array($matches) ? reset($matches) : false;
+
+        if (empty($first[4])) {
+            return array();
+        }
+
+        // suggestions of a match are separated by tabs
+        $suggestions = explode("\t", $first[4]);
+
+        // the class constant has to be referenced with self::
+        return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
+    }