From b4edf78e4b75bc40a829147941ba0cf6379fbc39 Mon Sep 17 00:00:00 2001 From: alecpl Date: Mon, 30 May 2011 15:08:26 +0000 Subject: - Provided rcube_spellchecker class, simplified code in utils task (less spell* files) --- program/include/rcube_spellchecker.php | 393 ++++++++++++++++++++++++++++++ program/js/editor.js | 2 +- program/steps/utils/spell.inc | 26 +- program/steps/utils/spell_googie.inc | 75 ------ program/steps/utils/spell_html.inc | 53 ++++ program/steps/utils/spell_html_googie.inc | 110 --------- program/steps/utils/spell_html_pspell.inc | 76 ------ program/steps/utils/spell_pspell.inc | 75 ------ 8 files changed, 465 insertions(+), 345 deletions(-) create mode 100644 program/include/rcube_spellchecker.php delete mode 100644 program/steps/utils/spell_googie.inc create mode 100644 program/steps/utils/spell_html.inc delete mode 100644 program/steps/utils/spell_html_googie.inc delete mode 100644 program/steps/utils/spell_html_pspell.inc delete mode 100644 program/steps/utils/spell_pspell.inc diff --git a/program/include/rcube_spellchecker.php b/program/include/rcube_spellchecker.php new file mode 100644 index 000000000..7acb70095 --- /dev/null +++ b/program/include/rcube_spellchecker.php @@ -0,0 +1,393 @@ + | + | Author: Thomas Bruederli | + +-----------------------------------------------------------------------+ + + $Id$ + +*/ + + +/** + * Helper class for spellchecking with Googielspell and PSpell support. + * + * @package Core + */ +class rcube_spellchecker +{ + private $matches = array(); + private $engine; + private $lang; + private $rc; + private $error; + private $separator = '/[ !"#$%&()*+\\,\/\n:;<=>?@\[\]^_{|}-]+|\.[^\w]/'; + + + // default settings + const GOOGLE_HOST = 'ssl://www.google.com'; + const GOOGLE_PORT = 443; + const MAX_SUGGESTIONS = 10; + + + /** + * Constructor + * + * @param string $lang Language code + */ + function __construct($lang = 'en') + { + $this->rc = rcmail::get_instance(); + $this->engine = $this->rc->config->get('spellcheck_engine', 'googie'); + $this->lang = $lang; + + if ($this->engine == 'pspell' && !extension_loaded('pspell')) { + raise_error(array( + 'code' => 500, 'type' => 'php', + 'file' => __FILE__, 'line' => __LINE__, + 'message' => "Pspell extension not available"), true, true); + } + } + + + /** + * Set content and check spelling + * + * @param string $text Text content for spellchecking + * @param bool $is_html Enables HTML-to-Text conversion + * + * @return bool True when no mispelling found, otherwise false + */ + function check($text, $is_html=false) + { + // convert to plain text + if ($is_html) { + $this->content = $this->html2text($text); + } + else { + $this->content = $text; + } + + if ($this->engine == 'pspell') { + $this->matches = $this->_pspell_check($this->content); + } + else { + $this->matches = $this->_googie_check($this->content); + } + + return $this->found() == 0; + } + + + /** + * Number of mispellings found (after check) + * + * @return int Number of mispellings + */ + function found() + { + return count($this->matches); + } + + + /** + * Returns suggestions for the specified word + * + * @param string $word The word + * + * @return array Suggestions list + */ + function get_suggestions($word) + { + if ($this->engine == 'pspell') { + return $this->_pspell_suggestions($word); + } + + return $this->_googie_suggestions($word); + } + + + /** + * Returns mispelled words + * + * @param string $text The content for spellchecking. If empty content + * used for check() method will be used. + * + * @return array List of mispelled words + */ + function get_words($text = null, $is_html=false) + { + if ($this->engine == 'pspell') { + return $this->_pspell_words($text, $is_html); + } + + return $this->_googie_words($text, $is_html); + } + + + /** + * Returns checking result in XML (Googiespell) format + * + * @return string XML content + */ + function get_xml() + { + // send output + $out = ''; + + foreach ($this->matches as $item) { + $out .= ''; + $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; + $out .= ''; + } + + $out .= ''; + + return $out; + } + + + /** + * Returns error message + * + * @return string Error message + */ + function error() + { + return $this->error; + } + + + /** + * Checks the text using pspell + * + * @param string $text Text content for spellchecking + */ + private function _pspell_check($text) + { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + // tokenize + $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + + $diff = 0; + $matches = array(); + + foreach ($text as $w) { + $word = trim($w[0]); + $pos = $w[1] - $diff; + $len = mb_strlen($word); + + if ($word && preg_match('/[^0-9\.]/', $word) && !pspell_check($this->plink, $word)) { + $suggestions = pspell_suggest($this->plink, $word); + + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) + $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); + + $matches[] = array($word, $pos, $len, null, $suggestions); + } + + $diff += (strlen($word) - $len); + } + + return $matches; + } + + + /** + * Returns the mispelled words + */ + private function _pspell_words($text = null, $is_html=false) + { + if ($text) { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + // With PSpell we don't need to get suggestions to return mispelled words + if ($is_html) { + $text = $this->html2text($text); + } + + $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); + + foreach ($text as $w) { + $word = trim($w[0]); + if ($word && preg_match('/[^0-9\.]/', $word) && !pspell_check($this->plink, $word)) { + $result[] = $word; + } + } + + return $result; + } + + $result = array(); + + foreach ($this->matches as $m) { + $result[] = $m[0]; + } + + return $result; + } + + + /** + * Returns suggestions for mispelled word + */ + private function _pspell_suggestions($word) + { + // init spellchecker + $this->_pspell_init(); + + if (!$this->plink) { + return array(); + } + + $suggestions = pspell_suggest($this->plink, $word); + + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) + $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); + + return is_array($suggestions) ? $suggestions : array(); + } + + + /** + * Initializes PSpell dictionary + */ + private function _pspell_init() + { + if (!$this->plink) { + $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST); + } + + if (!$this->plink) { + $this->error = "Unable to load Pspell engine for selected language"; + } + } + + + private function _googie_check($text) + { + // spell check uri is configured + $url = $this->rc->config->get('spellcheck_uri'); + + if ($url) { + $a_uri = parse_url($url); + $ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl'); + $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); + $host = ($ssl ? 'ssl://' : '') . $a_uri['host']; + $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang; + } + else { + $host = self::GOOGLE_HOST; + $port = self::GOOGLE_PORT; + $path = '/tbproxy/spell?lang=' . $this->lang; + } + + // Google has some problem with spaces, use \n instead + $text = str_replace(' ', "\n", $text); + + $text = '' + .'' + .'' . $text . '' + .''; + + $store = ''; + if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) { + $out = "POST $path HTTP/1.0\r\n"; + $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; + $out .= "Content-Length: " . strlen($text) . "\r\n"; + $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; + $out .= "Connection: Close\r\n\r\n"; + $out .= $text; + fwrite($fp, $out); + + while (!feof($fp)) + $store .= fgets($fp, 128); + fclose($fp); + } + + if (!$store) { + $this->error = "Empty result from spelling engine"; + } + + preg_match_all('/([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); + + return $matches; + } + + + private function _googie_words($text = null, $is_html=false) + { + if ($text) { + if ($is_html) { + $text = $this->html2text($text); + } + + $matches = $this->_googie_check($text); + } + else { + $matches = $this->matches; + $text = $this->content; + } + + $result = array(); + + foreach ($matches as $m) { + $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET); + } + + return $result; + } + + + private function _googie_suggestions($word) + { + if ($word) { + $matches = $this->_googie_check($word); + } + else { + $matches = $this->matches; + } + + if ($matches[0][4]) { + $suggestions = explode("\t", $matches[0][4]); + if (sizeof($suggestions) > self::MAX_SUGGESTIONS) { + $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); + } + + return $suggestions; + } + + return array(); + } + + + private function html2text($text) + { + $h2t = new html2text($text, false, true, 0); + return $h2t->get_text(); + } +} diff --git a/program/js/editor.js b/program/js/editor.js index 27ea06e37..a3aef72a2 100644 --- a/program/js/editor.js +++ b/program/js/editor.js @@ -47,7 +47,7 @@ function rcmail_editor_init(skin_path, editor_lang, spellcheck, mode) theme_advanced_buttons1: 'bold,italic,underline,|,justifyleft,justifycenter,justifyright,justifyfull,|,bullist,numlist,outdent,indent,ltr,rtl,blockquote,|,forecolor,backcolor,fontselect,fontsizeselect', theme_advanced_buttons2: 'link,unlink,table,|,emotions,charmap,image,media,|,code,search' + (spellcheck ? ',spellchecker' : '') + ',undo,redo', spellchecker_languages: (rcmail.env.spellcheck_langs ? rcmail.env.spellcheck_langs : 'Dansk=da,Deutsch=de,+English=en,Espanol=es,Francais=fr,Italiano=it,Nederlands=nl,Polski=pl,Portugues=pt,Suomi=fi,Svenska=sv'), - spellchecker_rpc_url: '?_task=utils&_action=spell&tiny=1', + spellchecker_rpc_url: '?_task=utils&_action=spell_html', accessibility_focus: false, oninit: 'rcmail_editor_callback' }); diff --git a/program/steps/utils/spell.inc b/program/steps/utils/spell.inc index f61939d68..358576c7c 100644 --- a/program/steps/utils/spell.inc +++ b/program/steps/utils/spell.inc @@ -5,6 +5,7 @@ | program/steps/utils/spell.inc | | | | This file is part of the Roundcube Webmail client | + | Copyright (C) 2005-2011, The Roundcube Dev Team | | Licensed under the GNU GPL | | | | PURPOSE: | @@ -18,15 +19,24 @@ */ -// max. number of suggestions for one word -define('MAX_SUGGESTIONS', 10); +// read input +$lang = get_input_value('lang', RCUBE_INPUT_GET); +$data = file_get_contents('php://input'); -$tiny = !empty($_GET['tiny']) ? 'html_' : ''; +// Get data string +$left = strpos($data, ''); +$right = strrpos($data, ''); +$data = substr($data, $left+6, $right-($left+6)); +$data = html_entity_decode($data, ENT_QUOTES, RCMAIL_CHARSET); -if ($spell_engine = $RCMAIL->config->get('spellcheck_engine', 'googie')) { - include('spell_'.$tiny.$spell_engine.'.inc'); -} +$spellchecker = new rcube_spellchecker($lang); +$spellchecker->check($data); +$result = $spellchecker->get_xml(); -header('HTTP/1.1 404 Not Found'); -exit; +// set response length +header("Content-Length: " . strlen($result)); +// Don't use server's default Content-Type charset (#1486406) +header("Content-Type: text/xml; charset=" . RCMAIL_CHARSET); +print $result; +exit; diff --git a/program/steps/utils/spell_googie.inc b/program/steps/utils/spell_googie.inc deleted file mode 100644 index bb6b9e106..000000000 --- a/program/steps/utils/spell_googie.inc +++ /dev/null @@ -1,75 +0,0 @@ - | - +-----------------------------------------------------------------------+ - - $Id$ - -*/ - -$REMOTE_REQUEST = TRUE; - -// default settings -$host = "ssl://www.google.com"; -$port = 443; -$lang = get_input_value('lang', RCUBE_INPUT_GET); -$path = "/tbproxy/spell?lang=$lang"; - -// spell check uri is configured -if (!empty($CONFIG['spellcheck_uri'])) - { - $a_uri = parse_url($CONFIG['spellcheck_uri']); - $ssl = ($a_uri['scheme']=='https' || $a_uri['scheme']=='ssl'); - $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); - $host = ($ssl ? 'ssl://' : '') . $a_uri['host']; - $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $lang; - } - -$data = file_get_contents('php://input'); -// Google has some problem with spaces, use \n instead -$data = str_replace(' ', "\n", $data); -$store = ""; - -if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) - { - $out = "POST $path HTTP/1.0\r\n"; - $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; - $out .= "Content-Length: " . strlen($data) . "\r\n"; - $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; - $out .= "Connection: Close\r\n\r\n"; - $out .= $data; - fwrite($fp, $out); - - while (!feof($fp)) - $store .= fgets($fp, 128); - fclose($fp); - } - -// remove headers -$pos = strpos($store, ' | + +-----------------------------------------------------------------------+ + + $Id$ + +*/ + +// read input data +$data = file_get_contents('php://input'); + +// Decode JSON input +$request = json_decode($data, true); +$result = array(); + +$lang = $request['params'][0]; +$data = $request['params'][1]; +$data = implode("\n", (array) $data); + +$result['id'] = $request['id']; + +$spellchecker = new rcube_spellchecker($lang); + +if ($request['method'] == 'checkWords') { + $result['result'] = $spellchecker->get_words($data); +} +else if ($request['method'] == 'getSuggestions') { + $result['result'] = $spellchecker->get_suggestions($data); +} + +if ($error = $spellchecker->error()) { + echo '{"error":{"errstr":"' . addslashes($error) . '","errfile":"","errline":null,"errcontext":"","level":"FATAL"}}'; + exit; +} + +// send output +header("Content-Type: text/xml; charset=".RCMAIL_CHARSET); +echo json_encode($result); +exit; + diff --git a/program/steps/utils/spell_html_googie.inc b/program/steps/utils/spell_html_googie.inc deleted file mode 100644 index ceda62687..000000000 --- a/program/steps/utils/spell_html_googie.inc +++ /dev/null @@ -1,110 +0,0 @@ - | - +-----------------------------------------------------------------------+ - - $Id: spell_googie.inc 3780 2010-06-23 09:55:08Z alec $ - -*/ - -function json_error($str) -{ - echo '{"error":{"errstr":"' . addslashes($str) . '","errfile":"","errline":null,"errcontext":"","level":"FATAL"}}'; - exit; -} - -function googie_get($host, $port, $path, $data) -{ - $store = ''; - if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) { - $out = "POST $path HTTP/1.0\r\n"; - $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n"; - $out .= "Content-Length: " . strlen($data) . "\r\n"; - $out .= "Content-Type: application/x-www-form-urlencoded\r\n"; - $out .= "Connection: Close\r\n\r\n"; - $out .= $data; - fwrite($fp, $out); - - while (!feof($fp)) - $store .= fgets($fp, 128); - fclose($fp); - } - - if (!$store) { - json_error("Empty result from spelling engine"); - } - - $matches = array(); - preg_match_all('/([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); - - return $matches; -} - -$REMOTE_REQUEST = TRUE; - -// read input -$data = file_get_contents('php://input'); - -// Decode JSON input -$request = json_decode($data, true); -$result = array(); - -$lang = $request['params'][0]; -$data = $request['params'][1]; -$result['id'] = $request['id']; - -// default settings -$host = "ssl://www.google.com"; -$port = 443; -$path = "/tbproxy/spell?lang=$lang"; - -// spell check uri is configured -if (!empty($CONFIG['spellcheck_uri'])) - { - $a_uri = parse_url($CONFIG['spellcheck_uri']); - $ssl = ($a_uri['scheme']=='https' || $a_uri['scheme']=='ssl'); - $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80); - $host = ($ssl ? 'ssl://' : '') . $a_uri['host']; - $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $lang; - } - -$wordstr = implode("\n", (array) $data); -$data = '' - .'' - .'' . $wordstr . '' - .''; -$matches = googie_get($host, $port, $path, $data); - -if ($request['method'] == 'checkWords') { - $result['result'] = array(); - for ($i=0, $len=count($matches); $i<$len; $i++) - $result['result'][] = mb_substr($wordstr, $matches[$i][1], $matches[$i][2], RCMAIL_CHARSET); -} -else if ($request['method'] == 'getSuggestions') { - if ($matches[0][4]) { - $suggestions = explode("\t", $matches[0][4]); - if (sizeof($suggestions)>MAX_SUGGESTIONS) - $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); - $result['result'] = $suggestions; - } - else - $result['result'] = array(); -} - -// send output -header("Content-Type: text/xml; charset=".RCMAIL_CHARSET); -echo json_encode($result); -exit; - diff --git a/program/steps/utils/spell_html_pspell.inc b/program/steps/utils/spell_html_pspell.inc deleted file mode 100644 index f8c3d38cd..000000000 --- a/program/steps/utils/spell_html_pspell.inc +++ /dev/null @@ -1,76 +0,0 @@ - | - +-----------------------------------------------------------------------+ - - $Id: spell_pspell.inc 3780 2010-06-23 09:55:08Z alec $ - -*/ - -function json_error($str) -{ - echo '{"error":{"errstr":"' . addslashes($str) . '","errfile":"","errline":null,"errcontext":"","level":"FATAL"}}'; - exit; -} - -if (!extension_loaded('pspell')) { - raise_error(array( - 'code' => 500, - 'type' => 'php', - 'file' => __FILE__, 'line' => __LINE__, - 'message' => "Pspell extension not available"), true, false); - - json_error("Pspell extension not available"); -} - -// read input -$data = file_get_contents('php://input'); - -// Decode JSON input -$request = json_decode($data, true); -$result = array(); - -$lang = $request['params'][0]; -$data = $request['params'][1]; -$result['id'] = $request['id']; - -// init spellchecker -$plink = pspell_new($lang, null, null, RCMAIL_CHARSET, PSPELL_FAST); - -if (!$plink) { - json_error("Unable to load Pspell engine for selected language"); -} - -if ($request['method'] == 'checkWords') { - $result['result'] = array(); - foreach ((array)$data as $word) { - if ($word && preg_match('/[^0-9\.]/', $word) - && !pspell_check($plink, $word)) { - $result['result'][] = $word; - } - } -} -else if ($request['method'] == 'getSuggestions') { - $suggestions = pspell_suggest($plink, $data); - if (sizeof($suggestions)>MAX_SUGGESTIONS) - $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); - $result['result'] = $suggestions; -} - -// send output -header("Content-Type: text/xml; charset=".RCMAIL_CHARSET); -echo json_encode($result); -exit; - diff --git a/program/steps/utils/spell_pspell.inc b/program/steps/utils/spell_pspell.inc deleted file mode 100644 index 44415cd60..000000000 --- a/program/steps/utils/spell_pspell.inc +++ /dev/null @@ -1,75 +0,0 @@ - | - +-----------------------------------------------------------------------+ - - $Id$ - -*/ - -if (!extension_loaded('pspell')) { - raise_error(array( - 'code' => 500, - 'type' => 'php', - 'file' => __FILE__, 'line' => __LINE__, - 'message' => "Pspell extension not available"), true, false); - - header('HTTP/1.1 404 Not Found'); - exit; -} - -// read input -$data = file_get_contents('php://input'); - -// parse data (simplexml_load_string breaks CRLFs) -$left = strpos($data, ''); -$right = strrpos($data, ''); -$text = substr($data, $left+6, $right-($left+6)); -$text = html_entity_decode($text, ENT_QUOTES, RCMAIL_CHARSET); - -// tokenize -$words = preg_split('/[ !"#$%&()*+\\,\/\n:;<=>?@\[\]^_{|}-]+|\.[^\w]/', $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE ); - -// init spellchecker -$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, RCMAIL_CHARSET, PSPELL_FAST); - -// send output -$out = ''; - -$diff = 0; -foreach ($words as $w) { - $word = trim($w[0]); - $pos = $w[1] - $diff; - $len = mb_strlen($word); - if ($word && $plink && preg_match('/[^0-9\.]/', $word) - && !pspell_check($plink, $word)) { - $suggestions = pspell_suggest($plink, $word); - if (sizeof($suggestions)>MAX_SUGGESTIONS) - $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS); - - $out .= ''; - $out .= implode("\t", $suggestions); - $out .= ''; - } - $diff += (strlen($word) - $len); -} - -$out .= ''; - -header("Content-Type: text/xml; charset=".RCMAIL_CHARSET); -echo $out; -exit; - - -- cgit v1.2.3