summaryrefslogtreecommitdiff
path: root/program/include/rcube_spellchecker.php
diff options
context:
space:
mode:
authorAleksander Machniak <alec@alec.pl>2012-11-21 19:52:03 +0100
committerAleksander Machniak <alec@alec.pl>2012-11-21 19:52:03 +0100
commitba6f21caeb405c7e8512a09941fefbc97286e45f (patch)
tree4a0e8f6fbab3260d37bf85cbf0bc9f506e627678 /program/include/rcube_spellchecker.php
parentf707fec0001d7dc7d46be114c42b37e49a052660 (diff)
Framework files moved to lib/Roundcube
Diffstat (limited to 'program/include/rcube_spellchecker.php')
-rw-r--r--program/include/rcube_spellchecker.php621
1 files changed, 0 insertions, 621 deletions
diff --git a/program/include/rcube_spellchecker.php b/program/include/rcube_spellchecker.php
deleted file mode 100644
index 30d15d721..000000000
--- a/program/include/rcube_spellchecker.php
+++ /dev/null
@@ -1,621 +0,0 @@
-<?php
-
-/*
- +-----------------------------------------------------------------------+
- | program/include/rcube_spellchecker.php |
- | |
- | This file is part of the Roundcube Webmail client |
- | Copyright (C) 2011, Kolab Systems AG |
- | Copyright (C) 2008-2011, The Roundcube Dev Team |
- | |
- | Licensed under the GNU General Public License version 3 or |
- | any later version with exceptions for skins & plugins. |
- | See the README file for a full license statement. |
- | |
- | PURPOSE: |
- | Spellchecking using different backends |
- | |
- +-----------------------------------------------------------------------+
- | Author: Aleksander Machniak <machniak@kolabsys.com> |
- | Author: Thomas Bruederli <roundcube@gmail.com> |
- +-----------------------------------------------------------------------+
-*/
-
-
-/**
- * Helper class for spellchecking with Googiespell and PSpell support.
- *
- * @package Framework
- * @subpackage Utils
- */
-class rcube_spellchecker
-{
-    // Result of the last check. Pspell items are array(word, offset, length, null, suggestions);
-    // Googie items are preg_match_all() sets: [1] offset, [2] length, [4] tab-separated suggestions.
-    private $matches = array();
-    // Engine name from the 'spellcheck_engine' option ('pspell' selects Pspell, anything else Googie)
-    private $engine;
-    // Language code used for checking
-    private $lang;
-    // Application object (rcube instance)
-    private $rc;
-    // Last error message
-    private $error;
-    // Pattern used to split text into words for Pspell
-    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
-    // Exception options: ignore_syms, ignore_nums, ignore_caps, dictionary
-    private $options = array();
-    // Words of the exceptions dictionary (lazy-loaded, null until loaded)
-    private $dict;
-    // True when a dictionary row was found in the database
-    private $have_dict;
-    // Plain-text content passed to the last check() call.
-    // Declared explicitly: it used to be created dynamically on first assignment.
-    private $content;
-    // Pspell dictionary link created by pspell_new().
-    // Declared explicitly so that reading it before initialization does not raise a notice.
-    private $plink;
-
-
-    // default settings
-    const GOOGLE_HOST = 'ssl://www.google.com';
-    const GOOGLE_PORT = 443;
-    const MAX_SUGGESTIONS = 10;
-
-
-    /**
-     * Object constructor
-     *
-     * Reads the engine name and the exception options from
-     * the application configuration.
-     *
-     * @param string $lang Language code ('en' is used when empty)
-     */
-    function __construct($lang = 'en')
-    {
-        $rcube  = rcube::get_instance();
-        $config = $rcube->config;
-
-        $this->rc     = $rcube;
-        $this->engine = $config->get('spellcheck_engine', 'googie');
-
-        if ($lang) {
-            $this->lang = $lang;
-        }
-        else {
-            $this->lang = 'en';
-        }
-
-        // every option is stored under its name without the "spellcheck_" prefix
-        $this->options = array();
-        foreach (array('ignore_syms', 'ignore_nums', 'ignore_caps', 'dictionary') as $name) {
-            $this->options[$name] = $config->get('spellcheck_' . $name);
-        }
-    }
-
-
-    /**
-     * Stores the content and runs the spellcheck on it
-     *
-     * @param string $text    Text content for spellchecking
-     * @param bool   $is_html Enables HTML-to-Text conversion
-     *
-     * @return bool True when no misspelling was found, otherwise false
-     */
-    function check($text, $is_html = false)
-    {
-        // HTML input is converted to plain text before checking
-        $this->content = $is_html ? $this->html2text($text) : $text;
-
-        // anything but 'pspell' is handled by the Googie backend
-        $use_pspell = ($this->engine == 'pspell');
-
-        $this->matches = $use_pspell
-            ? $this->_pspell_check($this->content)
-            : $this->_googie_check($this->content);
-
-        return $this->found() == 0;
-    }
-
-
-    /**
-     * Number of misspellings found by the last check
-     *
-     * @return int Number of misspellings
-     */
-    function found()
-    {
-        $total = sizeof($this->matches);
-
-        return $total;
-    }
-
-
-    /**
-     * Returns suggestions for the specified word
-     * using the configured engine
-     *
-     * @param string $word The word
-     *
-     * @return array Suggestions list
-     */
-    function get_suggestions($word)
-    {
-        $use_pspell = ($this->engine == 'pspell');
-
-        return $use_pspell
-            ? $this->_pspell_suggestions($word)
-            : $this->_googie_suggestions($word);
-    }
-
-
-    /**
-     * Returns misspelled words
-     *
-     * @param string $text    The content for spellchecking. If empty, the result
-     *                        of the previous check() call is used.
-     * @param bool   $is_html Enables HTML-to-Text conversion of $text
-     *
-     * @return array List of misspelled words
-     */
-    function get_words($text = null, $is_html=false)
-    {
-        $use_pspell = ($this->engine == 'pspell');
-
-        return $use_pspell
-            ? $this->_pspell_words($text, $is_html)
-            : $this->_googie_words($text, $is_html);
-    }
-
-
-    /**
-     * Returns the result of the last check in XML (Googiespell) format
-     *
-     * Each match becomes a <c> element carrying the offset (o) and length (l)
-     * attributes and a tab-separated suggestions list as its content.
-     *
-     * @return string XML content
-     */
-    function get_xml()
-    {
-        $items = array();
-
-        foreach ($this->matches as $match) {
-            // Pspell provides an array of suggestions, Googie a ready-made string
-            $suggestions = $match[4];
-            if (is_array($suggestions)) {
-                $suggestions = implode("\t", $suggestions);
-            }
-
-            $items[] = '<c o="' . $match[1] . '" l="' . $match[2] . '">' . $suggestions . '</c>';
-        }
-
-        $head = '<?xml version="1.0" encoding="' . RCMAIL_CHARSET . '"?>'
-            . '<spellresult charschecked="' . mb_strlen($this->content) . '">';
-
-        return $head . implode('', $items) . '</spellresult>';
-    }
-
-
-    /**
-     * Returns the result of the last check (misspelled words with suggestions)
-     *
-     * @return array Spellchecking result. An array indexed by word, with
-     *               tab-separated suggestions as values.
-     */
-    function get()
-    {
-        $use_pspell = ($this->engine == 'pspell');
-        $result     = array();
-
-        foreach ($this->matches as $match) {
-            // Pspell matches carry the word itself, Googie matches only
-            // its offset and length within the checked content
-            $word = $use_pspell
-                ? $match[0]
-                : mb_substr($this->content, $match[1], $match[2], RCMAIL_CHARSET);
-
-            $suggestions   = $match[4];
-            $result[$word] = is_array($suggestions) ? implode("\t", $suggestions) : $suggestions;
-        }
-
-        return $result;
-    }
-
-
-    /**
-     * Returns the message of the last error (if any)
-     *
-     * @return string Error message
-     */
-    function error()
-    {
-        $message = $this->error;
-
-        return $message;
-    }
-
-
-    /**
-     * Checks the text using pspell
-     *
-     * @param string $text Text content for spellchecking
-     *
-     * @return array List of matches, each being
-     *               array(word, offset, length, null, suggestions)
-     */
-    private function _pspell_check($text)
-    {
-        // init spellchecker
-        $this->_pspell_init();
-
-        if (!$this->plink) {
-            return array();
-        }
-
-        // tokenize; -1 means "no limit" (NULL is deprecated for this integer argument)
-        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
-
-        // preg_split() returns false on failure
-        if (!is_array($tokens)) {
-            return array();
-        }
-
-        $diff    = 0;
-        $matches = array();
-
-        foreach ($tokens as $token) {
-            $word = trim($token[0]);
-            // the captured offset is in bytes; $diff holds the bytes-minus-characters
-            // surplus of all preceding words, so subtracting it gives a character offset
-            $pos  = $token[1] - $diff;
-            $len  = mb_strlen($word);
-
-            // exceptions are skipped, but still counted in $diff below
-            if (!$this->is_exception($word) && !pspell_check($this->plink, $word)) {
-                $suggestions = pspell_suggest($this->plink, $word);
-
-                // pspell_suggest() may return false
-                if (!is_array($suggestions)) {
-                    $suggestions = array();
-                }
-                else if (count($suggestions) > self::MAX_SUGGESTIONS) {
-                    $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
-                }
-
-                $matches[] = array($word, $pos, $len, null, $suggestions);
-            }
-
-            $diff += (strlen($word) - $len);
-        }
-
-        return $matches;
-    }
-
-
-    /**
-     * Returns the misspelled words
-     *
-     * @param string $text    Text to check. If empty, words found by the
-     *                        previous check are returned.
-     * @param bool   $is_html Enables HTML-to-Text conversion of $text
-     *
-     * @return array List of misspelled words
-     */
-    private function _pspell_words($text = null, $is_html=false)
-    {
-        $result = array();
-
-        // no text given: use the result of the last check
-        if (!$text) {
-            foreach ($this->matches as $m) {
-                $result[] = $m[0];
-            }
-
-            return $result;
-        }
-
-        // init spellchecker
-        $this->_pspell_init();
-
-        if (!$this->plink) {
-            return array();
-        }
-
-        // With PSpell we don't need to get suggestions to return misspelled words
-        if ($is_html) {
-            $text = $this->html2text($text);
-        }
-
-        // -1 means "no limit" (NULL is deprecated for this integer argument);
-        // offsets are not needed here, so they are not captured
-        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY);
-
-        // preg_split() returns false on failure
-        if (!is_array($tokens)) {
-            return array();
-        }
-
-        foreach ($tokens as $token) {
-            $word = trim($token);
-
-            // skip exceptions
-            if ($this->is_exception($word)) {
-                continue;
-            }
-
-            if (!pspell_check($this->plink, $word)) {
-                $result[] = $word;
-            }
-        }
-
-        return $result;
-    }
-
-
-    /**
-     * Returns suggestions for misspelled word
-     *
-     * @param string $word The word
-     *
-     * @return array Up to MAX_SUGGESTIONS suggestions, empty array on failure
-     */
-    private function _pspell_suggestions($word)
-    {
-        // init spellchecker
-        $this->_pspell_init();
-
-        if (!$this->plink) {
-            return array();
-        }
-
-        $suggestions = pspell_suggest($this->plink, $word);
-
-        // pspell_suggest() may return false, check the type before counting
-        if (!is_array($suggestions)) {
-            return array();
-        }
-
-        if (count($suggestions) > self::MAX_SUGGESTIONS) {
-            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
-        }
-
-        return $suggestions;
-    }
-
-
-    /**
-     * Creates the PSpell dictionary link (once) for the selected language.
-     * On failure the error message is set and the link stays empty.
-     */
-    private function _pspell_init()
-    {
-        // already initialized
-        if ($this->plink) {
-            return;
-        }
-
-        if (!extension_loaded('pspell')) {
-            $this->error = "Pspell extension not available";
-
-            $error = array(
-                'code' => 500, 'type' => 'php',
-                'file' => __FILE__, 'line' => __LINE__,
-                'message' => $this->error,
-            );
-            rcube::raise_error($error, true, false);
-
-            return;
-        }
-
-        $link = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
-
-        $this->plink = $link;
-
-        if (!$link) {
-            $this->error = "Unable to load Pspell engine for selected language";
-        }
-    }
-
-
-    /**
-     * Checks the text using a Googiespell-compatible HTTP service
-     *
-     * The service location is taken from the 'spellcheck_uri' option
-     * (the language code is appended to it), with Google as a fallback.
-     *
-     * @param string $text Text content for spellchecking
-     *
-     * @return array List of matches as produced by preg_match_all() in set order:
-     *               [1] offset, [2] length, [3] "s" attribute, [4] tab-separated suggestions
-     */
-    private function _googie_check($text)
-    {
-        // spell check uri is configured
-        $url = $this->rc->config->get('spellcheck_uri');
-
-        if ($url) {
-            // parse_url() omits components missing from the URL (and returns
-            // false for malformed ones), so provide defaults for everything we read
-            $a_uri = array_merge(
-                array('scheme' => '', 'host' => '', 'port' => null, 'path' => '', 'query' => ''),
-                (array) parse_url($url)
-            );
-
-            $ssl  = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl');
-            $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80);
-            $host = ($ssl ? 'ssl://' : '') . $a_uri['host'];
-            $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang;
-        }
-        else {
-            $host = self::GOOGLE_HOST;
-            $port = self::GOOGLE_PORT;
-            $path = '/tbproxy/spell?lang=' . $this->lang;
-        }
-
-        // Google has some problem with spaces, use \n instead
-        $gtext = str_replace(' ', "\n", $text);
-
-        // escape XML special characters, otherwise text containing "&" or "<"
-        // produces a malformed request ("&" must be replaced first)
-        // NOTE(review): assumes the service reports offsets relative to the decoded text - confirm
-        $gtext = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $gtext);
-
-        $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
-            .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
-            .'<text>' . $gtext . '</text>'
-            .'</spellrequest>';
-
-        $store = '';
-        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
-            $out = "POST $path HTTP/1.0\r\n";
-            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
-            $out .= "Content-Length: " . strlen($gtext) . "\r\n";
-            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
-            $out .= "Connection: Close\r\n\r\n";
-            $out .= $gtext;
-            fwrite($fp, $out);
-
-            // also stop on read errors, feof() alone may never become true then
-            while (!feof($fp)) {
-                $line = fgets($fp, 128);
-                if ($line === false) {
-                    break;
-                }
-                $store .= $line;
-            }
-            fclose($fp);
-        }
-
-        if (!$store) {
-            $this->error = "Empty result from spelling engine";
-        }
-
-        // the response headers are part of $store, the pattern picks only <c> elements
-        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
-
-        // skip exceptions (if appropriate options are enabled)
-        if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
-            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
-        ) {
-            foreach ($matches as $idx => $m) {
-                $word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
-                // skip exceptions
-                if ($this->is_exception($word)) {
-                    unset($matches[$idx]);
-                }
-            }
-        }
-
-        // reindex, so the first match is always available at index 0
-        return array_values($matches);
-    }
-
-
-    /**
-     * Returns the misspelled words (Googie engine)
-     *
-     * @param string $text    Text to check. If empty, the content and result
-     *                        of the previous check are used.
-     * @param bool   $is_html Enables HTML-to-Text conversion of $text
-     *
-     * @return array List of misspelled words
-     */
-    private function _googie_words($text = null, $is_html=false)
-    {
-        if (!$text) {
-            // fall back to the last checked content
-            $found = $this->matches;
-            $text  = $this->content;
-        }
-        else {
-            if ($is_html) {
-                $text = $this->html2text($text);
-            }
-
-            $found = $this->_googie_check($text);
-        }
-
-        $words = array();
-
-        // matches hold only the position of a word, cut it out of the text
-        foreach ($found as $item) {
-            $offset  = $item[1];
-            $length  = $item[2];
-            $words[] = mb_substr($text, $offset, $length, RCMAIL_CHARSET);
-        }
-
-        return $words;
-    }
-
-
-    /**
-     * Returns suggestions for a word (Googie engine)
-     *
-     * @param string $word The word. If empty, the first match of the
-     *                     previous check is used.
-     *
-     * @return array Up to MAX_SUGGESTIONS suggestions, empty array if there are none
-     */
-    private function _googie_suggestions($word)
-    {
-        if ($word) {
-            $matches = $this->_googie_check($word);
-        }
-        else {
-            $matches = $this->matches;
-        }
-
-        // take the first match whatever its index is: the list may be empty
-        // or may have no element at index 0 after exceptions were removed
-        $first = is_array($matches) ? reset($matches) : false;
-
-        if (is_array($first) && !empty($first[4])) {
-            $suggestions = explode("\t", $first[4]);
-
-            // the class constant must be referenced with self::, a bare
-            // MAX_SUGGESTIONS is an undefined global constant
-            if (count($suggestions) > self::MAX_SUGGESTIONS) {
-                $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
-            }
-
-            return $suggestions;
-        }
-
-        return array();
-    }
-
-
-    /**
-     * Converts HTML content to plain text using the html2text class
-     *
-     * @param string $text HTML content
-     *
-     * @return string Result of html2text::get_text()
-     */
-    private function html2text($text)
-    {
-        $converter = new html2text($text, false, true, 0);
-        $plain     = $converter->get_text();
-
-        return $plain;
-    }
-
-
-    /**
-     * Check if the specified word is an exception according to
-     * spellcheck options.
-     *
-     * @param string $word The word
-     *
-     * @return bool True if the word is an exception, False otherwise
-     */
-    public function is_exception($word)
-    {
-        // Contain only symbols (e.g. "+9,0", "2:2")
-        if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word))
-            return true;
-
-        // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
-        if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word))
-            return true;
-
-        // Contain numbers (e.g. "g00g13")
-        if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word))
-            return true;
-
-        // strict comparisons below require a string
-        $word = (string) $word;
-
-        // Blocked caps (e.g. "GOOGLE"); compared strictly, a loose comparison
-        // treats numeric-looking strings (e.g. "1e3" and "1E3") as numbers
-        if (!empty($this->options['ignore_caps']) && $word === mb_strtoupper($word))
-            return true;
-
-        // Use exceptions from dictionary
-        if (!empty($this->options['dictionary'])) {
-            $this->load_dict();
-
-            // @TODO: should dictionary be case-insensitive?
-            // strict lookup, so that e.g. "1e3" does not match a stored "1000"
-            if (!empty($this->dict) && in_array($word, $this->dict, true))
-                return true;
-        }
-
-        return false;
-    }
-
-
-    /**
-     * Add a word to dictionary
-     *
-     * @param string $word The word to add. More words can be
-     *                     specified separated by a space.
-     */
-    public function add_word($word)
-    {
-        $this->load_dict();
-
-        // must be initialized, it is read also when no word was accepted
-        $valid = false;
-
-        foreach (explode(' ', $word) as $item) {
-            // sanity check: skip empty tokens (produced by repeated spaces)
-            // and overlong words
-            if ($item !== '' && strlen($item) < 512) {
-                $this->dict[] = $item;
-                $valid = true;
-            }
-        }
-
-        // store the dictionary only if something was added
-        if ($valid) {
-            $this->dict = array_unique($this->dict);
-            $this->update_dict();
-        }
-    }
-
-
-    /**
-     * Remove a word from dictionary
-     *
-     * @param string $word The word to remove
-     */
-    public function remove_word($word)
-    {
-        $this->load_dict();
-
-        // strict search: a loose one compares numeric-looking strings as numbers
-        // and could remove a different entry (e.g. "1000" when asked for "1e3")
-        $key = array_search((string) $word, $this->dict, true);
-
-        // the dictionary is stored only when the word was really there
-        if ($key !== false) {
-            unset($this->dict[$key]);
-            $this->update_dict();
-        }
-    }
-
-
-    /**
-     * Update dictionary row in DB
-     *
-     * The 'spell_dictionary_save' hook is executed first; a plugin can abort
-     * saving or modify the user ID, the language and the words list.
-     */
-    private function update_dict()
-    {
-        // the shared dictionary is not bound to any user (user_id IS NULL),
-        // so the variable must be defined also in that case
-        $userid = null;
-
-        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
-            $userid = $this->rc->get_user_id();
-        }
-
-        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
-            'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));
-
-        if (!empty($plugin['abort'])) {
-            return;
-        }
-
-        // no existing row and nothing to store
-        if (!$this->have_dict && empty($this->dict)) {
-            return;
-        }
-
-        $db        = $this->rc->db;
-        $table     = $db->table_name('dictionary');
-        $lang_col  = $db->quoteIdentifier('language');
-
-        if (!$this->have_dict) {
-            $db->query(
-                "INSERT INTO " . $table
-                ." (user_id, " . $lang_col . ", data) VALUES (?, ?, ?)",
-                $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));
-
-            return;
-        }
-
-        // "= ?" does not match NULL, so the user condition is built explicitly
-        $user_cond = "user_id " . ($plugin['userid'] ? "= " . $db->quote($plugin['userid']) : "IS NULL");
-
-        if (!empty($this->dict)) {
-            $db->query(
-                "UPDATE " . $table
-                ." SET data = ?"
-                ." WHERE " . $user_cond
-                ." AND " . $lang_col . " = ?",
-                implode(' ', $plugin['dictionary']), $plugin['language']);
-        }
-        // don't store empty dict
-        else {
-            $db->query(
-                "DELETE FROM " . $table
-                ." WHERE " . $user_cond
-                ." AND " . $lang_col . " = ?",
-                $plugin['language']);
-        }
-    }
-
-
-    /**
-     * Get dictionary from DB
-     *
-     * The 'spell_dictionary_get' hook is executed first; words provided by
-     * a plugin are merged with the words stored in the database, unless the
-     * plugin aborts the database lookup. The result is kept in $this->dict,
-     * so the lookup is done only once.
-     *
-     * @return array List of words
-     */
-    private function load_dict()
-    {
-        if (is_array($this->dict)) {
-            return $this->dict;
-        }
-
-        // the shared dictionary is not bound to any user (user_id IS NULL),
-        // so the variable must be defined also in that case
-        $userid = null;
-
-        if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
-            $userid = $this->rc->get_user_id();
-        }
-
-        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
-            'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));
-
-        if (empty($plugin['abort'])) {
-            $dict = array();
-            $db   = $this->rc->db;
-
-            // keep the result handle, it is needed to fetch the row
-            $sql_result = $db->query(
-                "SELECT data FROM " . $db->table_name('dictionary')
-                ." WHERE user_id " . ($plugin['userid'] ? "= " . $db->quote($plugin['userid']) : "IS NULL")
-                ." AND " . $db->quoteIdentifier('language') . " = ?",
-                $plugin['language']);
-
-            if ($sql_arr = $db->fetch_assoc($sql_result)) {
-                // remember that the row exists, update_dict() decides
-                // between INSERT and UPDATE/DELETE based on that
-                $this->have_dict = true;
-                if (!empty($sql_arr['data'])) {
-                    $dict = explode(' ', $sql_arr['data']);
-                }
-            }
-
-            $plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict);
-        }
-
-        if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
-            $this->dict = $plugin['dictionary'];
-        }
-        else {
-            $this->dict = array();
-        }
-
-        return $this->dict;
-    }
-
-}