summaryrefslogtreecommitdiff
path: root/program/steps/utils/spell_pspell.inc
blob: 2c9c2333c6d30b2639848f1e79937a66b481299e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
<?php

/*
 +-----------------------------------------------------------------------+
 | program/steps/utils/spell_pspell.inc                                  |
 |                                                                       |
 | This file is part of the RoundCube Webmail client                     |
 | Licensed under the GNU GPL                                            |
 |                                                                       |
 | PURPOSE:                                                              |
 |   Use the Pspell extension to check spelling, returns results         |
 |   compatible with spell_googie.inc.                                   |
 |                                                                       |
 +-----------------------------------------------------------------------+
 | Author: Kris Steinhoff <steinhof@umich.edu>                           |
 +-----------------------------------------------------------------------+

 $Id$

*/

// Guard: this script cannot do anything useful without the Pspell extension.
// Report the problem through raise_error() and answer with a 404, then stop.
if (extension_loaded('pspell') === false) {
    raise_error(
        array(
            'code'    => 500,
            'type'    => 'php',
            'file'    => __FILE__,
            'line'    => __LINE__,
            'message' => "Pspell extension not available"
        ),
        true,   // NOTE(review): presumably the "log" flag of raise_error() - confirm
        false   // NOTE(review): presumably "do not terminate"; we exit below ourselves - confirm
    );

    header('HTTP/1.1 404 Not Found');
    exit;
}

// max. number of suggestions for one word
define('MAX_SUGGESTIONS', 10);

// read the raw request body
$data = file_get_contents('php://input');

// Extract the payload between the first <text> and the last </text> by hand
// (simplexml_load_string breaks CRLFs).
$left  = strpos($data, '<text>');
$right = strrpos($data, '</text>');

if ($left === false || $right === false || $right < $left + 6) {
    // Markers missing or out of order: without this check the false/short
    // offsets would make substr() return an arbitrary slice of the request.
    // Treat it as "nothing to check" instead.
    $text = '';
} else {
    // 6 == strlen('<text>'): skip the opening tag itself
    $text = substr($data, $left+6, $right-($left+6));
}
$text = html_entity_decode($text, ENT_QUOTES, RCMAIL_CHARSET);

// tokenize: split on runs of the listed punctuation/space/newline characters,
// or on a dot followed by a non-word character. Empty pieces are dropped and
// every piece is returned as array(string, byte offset within $text).
$words = preg_split('/[ !"#$%&()*+\\,\/\n:;<=>?@\[\]^_{|}-]+|\.[^\w]/', $text, NULL,  PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE );

// init spellchecker for the language given in the 'lang' GET parameter;
// the handle is tested for truthiness before it is used further down
$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, RCMAIL_CHARSET, PSPELL_FAST);

// Build the XML answer: one <c o="offset" l="length"> element per misspelled
// word, containing its tab-separated suggestions.
$out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($text).'">';

// preg_split() reported byte offsets, but the result carries mb_strlen()-based
// lengths, so offsets are corrected the same way: $diff accumulates, for all
// words seen so far, how many more bytes than characters they occupy.
$diff = 0;
foreach ($words as $w) {
    $word = trim($w[0]);
    $pos  = $w[1] - $diff;
    $len  = mb_strlen($word);

    // Check only non-empty words that contain something other than digits
    // and dots, and only if the spellchecker handle is usable.
    if ($word && $plink && preg_match('/[^0-9\.]/', $word)
        && !pspell_check($plink, $word)) {
        $suggestions = pspell_suggest($plink, $word);

        // pspell_suggest() is documented to return false on failure in
        // older PHP versions; fall back to "no suggestions" in that case
        if (!is_array($suggestions)) {
            $suggestions = array();
        }

        // cap the list using the configured constant (was a hard-coded 10)
        $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS);

        $out .= '<c o="'.$pos.'" l="'.$len.'">';
        $out .= implode("\t", $suggestions);
        $out .= '</c>';
    }

    // must run for every word, misspelled or not, to keep later offsets right
    $diff += (strlen($word) - $len);
}

$out .= '</spellresult>';

header("Content-Type: text/xml; charset=".RCMAIL_CHARSET);
echo $out;
exit;