diff options
author | alecpl <alec@alec.pl> | 2008-10-14 13:32:48 +0000 |
---|---|---|
committer | alecpl <alec@alec.pl> | 2008-10-14 13:32:48 +0000 |
commit | b214f8d4d81a167cd651a0c021a7f1486600c1f4 (patch) | |
tree | cd61de7182b30852829f55edfe1ab07b736a4844 /program/steps/mail | |
parent | 32eb29fb994404a74b39e707995918f718948cfd (diff) |
#1485398, #1485441: fix (p)spell checking CRLF/multibyte issues
Diffstat (limited to 'program/steps/mail')
-rw-r--r-- | program/steps/mail/spell_pspell.inc | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/program/steps/mail/spell_pspell.inc b/program/steps/mail/spell_pspell.inc
index 4d15604bf..bf696f2fe 100644
--- a/program/steps/mail/spell_pspell.inc
+++ b/program/steps/mail/spell_pspell.inc
@@ -29,19 +29,29 @@ if (!extension_loaded('pspell')) {
     exit;
 }
 
+// read input
 $data = file_get_contents('php://input');
-$xml = simplexml_load_string($data);
-$text = (string)$xml->text;
+
+// parse data (simplexml_load_string breaks CRLFs)
+$left = strpos($data, '<text>');
+$right = strrpos($data, '</text>');
+$text = substr($data, $left+6, $right-($left+6));
+
+// tokenize
 $words = preg_split('/[ !"#$%&()*+\\,-.\/\n:;<=>?@\[\]^_{|}]+/', $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE );
-$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8');
+
+// init spellchecker
+$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8', PSPELL_FAST);
+
+// send output
 $out = '<?xml version="1.0" encoding="UTF-8"?><spellresult charschecked="'.rc_strlen($text).'">';
 
 $diff = 0;
 foreach ($words as $w) {
-    $word = $w[0];
+    $word = trim($w[0]);
     $pos = $w[1] - $diff;
     $len = rc_strlen($word);
-    if ($plink && !pspell_check($plink, $word)) {
+    if ($word && $plink && !pspell_check($plink, $word)) {
         $suggestions = pspell_suggest($plink, $word);
         $out .= '<c o="'.$pos.'" l="'.$len.'">';
         $out .= implode("\t", $suggestions);
@@ -49,6 +59,7 @@ foreach ($words as $w) {
     }
     $diff += (strlen($word) - $len);
 }
+
 $out .= '</spellresult>';
 
 header("Content-Type: text/xml");