diff options
| field | value | date |
|---|---|---|
| author | alecpl <alec@alec.pl> | 2010-04-20 08:03:54 +0000 |
| committer | alecpl <alec@alec.pl> | 2010-04-20 08:03:54 +0000 |
| commit | 46a13859743fd15ed7fb56cfdf6971024e1e622a (patch) | |
| tree | 32666cdc1c5bc50748df69e2bbb08e1d4b308d64 /program | |
| parent | 9096de8520ce4bfa50dcf896da1dea914f8c9ec8 (diff) | |
- improve rcube_parse_charset() performance
Diffstat (limited to 'program')
-rw-r--r-- | program/include/main.inc | 38 |
1 file changed, 23 insertions, 15 deletions
```diff
diff --git a/program/include/main.inc b/program/include/main.inc
index d2f28cdce..9e1813182 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -325,9 +325,13 @@ function rcube_charset_convert($str, $from, $to=NULL)
  * @param string Input charset name
  * @return The validated charset name
  */
-function rcube_parse_charset($charset)
+function rcube_parse_charset($input)
 {
-  $charset = strtoupper($charset);
+  static $charsets = array();
+  $charset = strtoupper($input);
+
+  if (isset($charsets[$input]))
+    return $charsets[$input];
 
   $charset = preg_replace(array(
     '/^[^0-9A-Z]+/', // e.g. _ISO-8859-JP$SIO
@@ -367,24 +371,28 @@ function rcube_parse_charset($charset)
   $str = preg_replace(array('/[^A-Z0-9]/', '/^X+/'), '', $charset);
 
   if (isset($aliases[$str]))
-    return $aliases[$str];
-
-  if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m))
-    return 'UTF-' . $m[1] . $m[2];
-
-  if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
+    $result = $aliases[$str];
+  // UTF
+  else if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m))
+    $result = 'UTF-' . $m[1] . $m[2];
+  // ISO-8859
+  else if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
     $iso = 'ISO-8859-' . ($m[1] ? $m[1] : 1);
-    # some clients sends windows-1252 text as latin1,
-    # it is safe to use windows-1252 for all latin1
-    return $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
+    // some clients sends windows-1252 text as latin1,
+    // it is safe to use windows-1252 for all latin1
+    $result = $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
   }
-
   // handle broken charset names e.g. WINDOWS-1250HTTP-EQUIVCONTENT-TYPE
-  if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
-    return 'WINDOWS-' . $m[2];
+  else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
+    $result = 'WINDOWS-' . $m[2];
   }
+  else {
+    $result = $charset;
+  }
+
+  $charsets[$input] = $result;
 
-  return $charset;
+  return $result;
 }
 
 
```