<?php

/*
 +-----------------------------------------------------------------------+
 | program/include/rcube_vcard.php                                       |
 |                                                                       |
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2008-2009, The Roundcube Dev Team                       |
 | Licensed under the GNU GPL                                            |
 |                                                                       |
 | PURPOSE:                                                              |
 |   Logical representation of a vcard address record                    |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 +-----------------------------------------------------------------------+

 $Id$

*/


/**
 * Logical representation of a vcard-based address record
 * Provides functions to parse and export vCard data format
 *
 * @package    Addressbook
 * @author     Thomas Bruederli <roundcube@gmail.com>
 */
class rcube_vcard
{
  private static $values_decoded = false;
  private $raw = array(
    'FN' => array(),
    'N' => array(array('','','','','')),
  );

  public $business = false;
  public $displayname;
  public $surname;
  public $firstname;
  public $middlename;
  public $nickname;
  public $organization;
  public $notes;
  public $email = array();


  /**
   * Constructor
   */
  public function __construct($vcard = null, $charset = RCMAIL_CHARSET, $detect = false)
  {
    if (!empty($vcard))
      $this->load($vcard, $charset, $detect);
  }


  /**
   * Load record from (internal, unfolded) vcard 3.0 format
   *
   * @param string vCard string to parse
   * @param string Charset of string values
   * @param boolean True if loading a 'foreign' vcard and extra heuristics for charset detection is required
   */
  public function load($vcard, $charset = RCMAIL_CHARSET, $detect = false)
  {
    self::$values_decoded = false;
    $this->raw = self::vcard_decode($vcard);

    // resolve charset parameters
    if ($charset == null) {
      $this->raw = self::charset_convert($this->raw);
    }
    // vcard has encoded values and charset should be detected
    else if ($detect && self::$values_decoded &&
      ($detected_charset = self::detect_encoding(self::vcard_encode($this->raw))) && $detected_charset != RCMAIL_CHARSET) {
        $this->raw = self::charset_convert($this->raw, $detected_charset);
    }

    // find well-known address fields
    $this->displayname = $this->raw['FN'][0][0];
    $this->surname = $this->raw['N'][0][0];
    $this->firstname = $this->raw['N'][0][1];
    $this->middlename = $this->raw['N'][0][2];
    $this->nickname = $this->raw['NICKNAME'][0][0];
    $this->organization = $this->raw['ORG'][0][0];
    $this->business = ($this->raw['X-ABSHOWAS'][0][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));
    
    foreach ((array)$this->raw['EMAIL'] as $i => $raw_email)
      $this->email[$i] = is_array($raw_email) ? $raw_email[0] : $raw_email;
    
    // make the pref e-mail address the first entry in $this->email
    $pref_index = $this->get_type_index('EMAIL', 'pref');
    if ($pref_index > 0) {
      $tmp = $this->email[0];
      $this->email[0] = $this->email[$pref_index];
      $this->email[$pref_index] = $tmp;
    }

    // make sure displayname is not empty (required by RFC2426)
    if (!strlen($this->displayname)) {
      // the same method is used in steps/mail/addcontact.inc
      $this->displayname = ucfirst(preg_replace('/[\.\-]/', ' ',
        substr($this->email[0], 0, strpos($this->email[0], '@'))));
    }
  }


  /**
   * Convert the data structure into a vcard 3.0 string
   */
  public function export()
  {
    return self::rfc2425_fold(self::vcard_encode($this->raw));
  }


  /**
   * Setter for address record fields
   *
   * @param string Field name
   * @param string Field value
   * @param string Section name
   */
  public function set($field, $value, $section = 'HOME')
  {
    switch ($field) {
      case 'name':
      case 'displayname':
        $this->raw['FN'][0][0] = $value;
        break;
        
      case 'firstname':
        $this->raw['N'][0][1] = $value;
        break;
        
      case 'surname':
        $this->raw['N'][0][0] = $value;
        break;
      
      case 'nickname':
        $this->raw['NICKNAME'][0][0] = $value;
        break;
        
      case 'organization':
        $this->raw['ORG'][0][0] = $value;
        break;
        
      case 'email':
        $index = $this->get_type_index('EMAIL', $section);
        if (!is_array($this->raw['EMAIL'][$index])) {
          $this->raw['EMAIL'][$index] = array(0 => $value, 'type' => array('INTERNET', $section, 'pref'));
        }
        else {
          $this->raw['EMAIL'][$index][0] = $value;
        }
        break;
    }
  }


  /**
   * Find index with the '$type' attribute
   *
   * @param string Field name
   * @return int Field index having $type set
   */
  private function get_type_index($field, $type = 'pref')
  {
    $result = 0;
    if ($this->raw[$field]) {
      foreach ($this->raw[$field] as $i => $data) {
        if (is_array($data['type']) && in_array_nocase('pref', $data['type']))
          $result = $i;
      }
    }
    
    return $result;
  }
  
  
  /**
   * Convert a whole vcard (array) to UTF-8.
   * If $force_charset is null, each member value that has a charset parameter will be converted
   */
  private static function charset_convert($card, $force_charset = null)
  {
    foreach ($card as $key => $node) {
      foreach ($node as $i => $subnode) {
        if (is_array($subnode) && (($charset = $force_charset) || ($subnode['charset'] && ($charset = $subnode['charset'][0])))) {
          foreach ($subnode as $j => $value) {
            if (is_numeric($j) && is_string($value))
              $card[$key][$i][$j] = rcube_charset_convert($value, $charset);
          }
          unset($card[$key][$i]['charset']);
        }
      }
    }

    return $card;
  }


  /**
   * Factory method to import a vcard file
   *
   * @param string vCard file content
   * @return array List of rcube_vcard objects
   */
  public static function import($data)
  {
    $out = array();

    // check if charsets are specified (usually vcard version < 3.0 but this is not reliable)
    if (preg_match('/charset=/i', substr($data, 0, 2048)))
      $charset = null;
    // detect charset and convert to utf-8
    else if (($charset = self::detect_encoding($data)) && $charset != RCMAIL_CHARSET) {
      $data = rcube_charset_convert($data, $charset);
      $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data); // also remove BOM
      $charset = RCMAIL_CHARSET;
    }

    $vcard_block = '';
    $in_vcard_block = false;

    foreach (preg_split("/[\r\n]+/", $data) as $i => $line) {
      if ($in_vcard_block && !empty($line))
        $vcard_block .= $line . "\n";

      $line = trim($line);

      if (preg_match('/^END:VCARD$/i', $line)) {
        // parse vcard
        $obj = new rcube_vcard(self::cleanup($vcard_block), $charset, true);
        if (!empty($obj->displayname))
          $out[] = $obj;

        $in_vcard_block = false;
      }
      else if (preg_match('/^BEGIN:VCARD$/i', $line)) {
        $vcard_block = $line . "\n";
        $in_vcard_block = true;
      }
    }

    return $out;
  }


  /**
   * Normalize vcard data for better parsing
   *
   * @param string vCard block
   * @return string Cleaned vcard block
   */
  private static function cleanup($vcard)
  {
    // Convert special types (like Skype) to normal type='skype' classes with this simple regex ;)
    $vcard = preg_replace(
      '/item(\d+)\.(TEL|URL)([^:]*?):(.*?)item\1.X-ABLabel:(?:_\$!<)?([\w-() ]*)(?:>!\$_)?./s',
      '\2;type=\5\3:\4',
      $vcard);

    // Remove cruft like item1.X-AB*, item1.ADR instead of ADR, and empty lines
    $vcard = preg_replace(array('/^item\d*\.X-AB.*$/m', '/^item\d*\./m', "/\n+/"), array('', '', "\n"), $vcard);

    // if N doesn't have any semicolons, add some 
    $vcard = preg_replace('/^(N:[^;\R]*)$/m', '\1;;;;', $vcard);

    return $vcard;
  }

  private static function rfc2425_fold_callback($matches)
  {
    return ":\n  ".rtrim(chunk_split($matches[1], 72, "\n  "));
  }

  private static function rfc2425_fold($val)
  {
    return preg_replace_callback('/:([^\n]{72,})/', array('self', 'rfc2425_fold_callback'), $val) . "\n";
  }


  /**
   * Decodes a vcard block (vcard 3.0 format, unfolded)
   * into an array structure
   *
   * @param string vCard block to parse
   * @return array Raw data structure
   */
  private static function vcard_decode($vcard)
  {
    // Perform RFC2425 line unfolding
    $vcard = preg_replace(array("/\r/", "/\n\s+/"), '', $vcard);
    
    $lines = preg_split('/\r?\n/', $vcard);
    $data = array();
    
    for ($i=0; $i < count($lines); $i++) {
      if (!preg_match('/^([^\\:]*):(.+)$/', $lines[$i], $line))
          continue;

      // convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet;TYPE=home:"
      if (($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) {
        $line[1] = $regs2[1];
        foreach (explode(';', $regs2[2]) as $prop)
          $line[1] .= ';' . (strpos($prop, '=') ? $prop : 'TYPE='.$prop);
      }

      if (!preg_match('/^(BEGIN|END)$/i', $line[1]) && preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) {
        $entry = array();
        $field = strtoupper($regs2[1][0]);

        foreach($regs2[1] as $attrid => $attr) {
          if ((list($key, $value) = explode('=', $attr)) && $value) {
            $value = trim($value);
            if ($key == 'ENCODING') {
              // add next line(s) to value string if QP line end detected
              while ($value == 'QUOTED-PRINTABLE' && preg_match('/=$/', $lines[$i]))
                  $line[2] .= "\n" . $lines[++$i];
              
              $line[2] = self::decode_value($line[2], $value);
            }
            else
              $entry[strtolower($key)] = array_merge((array)$entry[strtolower($key)], (array)self::vcard_unquote($value, ','));
          }
          else if ($attrid > 0) {
            $entry[$key] = true;  // true means attr without =value
          }
        }

        $entry = array_merge($entry, (array)self::vcard_unquote($line[2]));
        $data[$field][] = $entry;
      }
    }

    unset($data['VERSION']);
    return $data;
  }


  /**
   * Split quoted string
   *
   * @param string vCard string to split
   * @param string Separator char/string
   * @return array List with splitted values
   */
  private static function vcard_unquote($s, $sep = ';')
  {
    // break string into parts separated by $sep, but leave escaped $sep alone
    if (count($parts = explode($sep, strtr($s, array("\\$sep" => "\007")))) > 1) {
      foreach($parts as $s) {
        $result[] = self::vcard_unquote(strtr($s, array("\007" => "\\$sep")), $sep);
      }
      return $result;
    }
    else {
      return strtr($s, array("\r" => '', '\\\\' => '\\', '\n' => "\n", '\,' => ',', '\;' => ';', '\:' => ':'));
    }
  }


  /**
   * Decode a given string with the encoding rule from ENCODING attributes
   *
   * @param string String to decode
   * @param string Encoding type (quoted-printable and base64 supported)
   * @return string Decoded 8bit value
   */
  private static function decode_value($value, $encoding)
  {
    switch (strtolower($encoding)) {
      case 'quoted-printable':
        self::$values_decoded = true;
        return quoted_printable_decode($value);

      case 'base64':
        self::$values_decoded = true;
        return base64_decode($value);

      default:
        return $value;
    }
  }


  /**
   * Encodes an entry for storage in our database (vcard 3.0 format, unfolded)
   *
   * @param array Raw data structure to encode
   * @return string vCard encoded string
   */
  static function vcard_encode($data)
  {
    foreach((array)$data as $type => $entries) {
      /* valid N has 5 properties */
      while ($type == "N" && is_array($entries[0]) && count($entries[0]) < 5)
        $entries[0][] = "";

      foreach((array)$entries as $entry) {
        $attr = '';
        if (is_array($entry)) {
          $value = array();
          foreach($entry as $attrname => $attrvalues) {
            if (is_int($attrname))
              $value[] = $attrvalues;
            elseif ($attrvalues === true)
              $attr .= ";$attrname";    // true means just tag, not tag=value, as in PHOTO;BASE64:...
            else {
              foreach((array)$attrvalues as $attrvalue)
                $attr .= ";$attrname=" . self::vcard_quote($attrvalue, ',');
            }
          }
        }
        else {
          $value = $entry;
        }

        $vcard .= self::vcard_quote($type) . $attr . ':' . self::vcard_quote($value) . "\n";
      }
    }

    return "BEGIN:VCARD\nVERSION:3.0\n{$vcard}END:VCARD";
  }


  /**
   * Join indexed data array to a vcard quoted string
   *
   * @param array Field data
   * @param string Separator
   * @return string Joined and quoted string
   */
  private static function vcard_quote($s, $sep = ';')
  {
    if (is_array($s)) {
      foreach($s as $part) {
        $r[] = self::vcard_quote($part, $sep);
      }
      return(implode($sep, (array)$r));
    }
    else {
      return strtr($s, array('\\' => '\\\\', "\r" => '', "\n" => '\n', ';' => '\;', ':' => '\:'));
    }
  }


  /**
   * Returns UNICODE type based on BOM (Byte Order Mark)
   *
   * @param string Input string to test
   * @return string Detected encoding
   */
  private static function detect_encoding($string)
  {
    if (substr($string, 0, 4) == "\0\0\xFE\xFF") return 'UTF-32BE';  // Big Endian
    if (substr($string, 0, 4) == "\xFF\xFE\0\0") return 'UTF-32LE';  // Little Endian
    if (substr($string, 0, 2) == "\xFE\xFF")     return 'UTF-16BE';  // Big Endian
    if (substr($string, 0, 2) == "\xFF\xFE")     return 'UTF-16LE';  // Little Endian
    if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8';

    // use mb_detect_encoding()
    $encodings = array('UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3',
      'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
      'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
      'WINDOWS-1252', 'WINDOWS-1251', 'BIG5', 'GB2312');

    if (function_exists('mb_detect_encoding') && ($enc = mb_detect_encoding($string, $encodings)))
      return $enc;

    // No match, check for UTF-8
    // from http://w3.org/International/questions/qa-forms-utf-8.html
    if (preg_match('/\A(
        [\x09\x0A\x0D\x20-\x7E]
        | [\xC2-\xDF][\x80-\xBF]
        | \xE0[\xA0-\xBF][\x80-\xBF]
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
        | \xED[\x80-\x9F][\x80-\xBF]
        | \xF0[\x90-\xBF][\x80-\xBF]{2}
        | [\xF1-\xF3][\x80-\xBF]{3}
        | \xF4[\x80-\x8F][\x80-\xBF]{2}
        )*\z/xs', substr($string, 0, 2048)))
      return 'UTF-8';

    return rcmail::get_instance()->config->get('default_charset', 'ISO-8859-1'); # fallback to Latin-1
  }

}