/**************************************************************************
 * 
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/

#include <stdlib.h>
#include "sl_pp_context.h"
#include "sl_pp_token.h"


static int
_is_identifier_char(char c)
{
   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
}


static int
_tokenise_identifier(struct sl_pp_context *context,
                     const char **pinput,
                     struct sl_pp_token_info *info)
{
   const char *input = *pinput;
   char identifier[256];   /* XXX: Remove this artifical limit. */
   unsigned int i = 0;

   info->token = SL_PP_IDENTIFIER;
   info->data.identifier = -1;

   identifier[i++] = *input++;
   while (_is_identifier_char(*input)) {
      if (i >= sizeof(identifier) - 1) {
         strcpy(context->error_msg, "out of memory");
         return -1;
      }
      identifier[i++] = *input++;
   }
   identifier[i++] = '\0';

   info->data.identifier = sl_pp_context_add_unique_str(context, identifier);
   if (info->data.identifier == -1) {
      return -1;
   }

   *pinput = input;
   return 0;
}


/*
 * Return the number of consecutive decimal digits in the input stream.
 */
static unsigned int
_parse_float_digits(const char *input)
{
   unsigned int eaten = 0;

   while (input[eaten] >= '0' && input[eaten] <= '9') {
      eaten++;
   }
   return eaten;
}


/*
 * Try to match one of the following patterns for the fractional part
 * of a floating point number.
 *
 * digits . [digits]
 * . digits
 *
 * Return 0 if the pattern could not be matched, otherwise the number
 * of eaten characters from the input stream.
 */
static unsigned int
_parse_float_frac(const char *input)
{
   unsigned int eaten;

   if (input[0] == '.') {
      eaten = _parse_float_digits(&input[1]);
      if (eaten) {
         return eaten + 1;
      }
      return 0;
   }

   eaten = _parse_float_digits(input);
   if (eaten && input[eaten] == '.') {
      unsigned int trailing;

      trailing = _parse_float_digits(&input[eaten + 1]);
      if (trailing) {
         return eaten + trailing + 1;
      }
      return eaten + 1;
   }

   return 0;
}


/*
 * Try to match the following pattern for the exponential part
 * of a floating point number.
 *
 * (e|E) [(+|-)] digits
 *
 * Return 0 if the pattern could not be matched, otherwise the number
 * of eaten characters from the input stream.
 */
static unsigned int
_parse_float_exp(const char *input)
{
   unsigned int eaten, digits;

   if (input[0] != 'e' && input[0] != 'E') {
      return 0;
   }

   if (input[1] == '-' || input[1] == '+') {
      eaten = 2;
   } else {
      eaten = 1;
   }

   digits = _parse_float_digits(&input[eaten]);
   if (!digits) {
      return 0;
   }

   return eaten + digits;
}


/*
 * Try to match one of the following patterns for a floating point number.
 *
 * fract [exp] [(f|F)]
 * digits exp [(f|F)]
 *
 * Return 0 if the pattern could not be matched, otherwise the number
 * of eaten characters from the input stream.
 */
static unsigned int
_parse_float(const char *input)
{
   unsigned int eaten;

   eaten = _parse_float_frac(input);
   if (eaten) {
      unsigned int exponent;

      exponent = _parse_float_exp(&input[eaten]);
      if (exponent) {
         eaten += exponent;
      }

      if (input[eaten] == 'f' || input[eaten] == 'F') {
         eaten++;
      }

      return eaten;
   }

   eaten = _parse_float_digits(input);
   if (eaten) {
      unsigned int exponent;

      exponent = _parse_float_exp(&input[eaten]);
      if (exponent) {
         eaten += exponent;

         if (input[eaten] == 'f' || input[eaten] == 'F') {
            eaten++;
         }

         return eaten;
      }
   }

   return 0;
}


static unsigned int
_parse_hex(const char *input)
{
   unsigned int n;

   if (input[0] != '0') {
      return 0;
   }

   if (input[1] != 'x' && input[1] != 'X') {
      return 0;
   }

   n = 2;
   while ((input[n] >= '0' && input[n] <= '9') ||
          (input[n] >= 'a' && input[n] <= 'f') ||
          (input[n] >= 'A' && input[n] <= 'F')) {
      n++;
   }

   if (n > 2) {
      return n;
   }

   return 0;
}


static unsigned int
_parse_oct(const char *input)
{
   unsigned int n;

   if (input[0] != '0') {
      return 0;
   }

   n = 1;
   while ((input[n] >= '0' && input[n] <= '7')) {
      n++;
   }

   return n;
}


static unsigned int
_parse_dec(const char *input)
{
   unsigned int n = 0;

   while ((input[n] >= '0' && input[n] <= '9')) {
      n++;
   }

   return n;
}


static int
_tokenise_number(struct sl_pp_context *context,
                 const char **pinput,
                 struct sl_pp_token_info *info)
{
   const char *input = *pinput;
   unsigned int eaten;
   char number[256];   /* XXX: Remove this artifical limit. */

   eaten = _parse_float(input);
   if (!eaten) {
      eaten = _parse_hex(input);
      if (!eaten) {
         eaten = _parse_oct(input);
         if (!eaten) {
            eaten = _parse_dec(input);
         }
      }
   }

   if (!eaten || _is_identifier_char(input[eaten])) {
      strcpy(context->error_msg, "expected a number");
      return -1;
   }

   if (eaten > sizeof(number) - 1) {
      strcpy(context->error_msg, "out of memory");
      return -1;
   }

   memcpy(number, input, eaten);
   number[eaten] = '\0';

   info->token = SL_PP_NUMBER;
   info->data.number = sl_pp_context_add_unique_str(context, number);
   if (info->data.number == -1) {
      return -1;
   }

   *pinput = input + eaten;
   return 0;
}


int
sl_pp_tokenise(struct sl_pp_context *context,
               const char *input,
               struct sl_pp_token_info **output)
{
   struct sl_pp_token_info *out = NULL;
   unsigned int out_len = 0;
   unsigned int out_max = 0;

   for (;;) {
      struct sl_pp_token_info info;

      switch (*input) {
      case ' ':
      case '\t':
         input++;
         info.token = SL_PP_WHITESPACE;
         break;

      case '\n':
         input++;
         info.token = SL_PP_NEWLINE;
         break;

      case '#':
         input++;
         info.token = SL_PP_HASH;
         break;

      case ',':
         input++;
         info.token = SL_PP_COMMA;
         break;

      case ';':
         input++;
         info.token = SL_PP_SEMICOLON;
         break;

      case '{':
         input++;
         info.token = SL_PP_LBRACE;
         break;

      case '}':
         input++;
         info.token = SL_PP_RBRACE;
         break;

      case '(':
         input++;
         info.token = SL_PP_LPAREN;
         break;

      case ')':
         input++;
         info.token = SL_PP_RPAREN;
         break;

      case '[':
         input++;
         info.token = SL_PP_LBRACKET;
         break;

      case ']':
         input++;
         info.token = SL_PP_RBRACKET;
         break;

      case '.':
         if (input[1] >= '0' && input[1] <= '9') {
            if (_tokenise_number(context, &input, &info)) {
               free(out);
               return -1;
            }
         } else {
            input++;
            info.token = SL_PP_DOT;
         }
         break;

      case '+':
         input++;
         if (*input == '+') {
            input++;
            info.token = SL_PP_INCREMENT;
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_ADDASSIGN;
         } else {
            info.token = SL_PP_PLUS;
         }
         break;

      case '-':
         input++;
         if (*input == '-') {
            input++;
            info.token = SL_PP_DECREMENT;
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_SUBASSIGN;
         } else {
            info.token = SL_PP_MINUS;
         }
         break;

      case '~':
         input++;
         info.token = SL_PP_BITNOT;
         break;

      case '!':
         input++;
         if (*input == '=') {
            input++;
            info.token = SL_PP_NOTEQUAL;
         } else {
            info.token = SL_PP_NOT;
         }
         break;

      case '*':
         input++;
         if (*input == '=') {
            input++;
            info.token = SL_PP_MULASSIGN;
         } else {
            info.token = SL_PP_STAR;
         }
         break;

      case '/':
         input++;
         if (*input == '=') {
            input++;
            info.token = SL_PP_DIVASSIGN;
         } else {
            info.token = SL_PP_SLASH;
         }
         break;

      case '%':
         input++;
         if (*input == '=') {
            input++;
            info.token = SL_PP_MODASSIGN;
         } else {
            info.token = SL_PP_MODULO;
         }
         break;

      case '<':
         input++;
         if (*input == '<') {
            input++;
            if (*input == '=') {
               input++;
               info.token = SL_PP_LSHIFTASSIGN;
            } else {
               info.token = SL_PP_LSHIFT;
            }
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_LESSEQUAL;
         } else {
            info.token = SL_PP_LESS;
         }
         break;

      case '>':
         input++;
         if (*input == '>') {
            input++;
            if (*input == '=') {
               input++;
               info.token = SL_PP_RSHIFTASSIGN;
            } else {
               info.token = SL_PP_RSHIFT;
            }
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_GREATEREQUAL;
         } else {
            info.token = SL_PP_GREATER;
         }
         break;

      case '=':
         input++;
         if (*input == '=') {
            input++;
            info.token = SL_PP_EQUAL;
         } else {
            info.token = SL_PP_ASSIGN;
         }
         break;

      case '&':
         input++;
         if (*input == '&') {
            input++;
            info.token = SL_PP_AND;
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_BITANDASSIGN;
         } else {
            info.token = SL_PP_BITAND;
         }
         break;

      case '^':
         input++;
         if (*input == '^') {
            input++;
            info.token = SL_PP_XOR;
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_BITXORASSIGN;
         } else {
            info.token = SL_PP_BITXOR;
         }
         break;

      case '|':
         input++;
         if (*input == '|') {
            input++;
            info.token = SL_PP_OR;
         } else if (*input == '=') {
            input++;
            info.token = SL_PP_BITORASSIGN;
         } else {
            info.token = SL_PP_BITOR;
         }
         break;

      case '?':
         input++;
         info.token = SL_PP_QUESTION;
         break;

      case ':':
         input++;
         info.token = SL_PP_COLON;
         break;

      case '\0':
         info.token = SL_PP_EOF;
         break;

      default:
         if ((*input >= 'a' && *input <= 'z') ||
             (*input >= 'A' && *input <= 'Z') ||
             (*input == '_')) {
            if (_tokenise_identifier(context, &input, &info)) {
               free(out);
               return -1;
            }
         } else if (*input >= '0' && *input <= '9') {
            if (_tokenise_number(context, &input, &info)) {
               free(out);
               return -1;
            }
         } else {
            info.data.other = *input++;
            info.token = SL_PP_OTHER;
         }
      }

      if (out_len >= out_max) {
         unsigned int new_max = out_max;

         if (new_max < 0x100) {
            new_max = 0x100;
         } else if (new_max < 0x10000) {
            new_max *= 2;
         } else {
            new_max += 0x10000;
         }

         out = realloc(out, new_max * sizeof(struct sl_pp_token_info));
         if (!out) {
            strcpy(context->error_msg, "out of memory");
            return -1;
         }
         out_max = new_max;
      }

      out[out_len++] = info;

      if (info.token == SL_PP_EOF) {
         break;
      }
   }

   *output = out;
   return 0;
}