8 files changed, 1106 insertions, 616 deletions
diff --git a/.gitignore b/.gitignore
index d67bd38c93..b88f0cc75c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ glcpp-parse.h
 *~
 tests/*.expected
 tests/*.gcc
+tests/*.glcpp
 tests/*.out
diff --git a/Makefile b/Makefile
index 88116128f8..0c06aa880f 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ CFLAGS = -g
 override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused
 
 glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o
-	gcc -o $@ -ltalloc $^
+	gcc -o $@ $^ -ltalloc -lm
 
 %.c %.h: %.y
 	bison --debug --defines=$*.h --output=$*.c $^
diff --git a/README b/README
index ba833a49ff..ab42a3ffe1 100644
--- a/README
+++ b/README
@@ -12,3 +12,19 @@ preprocessors". To fill in these details, I've been using the C99
 standard (for which I had a convenient copy) as available from:
 
 http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
+
+Known limitations
+-----------------
+Macro invocations cannot include embedded newlines.
+
+The __LINE__, __FILE__, and __VERSION__ macros are not yet supported.
+
+The argument of the 'defined' operator cannot yet include enclosing
+parentheses.
+
+The #error, #pragma, #extension, #version, and #line directives are
+not yet supported.
+
+A file that ends with a function-like macro name as its last
+non-whitespace token results in a parse error (it should instead be
+passed through as is).
+\ No newline at end of file
diff --git a/glcpp-lex.l b/glcpp-lex.l
index ee1f6e3aee..52269c6b30 100644
--- a/glcpp-lex.l
+++ b/glcpp-lex.l
@@ -32,21 +32,14 @@
 %option reentrant noyywrap
 %option extra-type="glcpp_parser_t *"
 
-%x ST_DEFINE
-%x ST_DEFINE_OBJ_OR_FUNC
-%x ST_DEFINE_PARAMETER
-%x ST_DEFINE_VALUE
-%x ST_IF
-%x ST_UNDEF
-%x ST_UNDEF_END
-
 SPACE		[[:space:]]
 NONSPACE	[^[:space:]]
 NEWLINE		[\n]
 HSPACE		[ \t]
 HASH		^{HSPACE}*#{HSPACE}*
 IDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
-TOKEN		[^[:space:](),]+
+PUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]
+OTHER		[^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
 
 DECIMAL_INTEGER		[1-9][0-9]*[uU]?
 OCTAL_INTEGER		0[0-7]*[uU]?
@@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 
 %%
 
-{HASH}if{HSPACE}* {
-	BEGIN ST_IF;
-	return IF;
+{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
+	yyextra->space_tokens = 0;
+	return HASH_DEFINE_FUNC;
 }
 
-{HASH}elif{HSPACE}* {
-	BEGIN ST_IF;
-	return ELIF;
+{HASH}define {
+	yyextra->space_tokens = 0;
+	return HASH_DEFINE_OBJ;
 }
 
-<ST_IF>{DECIMAL_INTEGER} {
-	yylval.ival = strtoll (yytext, NULL, 10);
-	return INTEGER;
+{HASH}undef {
+	yyextra->space_tokens = 0;
+	return HASH_UNDEF;
 }
 
-<ST_IF>{OCTAL_INTEGER} {
-	yylval.ival = strtoll (yytext + 1, NULL, 8);
-	return INTEGER;
+{HASH}if {
+	yyextra->space_tokens = 0;
+	return HASH_IF;
 }
 
-<ST_IF>{HEXADECIMAL_INTEGER} {
-	yylval.ival = strtoll (yytext + 2, NULL, 16);
-	return INTEGER;
+{HASH}elif {
+	yyextra->space_tokens = 0;
+	return HASH_ELIF;
 }
 
-<ST_IF>"defined" {
-	return DEFINED;
+{HASH}else {
+	yyextra->space_tokens = 0;
+	return HASH_ELSE;
 }
 
-<ST_IF>"<<" {
-	return LEFT_SHIFT;
+{HASH}endif {
+	yyextra->space_tokens = 0;
+	return HASH_ENDIF;
 }
 
-<ST_IF>">>" {
-	return RIGHT_SHIFT;
+{HASH} {
+	yyextra->space_tokens = 0;
+	return HASH;
 }
 
-<ST_IF>"<=" {
-	return LESS_OR_EQUAL;
-}
-
-<ST_IF>">=" {
-	return GREATER_OR_EQUAL;
-}
-
-<ST_IF>"==" {
-	return EQUAL;
-}
-
-<ST_IF>"!=" {
-	return NOT_EQUAL;
-}
-
-<ST_IF>"&&" {
-	return AND;
-}
-
-<ST_IF>"||" {
-	return OR;
-}
-
-<ST_IF>[-+*/%<>&^|()~] {
-	return yytext[0];
+{DECIMAL_INTEGER} {
+	yylval.str = xtalloc_strdup (yyextra, yytext);
+	return INTEGER_STRING;
 }
 
-<ST_IF>{IDENTIFIER} {
+{OCTAL_INTEGER} {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+	return INTEGER_STRING;
 }
 
-<ST_IF>{HSPACE}+
-
-<ST_IF>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+{HEXADECIMAL_INTEGER} {
+	yylval.str = xtalloc_strdup (yyextra, yytext);
+	return INTEGER_STRING;
 }
 
-{HASH}endif{HSPACE}* {
-	return ENDIF;
+"<<"  {
+	return LEFT_SHIFT;
 }
 
-{HASH}else{HSPACE}* {
-	return ELSE;
+">>" {
+	return RIGHT_SHIFT;
 }
 
-{HASH}undef{HSPACE}* {
-	BEGIN ST_UNDEF;
-	return UNDEF;
+"<=" {
+	return LESS_OR_EQUAL;
 }
 
-<ST_UNDEF>{IDENTIFIER} {
-	BEGIN ST_UNDEF_END;
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+">=" {
+	return GREATER_OR_EQUAL;
 }
 
-<ST_UNDEF_END>{HSPACE}*
-
-<ST_UNDEF_END>\n {
-	BEGIN INITIAL;
+"==" {
+	return EQUAL;
 }
 
-	/* We use the ST_DEFINE and ST_DEFVAL states so that we can
-	 * pass a space token, (yes, a token for whitespace!), since
-	 * the preprocessor specification requires distinguishing
-	 * "#define foo()" from "#define foo ()".
-	 */
-{HASH}define{HSPACE}* {
-	BEGIN ST_DEFINE;
-	return DEFINE;
+"!=" {
+	return NOT_EQUAL;
 }
 
-<ST_DEFINE>{IDENTIFIER}	{
-	BEGIN ST_DEFINE_OBJ_OR_FUNC;
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+"&&" {
+	return AND;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+"||" {
+	return OR;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
-	BEGIN ST_DEFINE_VALUE;
-	return SPACE;
+"##" {
+	return PASTE;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>"(" {
-	BEGIN ST_DEFINE_PARAMETER;
-	return '(';
+"defined" {
+	return DEFINED;
 }
 
-<ST_DEFINE_PARAMETER>{IDENTIFIER} {
+{IDENTIFIER} {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
 	return IDENTIFIER;
 }
 
-<ST_DEFINE_PARAMETER>"," {
-	return ',';
-}
-
-<ST_DEFINE_PARAMETER>")" {
-	BEGIN ST_DEFINE_VALUE;
-	return ')';
-}
-
-<ST_DEFINE_PARAMETER>{HSPACE}+
-
-<ST_DEFINE_VALUE>{TOKEN} {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
-}
-
-<ST_DEFINE_VALUE>[(),] {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
-}
-
-<ST_DEFINE_VALUE>{HSPACE}+
-
-<ST_DEFINE_VALUE>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+{PUNCTUATION} {
+	return yytext[0];
 }
 
-{IDENTIFIER} {
-	int parameter_index;
+{OTHER}+ {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
-	switch (glcpp_parser_classify_token (yyextra, yylval.str,
-					     &parameter_index))
-	{
-		case TOKEN_CLASS_IDENTIFIER:
-			return IDENTIFIER;
-		break;
-		case TOKEN_CLASS_IDENTIFIER_FINALIZED:
-			return IDENTIFIER_FINALIZED;
-		break;
-		case TOKEN_CLASS_FUNC_MACRO:
-			return FUNC_MACRO;
-		break;
-		case TOKEN_CLASS_OBJ_MACRO:
-			return OBJ_MACRO;
-		break;
-
-	}
-}
-
-[(),]	{
-	return yytext[0];
+	return OTHER;
 }
 
-{TOKEN} {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
+{HSPACE}+ {
+	if (yyextra->space_tokens) {
+		return SPACE;
+	}
 }
 
 \n {
-	yyextra->need_newline = 1;
+	return NEWLINE;
 }
 
-{HSPACE}+
-
 %%
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 2c0fe9a6af..f4c834e038 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -25,69 +25,88 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <inttypes.h>
 
 #include "glcpp.h"
 
-void
+static void
 yyerror (void *scanner, const char *error);
 
-void
+static void
 _define_object_macro (glcpp_parser_t *parser,
 		      const char *macro,
 		      token_list_t *replacements);
 
-void
+static void
 _define_function_macro (glcpp_parser_t *parser,
 			const char *macro,
 			string_list_t *parameters,
 			token_list_t *replacements);
 
-void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier);
-
-void
-_expand_function_macro (glcpp_parser_t *parser,
-			const char *identifier,
-			argument_list_t *arguments);
-
-string_list_t *
+static string_list_t *
 _string_list_create (void *ctx);
 
-void
+static void
 _string_list_append_item (string_list_t *list, const char *str);
 
-void
+static void
 _string_list_append_list (string_list_t *list, string_list_t *tail);
 
-int
+static void
+_string_list_push (string_list_t *list, const char *str);
+
+static void
+_string_list_pop (string_list_t *list);
+
+static int
 _string_list_contains (string_list_t *list, const char *member, int *index);
 
-int
+static int
 _string_list_length (string_list_t *list);
 
-argument_list_t *
+static argument_list_t *
 _argument_list_create (void *ctx);
 
-void
+static void
 _argument_list_append (argument_list_t *list, token_list_t *argument);
 
-int
+static int
 _argument_list_length (argument_list_t *list);
 
-token_list_t *
+static token_list_t *
 _argument_list_member_at (argument_list_t *list, int index);
 
-token_list_t *
+/* Note: This function talloc_steal()s the str pointer. */
+static token_t *
+_token_create_str (void *ctx, int type, char *str);
+
+static token_t *
+_token_create_ival (void *ctx, int type, int ival);
+
+static token_list_t *
 _token_list_create (void *ctx);
 
-void
-_token_list_append (token_list_t *list, int type, const char *value);
+/* Note: This function adds a talloc_reference() to token.
+ *
+ * You may want to talloc_unlink any current reference if you no
+ * longer need it. */
+static void
+_token_list_append (token_list_t *list, token_t *token);
 
-void
+static void
 _token_list_append_list (token_list_t *list, token_list_t *tail);
 
 static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser);
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+				token_list_t *list);
+
+static void
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+				 token_list_t *list);
+
+static void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+					 token_list_t *list);
 
 static void
 _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
@@ -95,7 +114,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
 static void
 _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
 				    int condition);
-			
+
 static void
 _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
 
@@ -104,29 +123,21 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
 static int
 glcpp_parser_lex (glcpp_parser_t *parser);
 
-%}
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list);
 
-%union {
-	intmax_t imaxval;
-	int ival;
-	char *str;
-	argument_list_t *argument_list;
-	string_list_t *string_list;
-	token_t token;
-	token_list_t *token_list;
-}
+%}
 
 %parse-param {glcpp_parser_t *parser}
 %lex-param {glcpp_parser_t *parser}
 
-%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <ival> punctuator
-%type <imaxval> expression INTEGER
-%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
-%type <argument_list> argument_list
-%type <string_list> macro parameter_list
-%type <token> TOKEN argument_word argument_word_or_comma
-%type <token_list> argument argument_or_comma replacement_list pp_tokens
+%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING NEWLINE OTHER PLACEHOLDER SPACE
+%token PASTE
+%type <ival> expression INTEGER operator SPACE
+%type <str> IDENTIFIER INTEGER_STRING OTHER
+%type <string_list> identifier_list
+%type <token> preprocessing_token
+%type <token_list> pp_tokens replacement_list text_line
 %left OR
 %left AND
 %left '|'
@@ -139,228 +150,120 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 %left '*' '/' '%'
 %right UNARY
 
-/* Hard to remove shift/reduce conflicts documented as follows:
- *
- * 1. '(' after FUNC_MACRO name which is correctly resolved to shift
- *    to form macro invocation rather than reducing directly to
- *    content.
- *
- * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to
- *    shift to form macro invocation rather than reducing directly to
- *    argument.
- *
- * 3. Similarly again now that we added argument_or_comma as well.
- */
-%expect 3
-
 %%
 
-	 /* We do all printing at the input level. */
 input:
-	/* empty */ {
-		parser->just_printed_separator = 1;
-	}
-|	input content {
-		int is_token;
-		int skipping = 0;
-
-		if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP)
-			skipping = 1;
-
-		if ($2 && strlen ($2) && ! skipping) {
-			int c = $2[0];
-			int is_not_separator = ((c >= 'a' && c <= 'z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= '0' && c <= '9') ||
-						(c == '_'));
-
-			if (! parser->just_printed_separator && is_not_separator)
-			{
-				printf (" ");
-			}
-			printf ("%s", $2);
-
-			if (is_not_separator)
-				parser->just_printed_separator = 0;
-			else
-				parser->just_printed_separator = 1;
-		}
-
-		if ($2)
-			talloc_free ($2);
+	/* empty */
+|	input line
+;
 
-		if (parser->need_newline) {
+line:
+	control_line {
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
 			printf ("\n");
-			parser->just_printed_separator = 1;
-			parser->need_newline = 0;
 		}
 	}
-;
-
-content:
-	IDENTIFIER {
-		$$ = $1;
-	}
-|	IDENTIFIER_FINALIZED {
-		$$ = $1;
-	}
-|	TOKEN {
-		$$ = $1.value;
-	}
-|	FUNC_MACRO {
-		$$ = $1;
-	}
-|	directive {
-		$$ = talloc_strdup (parser, "\n");
-	}
-|	punctuator {
-		$$ = talloc_asprintf (parser, "%c", $1);
-	}
-|	macro {
-		$$ = NULL;
-	}
-;
-
-punctuator:
-	'('	{ $$ = '('; }
-|	')'	{ $$ = ')'; }
-|	','	{ $$ = ','; }
-	;
-
-macro:
-	FUNC_MACRO '(' argument_list ')' {
-		_expand_function_macro (parser, $1, $3);
-	}
-|	OBJ_MACRO {
-		_expand_object_macro (parser, $1);
+|	text_line {
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
+			_glcpp_parser_print_expanded_token_list (parser, $1);
+			printf ("\n");
+		}
 		talloc_free ($1);
 	}
+|	expanded_line
+|	HASH non_directive
 ;
 
-argument_list:
-	/* empty */ {
-		$$ = _argument_list_create (parser);
-	}
-|	argument {
-		$$ = _argument_list_create (parser);
-		_argument_list_append ($$, $1);
-	}
-|	argument_list ',' argument {
-		_argument_list_append ($1, $3);
-		$$ = $1;
-	}
-;
-
-argument:
-	argument_word {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument argument_word {
-		_token_list_append ($1, $2.type, $2.value);
-		talloc_free ($2.value);
-		$$ = $1;
-	}
-|	argument '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
-	}
-;
-
-argument_word:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-;
-
-	/* XXX: The body of argument_or_comma is the same as the body
-	 * of argument, but with "argument" and "argument_word"
-	 * changed to "argument_or_comma" and
-	 * "argument_word_or_comma". It would be nice to have less
-	 * redundancy here, but I'm not sure how.
-	 *
-	 * It would also be nice to have a less ugly grammar to have
-	 * to implement, but such is the C preprocessor.
-	 */
-argument_or_comma:
-	argument_word_or_comma {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument_or_comma argument_word_or_comma {
-		_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
+expanded_line:
+	IF_EXPANDED expression NEWLINE {
+		_glcpp_parser_skip_stack_push_if (parser, $2);
 	}
-|	argument_or_comma '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
+|	ELIF_EXPANDED expression NEWLINE {
+		_glcpp_parser_skip_stack_change_if (parser, "elif", $2);
 	}
 ;
 
-argument_word_or_comma:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-|	',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); }
-;
-
-directive:
-	DEFINE IDENTIFIER NEWLINE {
-		token_list_t *list = _token_list_create (parser);
-		_define_object_macro (parser, $2, list);
+control_line:
+	HASH_DEFINE_OBJ	IDENTIFIER replacement_list NEWLINE {
+		_define_object_macro (parser, $2, $3);
 	}
-|	DEFINE IDENTIFIER SPACE replacement_list NEWLINE {
-		_define_object_macro (parser, $2, $4);
+|	HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE {
+		_define_function_macro (parser, $2, NULL, $5);
 	}
-|	DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE {
+|	HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE {
 		_define_function_macro (parser, $2, $4, $6);
 	}
-|	IF expression NEWLINE {
-		_glcpp_parser_skip_stack_push_if (parser, $2);
+|	HASH_UNDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
+		if (macro) {
+			/* XXX: Need hash table to support a real way
+			 * to remove an element rather than prefixing
+			 * a new node with data of NULL like this. */
+			hash_table_insert (parser->defines, NULL, $2);
+			talloc_free (macro);
+		}
+		talloc_free ($2);
 	}
-|	IFDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
+|	HASH_IF pp_tokens NEWLINE {
+		token_list_t *expanded;
+		token_t *token;
+
+		expanded = _token_list_create (parser);
+		token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED);
+		_token_list_append (expanded, token);
+		talloc_unlink (parser, token);
+		_glcpp_parser_evaluate_defined (parser, $2);
+		_glcpp_parser_expand_token_list (parser, $2);
+		_token_list_append_list (expanded, $2);
+		glcpp_parser_lex_from (parser, expanded);
+	}
+|	HASH_IFDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
 		talloc_free ($2);
 		_glcpp_parser_skip_stack_push_if (parser, macro != NULL);
 	}
-|	IFNDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
+|	HASH_IFNDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
 		talloc_free ($2);
 		_glcpp_parser_skip_stack_push_if (parser, macro == NULL);
 	}
-|	ELIF expression NEWLINE {
-		_glcpp_parser_skip_stack_change_if (parser, "#elif", $2);
+|	HASH_ELIF pp_tokens NEWLINE {
+		token_list_t *expanded;
+		token_t *token;
+
+		expanded = _token_list_create (parser);
+		token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED);
+		_token_list_append (expanded, token);
+		talloc_unlink (parser, token);
+		_glcpp_parser_evaluate_defined (parser, $2);
+		_glcpp_parser_expand_token_list (parser, $2);
+		_token_list_append_list (expanded, $2);
+		glcpp_parser_lex_from (parser, expanded);
 	}
-|	ELSE {
+|	HASH_ELSE NEWLINE {
 		_glcpp_parser_skip_stack_change_if (parser, "else", 1);
 	}
-|	ENDIF {
+|	HASH_ENDIF NEWLINE {
 		_glcpp_parser_skip_stack_pop (parser);
 	}
-|	UNDEF IDENTIFIER {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		if (macro) {
-			/* XXX: Need hash table to support a real way
-			 * to remove an element rather than prefixing
-			 * a new node with data of NULL like this. */
-			hash_table_insert (parser->defines, NULL, $2);
-			talloc_free (macro);
-		}
-		talloc_free ($2);
-	}
+|	HASH NEWLINE
 ;
 
 expression:
-	INTEGER {
+	INTEGER_STRING {
+		/* The lexer accepts both "0x" and "0X" hexadecimal
+		 * prefixes, so let strtoll() choose the base from
+		 * the prefix (base 0): "0x"/"0X" selects 16, a
+		 * leading "0" selects 8, anything else selects 10.
+		 * Conversion stops at an optional trailing u/U
+		 * suffix. */
+		$$ = strtoll ($1, NULL, 0);
+	}
+|	INTEGER {
 		$$ = $1;
 	}
 |	expression OR expression {
@@ -429,56 +332,105 @@ expression:
 |	'+' expression %prec UNARY {
 		$$ = + $2;
 	}
-|	DEFINED IDENTIFIER %prec UNARY {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		talloc_free ($2);
-		if (macro)
-			$$ = 1;
-		else
-			$$ = 0;
-	}
 |	'(' expression ')' {
 		$$ = $2;
 	}
 ;
 
-parameter_list:
-	/* empty */ {
-		$$ = _string_list_create (parser);
-	}
-|	IDENTIFIER {
+identifier_list:
+	IDENTIFIER {
 		$$ = _string_list_create (parser);
 		_string_list_append_item ($$, $1);
-		talloc_free ($1);
+		talloc_steal ($$, $1);
 	}
-|	parameter_list ',' IDENTIFIER {
-		_string_list_append_item ($1, $3);
-		talloc_free ($3);
-		$$ = $1;
+|	identifier_list ',' IDENTIFIER {
+		$$ = $1;	
+		_string_list_append_item ($$, $3);
+		talloc_steal ($$, $3);
 	}
 ;
 
+text_line:
+	NEWLINE { $$ = NULL; }
+|	pp_tokens NEWLINE
+;
+
+non_directive:
+	pp_tokens NEWLINE
+;
+
 replacement_list:
-	/* empty */ {
+	/* empty */ { $$ = NULL; }
+|	pp_tokens
+;
+
+pp_tokens:
+	preprocessing_token {
+		parser->space_tokens = 1;
 		$$ = _token_list_create (parser);
+		_token_list_append ($$, $1);
+		talloc_unlink (parser, $1);
 	}
-|	pp_tokens {
+|	pp_tokens preprocessing_token {
 		$$ = $1;
+		_token_list_append ($$, $2);
+		talloc_unlink (parser, $2);
 	}
 ;
 
-
-pp_tokens:
-	TOKEN {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
+preprocessing_token:
+	IDENTIFIER {
+		$$ = _token_create_str (parser, IDENTIFIER, $1);
 	}
-|	pp_tokens TOKEN {
-	_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
+|	INTEGER_STRING {
+		$$ = _token_create_str (parser, INTEGER_STRING, $1);
+	}
+|	operator {
+		$$ = _token_create_ival (parser, $1, $1);
+	}
+|	OTHER {
+		$$ = _token_create_str (parser, OTHER, $1);
+	}
+|	SPACE {
+		$$ = _token_create_ival (parser, SPACE, SPACE);
 	}
 ;
 
+operator:
+	'['			{ $$ = '['; }
+|	']'			{ $$ = ']'; }
+|	'('			{ $$ = '('; }
+|	')'			{ $$ = ')'; }
+|	'{'			{ $$ = '{'; }
+|	'}'			{ $$ = '}'; }
+|	'.'			{ $$ = '.'; }
+|	'&'			{ $$ = '&'; }
+|	'*'			{ $$ = '*'; }
+|	'+'			{ $$ = '+'; }
+|	'-'			{ $$ = '-'; }
+|	'~'			{ $$ = '~'; }
+|	'!'			{ $$ = '!'; }
+|	'/'			{ $$ = '/'; }
+|	'%'			{ $$ = '%'; }
+|	LEFT_SHIFT		{ $$ = LEFT_SHIFT; }
+|	RIGHT_SHIFT		{ $$ = RIGHT_SHIFT; }
+|	'<'			{ $$ = '<'; }
+|	'>'			{ $$ = '>'; }
+|	LESS_OR_EQUAL		{ $$ = LESS_OR_EQUAL; }
+|	GREATER_OR_EQUAL	{ $$ = GREATER_OR_EQUAL; }
+|	EQUAL			{ $$ = EQUAL; }
+|	NOT_EQUAL		{ $$ = NOT_EQUAL; }
+|	'^'			{ $$ = '^'; }
+|	'|'			{ $$ = '|'; }
+|	AND			{ $$ = AND; }
+|	OR			{ $$ = OR; }
+|	';'			{ $$ = ';'; }
+|	','			{ $$ = ','; }
+|	'='			{ $$ = '='; }
+|	PASTE			{ $$ = PASTE; }
+|	DEFINED			{ $$ = DEFINED; }
+;
+
 %%
 
 string_list_t *
@@ -512,7 +464,7 @@ _string_list_append_item (string_list_t *list, const char *str)
 
 	node = xtalloc (list, string_node_t);
 	node->str = xtalloc_strdup (node, str);
-		
+
 	node->next = NULL;
 
 	if (list->head == NULL) {
@@ -524,6 +476,42 @@ _string_list_append_item (string_list_t *list, const char *str)
 	list->tail = node;
 }
 
+/* Prepend a copy of 'str' (made with xtalloc_strdup) to 'list'. */
+void
+_string_list_push (string_list_t *list, const char *str)
+{
+	string_node_t *pushed = xtalloc (list, string_node_t);
+
+	pushed->str = xtalloc_strdup (pushed, str);
+	pushed->next = list->head;
+	list->head = pushed;
+
+	/* A node pushed onto a list with no tail is also its tail. */
+	if (! list->tail)
+		list->tail = pushed;
+}
+
+/* Remove and free the node at the head of 'list'. Popping an empty
+ * list is an internal error: it is reported and the program exits. */
+void
+_string_list_pop (string_list_t *list)
+{
+	string_node_t *front = list->head;
+
+	if (! front) {
+		fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n");
+		exit (1);
+	}
+
+	if (front == list->tail) {
+		/* Sole remaining node: the list becomes empty. */
+		assert (front->next == NULL);
+		list->tail = NULL;
+	}
+	list->head = front->next;
+	talloc_free (front);
+}
+
 int
 _string_list_contains (string_list_t *list, const char *member, int *index)
 {
@@ -576,9 +564,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument)
 {
 	argument_node_t *node;
 
-	if (argument == NULL || argument->head == NULL)
-		return;
-
 	node = xtalloc (list, argument_node_t);
 	node->argument = argument;
 
@@ -630,6 +615,31 @@ _argument_list_member_at (argument_list_t *list, int index)
 	return NULL;
 }
 
+/* Allocate (from 'ctx') a token of 'type' whose value is 'str'.
+ * Note: This function talloc_steal()s the str pointer. */
+token_t *
+_token_create_str (void *ctx, int type, char *str)
+{
+	token_t *created = xtalloc (ctx, token_t);
+
+	created->value.str = talloc_steal (created, str);
+	created->type = type;
+
+	return created;
+}
+
+/* Allocate (from 'ctx') a token of 'type' whose value is 'ival'. */
+token_t *
+_token_create_ival (void *ctx, int type, int ival)
+{
+	token_t *created = xtalloc (ctx, token_t);
+
+	created->value.ival = ival;
+	created->type = type;
+
+	return created;
+}
+
 token_list_t *
 _token_list_create (void *ctx)
 {
@@ -638,18 +648,18 @@ _token_list_create (void *ctx)
 	list = xtalloc (ctx, token_list_t);
 	list->head = NULL;
 	list->tail = NULL;
+	list->non_space_tail = NULL;
 
 	return list;
 }
 
 void
-_token_list_append (token_list_t *list, int type, const char *value)
+_token_list_append (token_list_t *list, token_t *token)
 {
 	token_node_t *node;
 
 	node = xtalloc (list, token_node_t);
-	node->type = type;
-	node->value = xtalloc_strdup (list, value);
+	node->token = xtalloc_reference (list, token);
 
 	node->next = NULL;
 
@@ -660,11 +670,16 @@ _token_list_append (token_list_t *list, int type, const char *value)
 	}
 
 	list->tail = node;
+	if (token->type != SPACE)
+		list->non_space_tail = node;
 }
 
 void
 _token_list_append_list (token_list_t *list, token_list_t *tail)
 {
+	if (tail == NULL || tail->head == NULL)
+		return;
+
 	if (list->head == NULL) {
 		list->head = tail->head;
 	} else {
@@ -672,8 +687,191 @@ _token_list_append_list (token_list_t *list, token_list_t *tail)
 	}
 
 	list->tail = tail->tail;
+	list->non_space_tail = tail->non_space_tail;
+}
+
+/* Return a new list (created from 'ctx') to which every token of
+ * 'other' has been appended in order; a NULL 'other' yields NULL. */
+token_list_t *
+_token_list_copy (void *ctx, token_list_t *other)
+{
+	token_list_t *duplicate = NULL;
+	token_node_t *cursor;
+
+	if (other) {
+		duplicate = _token_list_create (ctx);
+		for (cursor = other->head; cursor; cursor = cursor->next)
+			_token_list_append (duplicate, cursor->token);
+	}
+	return duplicate;
 }
-		
+
+/* Free every node after the last non-SPACE one; no-op if there is none. */
+void
+_token_list_trim_trailing_space (token_list_t *list)
+{
+	token_node_t *keep = list->non_space_tail, *doomed, *after;
+
+	if (keep == NULL)
+		return;
+
+	doomed = keep->next;
+	keep->next = NULL;
+	list->tail = keep;
+	for (; doomed; doomed = after) {
+		after = doomed->next;
+		talloc_free (doomed);
+	}
+}
+
+/* Print 'token' to stdout. Types below 256 are printed as that single
+ * character; INTEGER values are printed in decimal. */
+static void
+_token_print (token_t *token)
+{
+	if (token->type < 256) {
+		printf ("%c", token->type);
+		return;
+	}
+	switch (token->type) {
+	case INTEGER:
+		printf ("%" PRIiMAX, (intmax_t) token->value.ival); /* cast to match PRIiMAX */
+		break;
+	case IDENTIFIER:
+	case INTEGER_STRING:
+	case OTHER:
+		printf ("%s", token->value.str);
+		break;
+	case SPACE:
+		printf (" ");
+		break;
+	case LEFT_SHIFT:
+		printf ("<<");
+		break;
+	case RIGHT_SHIFT:
+		printf (">>");
+		break;
+	case LESS_OR_EQUAL:
+		printf ("<=");
+		break;
+	case GREATER_OR_EQUAL:
+		printf (">=");
+		break;
+	case EQUAL:
+		printf ("==");
+		break;
+	case NOT_EQUAL:
+		printf ("!=");
+		break;
+	case AND:
+		printf ("&&");
+		break;
+	case OR:
+		printf ("||");
+		break;
+	case PASTE:
+		printf ("##");
+		break;
+	case COMMA_FINAL:
+		printf (",");
+		break;
+	case PLACEHOLDER:	/* Nothing to print. */
+		break;
+	default:
+		fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type);
+		break;
+	}
+}
+
+/* Return a new token (talloc()ed off of 'token') formed by pasting
+ * 'token' and 'other'. Note that this function may return 'token' or
+ * 'other' directly rather than allocating anything new. If the pair
+ * cannot be pasted, an error is printed and 'token' is returned.
+ * Caution: Only very cursory error-checking is performed to see if
+ * the final result is a valid single token. */
+static token_t *
+_token_paste (token_t *token, token_t *other)
+{
+	/* Pasting a placeholder onto anything makes no change. */
+	if (other->type == PLACEHOLDER)
+		return token;
+
+	/* When 'token' is a placeholder, just return 'other'. */
+	if (token->type == PLACEHOLDER)
+		return other;
+
+	/* A very few single-character punctuators can be combined
+	 * with another to form a multi-character punctuator. */
+	switch (token->type) {
+	case '<':
+		if (other->type == '<')
+			return _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT);
+		else if (other->type == '=')
+			return _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL);
+		break;
+	case '>':
+		if (other->type == '>')
+			return _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT);
+		else if (other->type == '=')
+			return _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL);
+		break;
+	case '=':
+		if (other->type == '=')
+			return _token_create_ival (token, EQUAL, EQUAL);
+		break;
+	case '!':
+		if (other->type == '=')
+			return _token_create_ival (token, NOT_EQUAL, NOT_EQUAL);
+		break;
+	case '&':
+		if (other->type == '&')
+			return _token_create_ival (token, AND, AND);
+		break;
+	case '|':
+		if (other->type == '|')
+			return _token_create_ival (token, OR, OR);
+		break;
+	}
+
+	/* Two string-valued tokens can usually just be mashed
+	 * together.
+	 *
+	 * XXX: This isn't actually legitimate. Several things here
+	 * should result in a diagnostic since the result cannot be a
+	 * valid, single pre-processing token. For example, pasting
+	 * "123" and "abc" is not legal, but we don't catch that
+	 * here. */
+	if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) &&
+	    (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING))
+	{
+		char *str;
+
+		str = xtalloc_asprintf (token, "%s%s",
+					token->value.str, other->value.str);
+		return _token_create_str (token, token->type, str); /* result keeps the type of 'token' */
+	}
+
+	printf ("Error: Pasting \"");	/* NOTE(review): written to stdout, unlike the fprintf (stderr, ...) diagnostics elsewhere in this file */
+	_token_print (token);
+	printf ("\" and \"");
+	_token_print (other);
+	printf ("\" does not give a valid preprocessing token.\n");
+
+	return token;
+}
+
+/* Print each token of 'list' in order; a NULL list prints nothing. */
+static void
+_token_list_print (token_list_t *list)
+{
+	token_node_t *cursor = list ? list->head : NULL;
+
+	while (cursor) {
+		_token_print (cursor->token);
+		cursor = cursor->next;
+	}
+}
+
 void
 yyerror (void *scanner, const char *error)
 {
@@ -690,13 +888,17 @@ glcpp_parser_create (void)
 	glcpp_lex_init_extra (parser, &parser->scanner);
 	parser->defines = hash_table_ctor (32, hash_table_string_hash,
 					   hash_table_string_compare);
-	parser->expansions = NULL;
-
-	parser->just_printed_separator = 1;
-	parser->need_newline = 0;
+	parser->active = _string_list_create (parser);
+	parser->space_tokens = 1;
+	parser->newline_as_space = 0;
+	parser->in_control_line = 0;
+	parser->paren_count = 0;
 
 	parser->skip_stack = NULL;
 
+	parser->lex_from_list = NULL;
+	parser->lex_from_node = NULL;
+
 	return parser;
 }
 
@@ -709,8 +911,6 @@ glcpp_parser_parse (glcpp_parser_t *parser)
 void
 glcpp_parser_destroy (glcpp_parser_t *parser)
 {
-	if (parser->need_newline)
-		printf ("\n");
 	if (parser->skip_stack)
 		fprintf (stderr, "Error: Unterminated #if\n");
 	glcpp_lex_destroy (parser->scanner);
@@ -718,247 +918,577 @@ glcpp_parser_destroy (glcpp_parser_t *parser)
 	talloc_free (parser);
 }
 
-static int
-glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member)
+/* Replace each DEFINED token in 'list' with an INTEGER token holding 1
+ * if the next non-SPACE token names a macro in parser->defines, else 0,
+ * and unlink the nodes up to that identifier. Exits if none follows. */
+static void
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+				token_list_t *list)
 {
-	expansion_node_t *node;
+	token_node_t *node, *next;
+
+	if (list == NULL)
+		return;
 
-	for (node = parser->expansions; node; node = node->next) {
-		if (node->macro &&
-		    strcmp (node->macro->identifier, member) == 0)
+	for (node = list->head; node; node = node->next) {
+		if (node->token->type != DEFINED)
+			continue;
+		for (next = node->next; next && next->token->type == SPACE; )
+			next = next->next;
+		if (next == NULL || next->token->type != IDENTIFIER) {
+			fprintf (stderr, "Error: operator \"defined\" requires an identifier\n");
+			exit (1);
+		}
+		node->token->type = INTEGER;
+		node->token->value.ival = (hash_table_find (parser->defines,
+				next->token->value.str) != NULL);
+		if (next == list->tail)	/* 'next' is unlinked below, so */
+			list->tail = node;	/* keep tail on a live node */
+		if (next == list->non_space_tail)
+			list->non_space_tail = node;
+		node->next = next->next;
+	}
+}
+	
+typedef enum function_status
+{
+	FUNCTION_STATUS_SUCCESS,	/* argument list parsed */
+	FUNCTION_NOT_A_FUNCTION,	/* macro name not followed by '(' */
+	FUNCTION_UNBALANCED_PARENTHESES	/* no balancing ')' was found */
+} function_status_t;
+
+/* Find a set of function-like macro arguments by looking for a
+ * balanced set of parentheses.
+ *
+ * When called, 'node' should be the macro-name token; the opening
+ * parenthesis is looked for after it, skipping SPACE tokens. On
+ * success *last is the closing right parenthesis node; on the other
+ * results *last is not written.
+ *
+ * Return values:
+ *
+ *   FUNCTION_STATUS_SUCCESS:
+ *
+ *	Successfully parsed a set of function arguments.
+ *
+ *   FUNCTION_NOT_A_FUNCTION:
+ *
+ *	Macro name not followed by a '('. This is not an error, but
+ *	simply that the macro name should be treated as a non-macro.
+ *
+ *   FUNCTION_UNBALANCED_PARENTHESES:
+ *
+ *	Macro name is not followed by a balanced set of parentheses.
+ */
+static function_status_t
+_arguments_parse (argument_list_t *arguments,
+		  token_node_t *node,
+		  token_node_t **last)
+{
+	token_list_t *argument;
+	int paren_count;
+
+	node = node->next;	/* step past the macro name */
+
+	/* Ignore whitespace before first parenthesis. */
+	while (node && node->token->type == SPACE)
+		node = node->next;
+
+	if (node == NULL || node->token->type != '(')
+		return FUNCTION_NOT_A_FUNCTION;
+
+	node = node->next;	/* step past the '(' */
+
+	argument = _token_list_create (arguments);
+	_argument_list_append (arguments, argument);
+
+	for (paren_count = 1; node; node = node->next) {
+		if (node->token->type == '(')
 		{
-			return 1;
+			paren_count++;
+		}
+		else if (node->token->type == ')')
+		{
+			paren_count--;
+			if (paren_count == 0)
+				break;
+		}
+
+		if (node->token->type == ',' &&
+			 paren_count == 1)
+		{
+			_token_list_trim_trailing_space (argument);
+			argument = _token_list_create (arguments);
+			_argument_list_append (arguments, argument);
+		}
+		else {
+			if (argument->head == NULL) {
+				/* Don't treat initial whitespace as
+				 * part of the argument. */
+				if (node->token->type == SPACE)
+					continue;
+			}
+			_token_list_append (argument, node->token);
 		}
 	}
 
-	return 0;
+	if (paren_count)
+		return FUNCTION_UNBALANCED_PARENTHESES;
+
+	*last = node;
+
+	return FUNCTION_STATUS_SUCCESS;
 }
 
-token_class_t
-glcpp_parser_classify_token (glcpp_parser_t *parser,
-			     const char *identifier,
-			     int *parameter_index)
+/* This is a helper function that's essentially part of the
+ * implementation of _glcpp_parser_expand_node. It shouldn't be called
+ * except for by that function.
+ *
+ * Compute the complete expansion of node (which is a function-like
+ * macro) and subsequent nodes which are arguments.
+ *
+ * Returns NULL if node is a simple token with no expansion, (that is,
+ * although 'node' corresponds to an identifier defined as a
+ * function-like macro, it is not followed with a parenthesized
+ * argument list). NULL is also returned for unbalanced parentheses, a
+ * wrong argument count, or a '##' with no right-hand operand.
+ *
+ * Otherwise returns the token list that results from the expansion
+ * and sets *last to the last node in the list that was consumed by
+ * the expansion, namely the node of the closing right parenthesis.
+ */
+static token_list_t *
+_glcpp_parser_expand_function (glcpp_parser_t *parser,
+			       token_node_t *node,
+			       token_node_t **last)
+			       
 {
 	macro_t *macro;
+	const char *identifier;
+	argument_list_t *arguments;
+	function_status_t status;
+	token_list_t *substituted;
+	int parameter_index;
+
+	identifier = node->token->value.str;
 
-	/* Is this token a defined macro? */
 	macro = hash_table_find (parser->defines, identifier);
 
-	if (macro == NULL)
-		return TOKEN_CLASS_IDENTIFIER;
+	assert (macro->is_function);
 
-	/* Don't consider this a macro if we are already actively
-	 * expanding this macro. */
-	if (glcpp_parser_is_expanding (parser, identifier))
-		return TOKEN_CLASS_IDENTIFIER_FINALIZED;
+	arguments = _argument_list_create (parser);
+	status = _arguments_parse (arguments, node, last);
 
-	/* Definitely a macro. Just need to check if it's function-like. */
-	if (macro->is_function)
-		return TOKEN_CLASS_FUNC_MACRO;
-	else
-		return TOKEN_CLASS_OBJ_MACRO;
-}
+	switch (status) {
+	case FUNCTION_STATUS_SUCCESS:
+		break;
+	case FUNCTION_NOT_A_FUNCTION:
+		return NULL;
+	case FUNCTION_UNBALANCED_PARENTHESES:
+		return NULL;
+	}
 
-void
-_define_object_macro (glcpp_parser_t *parser,
-		      const char *identifier,
-		      token_list_t *replacements)
-{
-	macro_t *macro;
+	if (macro->replacements == NULL) {
+		talloc_free (arguments);
+		return _token_list_create (parser);
+	}
 
-	macro = xtalloc (parser, macro_t);
+	if (! ((_argument_list_length (arguments) == 
+		_string_list_length (macro->parameters)) ||
+	       (_string_list_length (macro->parameters) == 0 &&
+		_argument_list_length (arguments) == 1 &&
+		arguments->head->argument->head == NULL)))
+	{
+		fprintf (stderr,
+			 "Error: macro %s invoked with %d arguments (expected %d)\n",
+			 identifier,
+			 _argument_list_length (arguments),
+			 _string_list_length (macro->parameters));
+		return NULL;
+	}
 
-	macro->is_function = 0;
-	macro->parameters = NULL;
-	macro->identifier = talloc_strdup (macro, identifier);
-	macro->replacements = talloc_steal (macro, replacements);
+	/* Perform argument substitution on the replacement list. */
+	substituted = _token_list_create (arguments);
 
-	hash_table_insert (parser->defines, macro, identifier);
+	for (node = macro->replacements->head; node; node = node->next)
+	{
+		if (node->token->type == IDENTIFIER &&
+		    _string_list_contains (macro->parameters,
+					   node->token->value.str,
+					   &parameter_index))
+		{
+			token_list_t *argument;
+			argument = _argument_list_member_at (arguments,
+							     parameter_index);
+			/* Before substituting, we expand the argument
+			 * tokens, or append a placeholder token for
+			 * an empty argument. */
+			if (argument->head) {
+				_glcpp_parser_expand_token_list (parser,
+								 argument);
+				_token_list_append_list (substituted, argument);
+			} else {
+				token_t *new_token;
+
+				new_token = _token_create_ival (substituted,
+								PLACEHOLDER,
+								PLACEHOLDER);
+				_token_list_append (substituted, new_token);
+			}
+		} else {
+			_token_list_append (substituted, node->token);
+		}
+	}
+
+	/* After argument substitution, and before further expansion
+	 * below, implement token pasting. */
+
+	_token_list_trim_trailing_space (substituted);
+
+	node = substituted->head;
+	while (node)
+	{
+		token_node_t *next_non_space;
+
+		/* Look ahead for a PASTE token, skipping space. */
+		next_non_space = node->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL)
+			break;
+
+		if (next_non_space->token->type != PASTE) {
+			node = next_non_space;
+			continue;
+		}
+
+		/* Now find the next non-space token after the PASTE. */
+		next_non_space = next_non_space->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL) {
+			fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n");
+			return NULL;
+		}
+
+		node->token = _token_paste (node->token, next_non_space->token);
+		node->next = next_non_space->next;
+		if (next_non_space == substituted->tail)
+			substituted->tail = node;
+
+		/* Stay on 'node': it may be pasted again (a ## b ## c). */
+	}
+
+	substituted->non_space_tail = substituted->tail;
+
+	_string_list_push (parser->active, identifier);
+	_glcpp_parser_expand_token_list (parser, substituted);
+	_string_list_pop (parser->active);
+
+	return substituted;
 }
 
-void
-_define_function_macro (glcpp_parser_t *parser,
-			const char *identifier,
-			string_list_t *parameters,
-			token_list_t *replacements)
+/* Compute the complete expansion of node, (and subsequent nodes after
+ * 'node' in the case that 'node' is a function-like macro and
+ * subsequent nodes are arguments).
+ *
+ * Returns NULL if node is a simple token with no expansion.
+ *
+ * Otherwise, returns the token list that results from the expansion
+ * and sets *last to the last node in the list that was consumed by
+ * the expansion. Specifically, *last will be set as follows:
+ *
+ *	As 'node' in the case of object-like macro expansion, (or of
+ *	a macro that is already being expanded).
+ *	As the token of the closing right parenthesis in the case of
+ *	function-like macro expansion.
+ */
+static token_list_t *
+_glcpp_parser_expand_node (glcpp_parser_t *parser,
+			   token_node_t *node,
+			   token_node_t **last)
 {
+	token_t *token = node->token;
+	const char *identifier;
 	macro_t *macro;
+	token_list_t *expansion;
+
+	/* We only expand identifiers */
+	if (token->type != IDENTIFIER) {
+		/* We change any COMMA into a COMMA_FINAL to prevent
+		 * it being mistaken for an argument separator
+		 * later. */
+		if (token->type == ',') {
+			token->type = COMMA_FINAL;
+			token->value.ival = COMMA_FINAL;
+		}
 
-	macro = xtalloc (parser, macro_t);
+		return NULL;
+	}
 
-	macro->is_function = 1;
-	macro->parameters = talloc_steal (macro, parameters);
-	macro->identifier = talloc_strdup (macro, identifier);
-	macro->replacements = talloc_steal (macro, replacements);
+	/* Look up this identifier in the hash table. */
+	identifier = token->value.str;
+	macro = hash_table_find (parser->defines, identifier);
 
-	hash_table_insert (parser->defines, macro, identifier);
+	/* Not a macro, so no expansion needed. */
+	if (macro == NULL)
+		return NULL;
+
+	/* Finally, don't expand this macro if we're already actively
+	 * expanding it, (to avoid infinite recursion). */
+	if (_string_list_contains (parser->active, identifier, NULL)) {
+		/* We change the token type here from IDENTIFIER to
+		 * OTHER to prevent any future expansion of this
+		 * unexpanded token. */
+		char *str;
+		token_list_t *expansion;
+		token_t *final;
+
+		str = xtalloc_strdup (parser, token->value.str);
+		final = _token_create_str (parser, OTHER, str);
+		expansion = _token_list_create (parser);
+		_token_list_append (expansion, final);
+		*last = node;
+		return expansion;
+	}
+
+	if (! macro->is_function)
+	{
+		*last = node;
+
+		if (macro->replacements == NULL)
+			return _token_list_create (parser);
+
+		expansion = _token_list_copy (parser, macro->replacements);
+
+		_string_list_push (parser->active, identifier);
+		_glcpp_parser_expand_token_list (parser, expansion);
+		_string_list_pop (parser->active);
+
+		return expansion;
+	}
+
+	return _glcpp_parser_expand_function (parser, node, last);
 }
 
+/* Walk over the token list replacing nodes with their expansion.
+ * Whenever nodes are expanded the walking will walk over the new
+ * nodes, continuing to expand as necessary. The results are placed in
+ * 'list' itself; a NULL 'list' is ignored.
+ */
 static void
-_glcpp_parser_push_expansion (glcpp_parser_t *parser,
-			      macro_t *macro,
-			      token_node_t *replacements)
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+				 token_list_t *list)
 {
-	expansion_node_t *node;
+	token_node_t *node_prev;
+	token_node_t *node, *last;
+	token_list_t *expansion;
 
-	node = xtalloc (parser, expansion_node_t);
+	if (list == NULL)
+		return;
 
-	node->macro = macro;
-	node->replacements = replacements;
+	_token_list_trim_trailing_space (list);
 
-	node->next = parser->expansions;
-	parser->expansions = node;
+	node_prev = NULL;
+	node = list->head;
+
+	while (node) {
+		/* Find the expansion for node, which will replace all
+		 * nodes from node to last, inclusive. */
+		expansion = _glcpp_parser_expand_node (parser, node, &last);
+		if (expansion) {
+			/* Splice expansion into list, supporting a
+			 * simple deletion if the expansion is empty,
+			 * and keep list->tail on the last live node. */
+			if (expansion->head) {
+				if (node_prev)
+					node_prev->next = expansion->head;
+				else
+					list->head = expansion->head;
+				expansion->tail->next = last->next;
+				if (last == list->tail)
+					list->tail = expansion->tail;
+			} else {
+				if (node_prev)
+					node_prev->next = last->next;
+				else
+					list->head = last->next;
+				if (last == list->tail)
+					list->tail = node_prev;
+			}
+		} else {
+			node_prev = node;
+		}
+		node = node_prev ? node_prev->next : list->head;
+	}
+
+	list->non_space_tail = list->tail;
 }
 
 static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser)
+_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser,
+				      token_list_t *list,
+				      token_list_t *result)
 {
-	expansion_node_t *node;
+	_glcpp_parser_expand_token_list (parser, list);	/* expands 'list' in place */
 
-	node = parser->expansions;
+	_token_list_append_list (result, list);	/* then appends it to 'result' */
+}
 
-	if (node == NULL) {
-		fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n");
-		exit (1);
-	}
+void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+					 token_list_t *list)
+{
+	if (list == NULL)	/* nothing to expand or print */
+		return;
 
-	parser->expansions = node->next;
+	_glcpp_parser_expand_token_list (parser, list);	/* expands 'list' in place */
 
-	talloc_free (node);
+	_token_list_trim_trailing_space (list);
+
+	_token_list_print (list);
 }
 
 void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier)
+_define_object_macro (glcpp_parser_t *parser,
+		      const char *identifier,
+		      token_list_t *replacements)
 {
 	macro_t *macro;
 
-	macro = hash_table_find (parser->defines, identifier);
-	assert (! macro->is_function);
-	assert (! glcpp_parser_is_expanding (parser, identifier));
+	macro = xtalloc (parser, macro_t);
 
-	_glcpp_parser_push_expansion (parser, macro, macro->replacements->head);
+	macro->is_function = 0;
+	macro->parameters = NULL;
+	macro->identifier = xtalloc_strdup (macro, identifier);
+	macro->replacements = talloc_steal (macro, replacements);
+	/* Register the object-like macro under 'identifier'. */
+	hash_table_insert (parser->defines, macro, identifier);
 }
 
 void
-_expand_function_macro (glcpp_parser_t *parser,
+_define_function_macro (glcpp_parser_t *parser,
 			const char *identifier,
-			argument_list_t *arguments)
+			string_list_t *parameters,
+			token_list_t *replacements)
 {
 	macro_t *macro;
-	token_list_t *expanded;
-	token_node_t *i, *j;
-	int parameter_index;
-
-	macro = hash_table_find (parser->defines, identifier);
-	assert (macro->is_function);
-	assert (! glcpp_parser_is_expanding (parser, identifier));
-
-	if (_argument_list_length (arguments) !=
-	    _string_list_length (macro->parameters))
-	{
-		fprintf (stderr,
-			 "Error: macro %s invoked with %d arguments (expected %d)\n",
-			 identifier,
-			 _argument_list_length (arguments),
-			 _string_list_length (macro->parameters));
-		return;
-	}
 
-	expanded = _token_list_create (macro);
+	macro = xtalloc (parser, macro_t);	/* function-like macro record */
 
-	for (i = macro->replacements->head; i; i = i->next) {
-		if (_string_list_contains (macro->parameters, i->value,
-					   &parameter_index))
-		{
-			token_list_t *argument;
-			argument = _argument_list_member_at (arguments,
-							     parameter_index);
-			for (j = argument->head; j; j = j->next)
-			{
-				_token_list_append (expanded, j->type,
-						    j->value);
-			}
-		} else {
-			_token_list_append (expanded, i->type, i->value);
-		}
-	}
+	macro->is_function = 1;
+	macro->parameters = talloc_steal (macro, parameters);
+	macro->identifier = xtalloc_strdup (macro, identifier);
+	macro->replacements = talloc_steal (macro, replacements);
 
-	_glcpp_parser_push_expansion (parser, macro, expanded->head);
+	hash_table_insert (parser->defines, macro, identifier);
 }
 
 static int
 glcpp_parser_lex (glcpp_parser_t *parser)
 {
-	expansion_node_t *expansion;
-	token_node_t *replacements;
-	int parameter_index;
-	const char *token;
-	token_class_t class;
-
-    /* Who says C can't do efficient tail recursion? */
-    RECURSE:
-
-	expansion = parser->expansions;
+	token_node_t *node;
+	int ret;
+
+	if (parser->lex_from_list == NULL) {
+		ret = glcpp_lex (parser->scanner);
+
+		/* XXX: This ugly block of code exists for the sole
+		 * purpose of converting a NEWLINE token into a SPACE
+		 * token, but only in the case where we have seen a
+		 * function-like macro name, but have not yet seen its
+		 * closing parenthesis. newline_as_space is set when such
+		 * a name is lexed, and cleared when a ')' brings
+		 * paren_count back to 0 or when a token other than '(',
+		 * ')', NEWLINE or SPACE arrives while paren_count is 0.
+		 *
+		 * There's perhaps a more compact way to do this with
+		 * mid-rule actions in the grammar; the complexity of
+		 * this code here is not pleasing. */
+		if (parser->newline_as_space)
+		{
+			if (ret == '(') {
+				parser->paren_count++;
+			} else if (ret == ')') {
+				parser->paren_count--;
+				if (parser->paren_count == 0)
+					parser->newline_as_space = 0;
+			} else if (ret == NEWLINE) {
+				ret = SPACE;
+			} else if (ret != SPACE) {
+				if (parser->paren_count == 0)
+					parser->newline_as_space = 0;
+			}
+		}
+		else if (parser->in_control_line)
+		{
+			if (ret == NEWLINE)
+				parser->in_control_line = 0;
+		}
+		else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC ||
+			   ret == HASH_UNDEF || ret == HASH_IF ||
+			   ret == HASH_IFDEF || ret == HASH_IFNDEF ||
+			   ret == HASH_ELIF || ret == HASH_ELSE ||
+			   ret == HASH_ENDIF || ret == HASH)
+		{
+			parser->in_control_line = 1;
+		}
+		else if (ret == IDENTIFIER)
+		{
+			macro_t *macro;
+			macro = hash_table_find (parser->defines,
+						 yylval.str);
+			if (macro && macro->is_function) {
+				parser->newline_as_space = 1;
+				parser->paren_count = 0;
+			}
+		}
 
-	if (expansion == NULL)
-		return glcpp_lex (parser->scanner);
+		return ret;
+	}
 
-	replacements = expansion->replacements;
+	node = parser->lex_from_node;	/* replaying a stored token list */
 
-	/* Pop expansion when replacements is exhausted. */
-	if (replacements == NULL) {
-		glcpp_parser_pop_expansion (parser);
-		goto RECURSE;
+	if (node == NULL) {	/* list exhausted: free it, report NEWLINE */
+		talloc_free (parser->lex_from_list);
+		parser->lex_from_list = NULL;
+		return NEWLINE;
 	}
 
-	expansion->replacements = replacements->next;
-
-	token = replacements->value;
+	yylval = node->token->value;
+	ret = node->token->type;
 
-	/* Implement token pasting. */
-	if (replacements->next && strcmp (replacements->next->value, "##") == 0) {
-		token_node_t *next_node;
+	parser->lex_from_node = node->next;
 
-		next_node = replacements->next->next;
+	return ret;
+}
 
-		if (next_node == NULL) {
-			fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n");
-			exit (1);
-		}
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list)
+{
+	token_node_t *node;
 
-		token = xtalloc_asprintf (parser, "%s%s",
-					  token, next_node->value);
-		expansion->replacements = next_node->next;
-	}
+	assert (parser->lex_from_list == NULL);	/* no replay may be pending */
 
+	/* Copy the non-SPACE tokens of 'list' for glcpp_parser_lex. */
+	parser->lex_from_list = _token_list_create (parser);
 
-	if (strcmp (token, "(") == 0)
-		return '(';
-	else if (strcmp (token, ")") == 0)
-		return ')';
+	for (node = list->head; node; node = node->next) {
+		if (node->token->type == SPACE)
+			continue;
+		_token_list_append (parser->lex_from_list, node->token);
+	}
 
-	yylval.str = xtalloc_strdup (parser, token);
+	talloc_free (list);	/* 'list' must not be used after this call */
 
-	/* Carefully refuse to expand any finalized identifier. */
-	if (replacements->type == IDENTIFIER_FINALIZED)
-		return IDENTIFIER_FINALIZED;
+	parser->lex_from_node = parser->lex_from_list->head;
 
-	switch (glcpp_parser_classify_token (parser, yylval.str,
-					     &parameter_index))
-	{
-	case TOKEN_CLASS_IDENTIFIER:
-		return IDENTIFIER;
-		break;
-	case TOKEN_CLASS_IDENTIFIER_FINALIZED:
-		return IDENTIFIER_FINALIZED;
-		break;
-	case TOKEN_CLASS_FUNC_MACRO:
-		return FUNC_MACRO;
-		break;
-	default:
-	case TOKEN_CLASS_OBJ_MACRO:
-		return OBJ_MACRO;
-		break;
+	/* Nothing but whitespace: drop the copy, leaving no replay list. */
+	if (parser->lex_from_node == NULL) {
+		talloc_free (parser->lex_from_list);
+		parser->lex_from_list = NULL;
 	}
 }
 
@@ -1002,7 +1532,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
 		parser->skip_stack->type = SKIP_TO_ENDIF;
 	}
 }
-			
+
 static void
 _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser)
 {
diff --git a/glcpp.h b/glcpp.h
index 503731b85b..5c8c304a9c 100644
--- a/glcpp.h
+++ b/glcpp.h
@@ -44,21 +44,36 @@ typedef struct string_list {
 	string_node_t *tail;
 } string_list_t;
 
-typedef struct token {
+typedef struct token token_t;
+typedef struct token_list token_list_t;
+
+typedef union YYSTYPE
+{
+	intmax_t ival;	/* integer payload, e.g. of INTEGER tokens */
+	char *str;	/* string payload, e.g. of IDENTIFIER and OTHER tokens */
+	string_list_t *string_list;
+	token_t *token;
+	token_list_t *token_list;
+} YYSTYPE;
+
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+
+struct token {
 	int type;
-	char *value;
-} token_t;
+	YYSTYPE value;
+};
 
 typedef struct token_node {
-	int type;
-	const char *value;
+	token_t *token;
 	struct token_node *next;
 } token_node_t;
 
-typedef struct token_list {
+struct token_list {
 	token_node_t *head;
 	token_node_t *tail;
-} token_list_t;
+	token_node_t *non_space_tail;	/* presumably the last non-SPACE node -- TODO confirm */
+};
 
 typedef struct argument_node {
 	token_list_t *argument;
@@ -111,16 +126,16 @@ typedef struct skip_node {
 struct glcpp_parser {
 	yyscan_t scanner;
 	struct hash_table *defines;
-	expansion_node_t *expansions;
-	int just_printed_separator;
-	int need_newline;
+	string_list_t *active;	/* names of macros currently being expanded */
+	int space_tokens;
+	int newline_as_space;	/* lexer: hand NEWLINE to the parser as SPACE */
+	int in_control_line;	/* lexer: inside a '#' control line */
+	int paren_count;	/* lexer: '(' depth while newline_as_space is set */
 	skip_node_t *skip_stack;
+	token_list_t *lex_from_list;	/* token list being replayed by the lexer, or NULL */
+	token_node_t *lex_from_node;	/* next node of lex_from_list to return */
 };
 
-void
-glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
-				      int argument_index);
-
 glcpp_parser_t *
 glcpp_parser_create (void);
 
@@ -164,4 +179,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n);
 char *
 xtalloc_asprintf (const void *t, const char *fmt, ...);
 
+void *
+_xtalloc_reference_loc (const void *context,
+			const void *ptr, const char *location);
+
+#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__)
+
 #endif
diff --git a/tests/glcpp-test b/tests/glcpp-test
index 022a236712..ba398af0d5 100755
--- a/tests/glcpp-test
+++ b/tests/glcpp-test
@@ -2,8 +2,9 @@
 
 for test in *.c; do
     echo "Testing $test"
-    ../glcpp < $test > $test.out
+    ../glcpp < $test > $test.glcpp
+    grep -v '^$' < $test.glcpp > $test.out || true
     gcc -E $test -o $test.gcc
-    grep -v '^#' < $test.gcc > $test.expected
-    diff -B -u $test.expected $test.out
+    grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true
+    diff -u $test.expected $test.out
 done
diff --git a/xtalloc.c b/xtalloc.c
index e52d12ac6b..656ac2d6cb 100644
--- a/xtalloc.c
+++ b/xtalloc.c
@@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...)
 	va_end(ap);
 	return ret;
 }
+
+/* Like _talloc_reference_loc, except that a NULL result is treated as
+ * fatal: a message is printed to stderr and the process exits. */
+void *
+_xtalloc_reference_loc (const void *context,
+			const void *ptr, const char *location)
+{
+	void *reference = _talloc_reference_loc (context, ptr, location);
+
+	if (reference != NULL)
+		return reference;
+
+	fprintf (stderr, "Out of memory.\n");
+	exit (1);
+}