Pass through literal space values from replacement lists.

This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to emit SPACE tokens only on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way.
author: Carl Worth <cworth@cworth.org> 2010-05-25 16:59:02 -0700
committer: Carl Worth <cworth@cworth.org> 2010-05-25 17:06:08 -0700
commit: f34a0009dd07dbca4de5491744bd3618eae9458e (patch)
tree: c96275b2c2abf42482717e5daaa685838003d344
parent: b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 (diff)
3 files changed, 25 insertions, 71 deletions
diff --git a/glcpp-lex.l b/glcpp-lex.l
index b1980742d3..f6d0c8b7d6 100644
--- a/glcpp-lex.l
+++ b/glcpp-lex.l
@@ -32,21 +32,6 @@
 %option reentrant noyywrap
 %option extra-type="glcpp_parser_t *"
 
-	/* This lexer has two states:
-	 *
-	 * The CONTROL state is for control lines (directives)
-	 * It lexes exactly as specified in the C99 specification.
-	 *
-	 * The INITIAL state is for input lines. In this state, we
-	 * make the OTHER token much more broad in that it now
-	 * includes tokens consisting entirely of whitespace. This
-	 * allows us to pass text through verbatim. It avoids the
-	 * "inadvertent token pasting" problem that would occur if we
-	 * just printed tokens, while also avoiding excess whitespace
-	 * insertion in the output.*/
-
-%x CONTROL
-
 SPACE		[[:space:]]
 NONSPACE	[^[:space:]]
 NEWLINE		[\n]
@@ -63,116 +48,84 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 %%
 
 {HASH}define{HSPACE}+/{IDENTIFIER}"(" {
-	BEGIN CONTROL;
+	yyextra->space_tokens = 0;
 	return HASH_DEFINE_FUNC;
 }
 
 {HASH}define {
-	BEGIN CONTROL;
+	yyextra->space_tokens = 0;
 	return HASH_DEFINE_OBJ;
 }
 
 {HASH}undef {
-	BEGIN CONTROL;
+	yyextra->space_tokens = 0;
 	return HASH_UNDEF;
 }
 
 {HASH} {
-	BEGIN CONTROL;
+	yyextra->space_tokens = 0;
 	return HASH;
 }
 
-<CONTROL>{IDENTIFIER} {
+{IDENTIFIER} {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
 	return IDENTIFIER;
 }
 
-<CONTROL>"<<"  {
+"<<"  {
 	return LEFT_SHIFT;
 }
 
-<CONTROL>">>" {
+">>" {
 	return RIGHT_SHIFT;
 }
 
-<CONTROL>"<=" {
+"<=" {
 	return LESS_OR_EQUAL;
 }
 
-<CONTROL>">=" {
+">=" {
 	return GREATER_OR_EQUAL;
 }
 
-<CONTROL>"==" {
+"==" {
 	return EQUAL;
 }
 
-<CONTROL>"!=" {
+"!=" {
 	return NOT_EQUAL;
 }
 
-<CONTROL>"&&" {
+"&&" {
 	return AND;
 }
 
-<CONTROL>"||" {
+"||" {
 	return OR;
 }
 
-<CONTROL>"##" {
+"##" {
 	return PASTE;
 }
 
-<CONTROL>{PUNCTUATION} {
+{PUNCTUATION} {
 	return yytext[0];
 }
 
-<CONTROL>{OTHER} {
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return OTHER;
-}
-
-<CONTROL>{HSPACE}+
-
-<CONTROL>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
-}
-
-{IDENTIFIER} {
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
-}
-
-"(" {
-	return '(';
-}
-
-")" {
-	return ')';
-}
-
-"," {
-	return ',';
-}
-
 {OTHER}+ {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
 	return OTHER;
 }
 
 {HSPACE}+ {
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return SPACE;
+	if (yyextra->space_tokens) {
+		yylval.str = xtalloc_strdup (yyextra, yytext);
+		return SPACE;
+	}
 }
 
 \n {
 	return NEWLINE;
 }
 
-. {
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return OTHER;
-}
-
 %%
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 60b414e43a..a1981995fd 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -160,7 +160,7 @@ line:
 ;
 
 control_line:
-	HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE {
+	HASH_DEFINE_OBJ	IDENTIFIER replacement_list NEWLINE {
 		_define_object_macro (parser, $2, $3);
 	}
 |	HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE {
@@ -212,6 +212,7 @@ replacement_list:
 
 pp_tokens:
 	preprocessing_token {
+		parser->space_tokens = 1;
 		$$ = _token_list_create (parser);
 		_token_list_append ($$, $1);
 		talloc_unlink (parser, $1);
@@ -234,7 +235,7 @@ preprocessing_token:
 		$$ = _token_create_str (parser, OTHER, $1);
 	}
 |	SPACE {
-		$$ = _token_create_str (parser, OTHER, $1);
+		$$ = _token_create_str (parser, SPACE, $1);	
 	}
 ;
 
@@ -494,6 +495,7 @@ _token_print (token_t *token)
 	switch (token->type) {
 	case IDENTIFIER:
 	case OTHER:
+	case SPACE:
 		printf ("%s", token->value.str);
 		break;
 	case LEFT_SHIFT:
@@ -589,6 +591,7 @@ glcpp_parser_create (void)
 	parser->defines = hash_table_ctor (32, hash_table_string_hash,
 					   hash_table_string_compare);
 	parser->active = _string_list_create (parser);
+	parser->space_tokens = 1;
 	parser->expansions = NULL;
 
 	parser->just_printed_separator = 1;
@@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
 	for (node = list->head; node; node = node->next) {
 		if (_glcpp_parser_print_expanded_token (parser, node->token))
 			_glcpp_parser_print_expanded_function (parser, &node);
-
-		if (node->next)
-			printf (" ");
 	}
 }
 
diff --git a/glcpp.h b/glcpp.h
index 043098b134..f3760fa7a4 100644
--- a/glcpp.h
+++ b/glcpp.h
@@ -126,6 +126,7 @@ struct glcpp_parser {
 	yyscan_t scanner;
 	struct hash_table *defines;
 	string_list_t *active;
+	int space_tokens;
 	expansion_node_t *expansions;
 	int just_printed_separator;
 	int need_newline;
author	Carl Worth <cworth@cworth.org>	2010-05-25 16:59:02 -0700
committer	Carl Worth <cworth@cworth.org>	2010-05-25 17:06:08 -0700
commit	f34a0009dd07dbca4de5491744bd3618eae9458e (patch)
tree	c96275b2c2abf42482717e5daaa685838003d344
parent	b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 (diff)