Fix bug of consuming excess whitespace.

We fix this by moving printing up to the top-level "input" action and tracking whether a space is needed between one token and the next. This fixes all actual bugs in test-suite output, but does leave some tests failing due to differences in the amount of whitespace produced (which aren't actual bugs per se).
author: Carl Worth <cworth@cworth.org> 2010-05-20 14:19:57 -0700
committer: Carl Worth <cworth@cworth.org> 2010-05-20 14:19:57 -0700
commit: 005b32061f77008530a290ed991980a579095002 (patch)
tree: 4239b1b0a348403c4e79354c182daa16757d3318
parent: ff13cfed81132eaaa8859f25f87ea5398d4864ba (diff)
1 files changed, 53 insertions, 18 deletions
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 79a8ec2cf2..c6d64176b2 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -102,6 +102,7 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 %}
 
 %union {
+	int ival;
 	char *str;
 	argument_list_t *argument_list;
 	string_list_t *string_list;
@@ -112,8 +113,9 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 %parse-param {glcpp_parser_t *parser}
 %lex-param {glcpp_parser_t *parser}
 
-%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <str> FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
+%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF
+%type <ival> input punctuator
+%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
 %type <argument_list> argument_list
 %type <string_list> macro parameter_list
 %type <token> TOKEN argument_word argument_word_or_comma
@@ -135,38 +137,71 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 
 %%
 
+	/* We do all printing at the input level.
+	 *
+	 * The value of "input" is either TOKEN or SEPARATOR, recording
+	 * what was printed last so we can decide whether a space
+	 * character is needed before the next piece of content. */
 input:
-	/* empty */
-|	input content
+	/* empty */ {
+		$$ = SEPARATOR;
+	}
+|	input content {
+		int is_token;
+
+		if ($2 && strlen ($2)) {
+			int c = $2[0];
+			int is_not_separator = ((c >= 'a' && c <= 'z') ||
+						(c >= 'A' && c <= 'Z') ||
+						(c >= 'A' && c <= 'Z') ||
+						(c >= '0' && c <= '9') ||
+						(c == '_'));
+
+			if ($1 == TOKEN && is_not_separator)
+				printf (" ");
+			printf ("%s", $2);
+			if (is_not_separator)
+				$$ = TOKEN;
+			else
+				$$ = SEPARATOR;
+		} else {
+			$$ = $1;
+		}
+		if ($2)
+			talloc_free ($2);
+	}
 ;
 
-	/* We do all printing at the content level */
 content:
 	IDENTIFIER {
-		printf ("%s", $1);
-		talloc_free ($1);
+		$$ = $1;
 	}
 |	IDENTIFIER_FINALIZED {
-		printf ("%s", $1);
-		talloc_free ($1);
+		$$ = $1;
 	}
 |	TOKEN {
-		printf ("%s", $1.value);
-		talloc_free ($1.value);
+		$$ = $1.value;
 	}
 |	FUNC_MACRO {
-		printf ("%s", $1);
-		talloc_free ($1);
+		$$ = $1;
 	}
 |	directive {
-		printf ("\n");
+		$$ = talloc_strdup (parser, "\n");
+	}
+|	punctuator {
+		$$ = talloc_asprintf (parser, "%c", $1);
+	}
+|	macro {
+		$$ = NULL;
 	}
-|	'('	{ printf ("("); }
-|	')'	{ printf (")"); }
-|	','	{ printf (","); }
-|	macro
 ;
 
+punctuator:
+	'('	{ $$ = '('; }
+|	')'	{ $$ = ')'; }
+|	','	{ $$ = ','; }
+	;
+
 macro:
 	FUNC_MACRO '(' argument_list ')' {
 		_expand_function_macro (parser, $1, $3);
author	Carl Worth <cworth@cworth.org>	2010-05-20 14:19:57 -0700
committer	Carl Worth <cworth@cworth.org>	2010-05-20 14:19:57 -0700
commit	005b32061f77008530a290ed991980a579095002 (patch)
tree	4239b1b0a348403c4e79354c182daa16757d3318
parent	ff13cfed81132eaaa8859f25f87ea5398d4864ba (diff)