diff options
author | Carl Worth <cworth@cworth.org> | 2010-05-25 15:04:32 -0700 |
---|---|---|
committer | Carl Worth <cworth@cworth.org> | 2010-05-25 15:04:32 -0700 |
commit | 9fb8b7a495c9dc6f9a62cf82300fae5925af92fc (patch) | |
tree | 622648ad7c965b94608a2f4d6d05dfb9963e348f | |
parent | 808401fd79eea9fa2c965f9f235a753c0cb0d920 (diff) |
Make the lexer pass whitespace through (as OTHER tokens) for text lines.
With this change, we can recreate the original text-line input
exactly. Previously we were inserting a space between every pair of
tokens so our output had a lot more whitespace than our input.
With this change, we can drop the "-w" option to diff in the test script
and match the input exactly.
-rw-r--r-- | glcpp-lex.l | 72 | ||||
-rw-r--r-- | glcpp-parse.y | 2 | ||||
-rwxr-xr-x | tests/glcpp-test | 2 |
3 files changed, 59 insertions, 17 deletions
diff --git a/glcpp-lex.l b/glcpp-lex.l index f1dd11ea9b..7b5cdd57a0 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,21 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" + /* This lexer has two states: + * + * The CONTROL state is for control lines (directives) + * It lexes exactly as specified in the C99 specification. + * + * The INITIAL state is for input lines. In this state, we + * make the OTHER token much more broad in that it now + * includes tokens consisting entirely of whitespace. This + * allows us to pass text through verbatim. It avoids the + * "inadvertent token pasting" problem that would occur if we + * just printed tokens, while also avoiding excess whitespace + * insertion in the output.*/ + +%x CONTROL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { + BEGIN CONTROL; return HASH_DEFINE_FUNC; } {HASH}define { + BEGIN CONTROL; return HASH_DEFINE_OBJ; } {HASH}undef { + BEGIN CONTROL; return HASH_UNDEF; } {HASH} { + BEGIN CONTROL; return HASH; } -{IDENTIFIER} { +<CONTROL>{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +<CONTROL>"<<" { return LEFT_SHIFT; } -">>" { +<CONTROL>">>" { return RIGHT_SHIFT; } -"<=" { +<CONTROL>"<=" { return LESS_OR_EQUAL; } -">=" { +<CONTROL>">=" { return GREATER_OR_EQUAL; } -"==" { +<CONTROL>"==" { return EQUAL; } -"!=" { +<CONTROL>"!=" { return NOT_EQUAL; } -"&&" { +<CONTROL>"&&" { return AND; } -"||" { +<CONTROL>"||" { return OR; } -"##" { +<CONTROL>"##" { return PASTE; } -{PUNCTUATION} { +<CONTROL>{PUNCTUATION} { return yytext[0]; } -\n { +<CONTROL>{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +<CONTROL>{HSPACE}+ + +<CONTROL>\n { + BEGIN INITIAL; return NEWLINE; } -{OTHER} { +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +{OTHER}+ { + yylval.str = xtalloc_strdup 
(yyextra, yytext); + return OTHER; +} + +{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } -{HSPACE}+ +\n { + return NEWLINE; +} + +. { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 991b8a0b85..957421b864 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -517,8 +517,6 @@ _token_list_print (token_list_t *list) for (node = list->head; node; node = node->next) { _token_print (node->token); - if (node->next) - printf (" "); } } diff --git a/tests/glcpp-test b/tests/glcpp-test index 34cca88330..8074e47119 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -9,5 +9,5 @@ for test in *.c; do gcc -E $test -o $test.gcc # grep -v '^#' < $test.gcc > $test.expected grep -v '^[ ]*#' < $test > $test.expected - diff -w -u $test.expected $test.out + diff -u $test.expected $test.out done |