From 9bb796f33ac67abdf6c0bf55a06b0d8448caa3d3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:40:47 -0700 Subject: Add xtalloc_reference. Yet another talloc wrapper that should come in handy. --- glcpp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'glcpp.h') diff --git a/glcpp.h b/glcpp.h index 503731b85b..6171ce8b4a 100644 --- a/glcpp.h +++ b/glcpp.h @@ -164,4 +164,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n); char * xtalloc_asprintf (const void *t, const char *fmt, ...); +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location); + +#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__) + #endif -- cgit v1.2.3 From 808401fd79eea9fa2c965f9f235a753c0cb0d920 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:52:43 -0700 Subject: Store parsed tokens as token list and print all text lines. Still not doing any macro expansion just yet. But it should be fairly easy from here. --- glcpp-parse.y | 227 +++++++++++++++++++++++++++++++++++++++++-------------- glcpp.h | 27 +++++-- tests/glcpp-test | 5 +- 3 files changed, 195 insertions(+), 64 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-parse.y b/glcpp-parse.y index c53370a89a..991b8a0b85 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -77,15 +77,29 @@ _argument_list_length (argument_list_t *list); token_list_t * _argument_list_member_at (argument_list_t *list, int index); +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str); + +token_t * +_token_create_ival (void *ctx, int type, int ival); + token_list_t * _token_list_create (void *ctx); +/* Note: This function add a talloc_reference() to token. + * + * You may want to talloc_unlink any current reference if you no + * longer need it. */ void -_token_list_append (token_list_t *list, int type, const char *value); +_token_list_append (token_list_t *list, token_t *token); void _token_list_append_list (token_list_t *list, token_list_t *tail); +void +_token_list_print (token_list_t *list); + static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -107,12 +121,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { - intmax_t imaxval; int ival; char *str; - argument_list_t *argument_list; - string_list_t *string_list; - token_t token; + token_t *token; token_list_t *token_list; } @@ -121,6 +132,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE +%type punctuator +%type IDENTIFIER OTHER +%type preprocessing_token +%type pp_tokens replacement_list text_line /* Stale stuff just to allow code to compile. */ %token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO @@ -134,7 +149,11 @@ input: line: control_line -| text_line +| text_line { + _token_list_print ($1); + printf ("\n"); + talloc_free ($1); + } | HASH non_directive ; @@ -152,7 +171,7 @@ identifier_list: ; text_line: - NEWLINE + NEWLINE { $$ = NULL; } | pp_tokens NEWLINE ; @@ -161,55 +180,68 @@ non_directive: ; replacement_list: - /* empty */ + /* empty */ { $$ = NULL; } | pp_tokens ; pp_tokens: - preprocessing_token -| pp_tokens preprocessing_token + preprocessing_token { + $$ = _token_list_create (parser); + _token_list_append ($$, $1); + talloc_unlink (parser, $1); + } +| pp_tokens preprocessing_token { + $$ = $1; + _token_list_append ($$, $2); + talloc_unlink (parser, $2); + } ; preprocessing_token: - IDENTIFIER -| punctuator -| OTHER + IDENTIFIER { + $$ = _token_create_str (parser, IDENTIFIER, $1); + } +| punctuator { + $$ = _token_create_ival (parser, $1, $1); + } +| OTHER { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: - '[' -| ']' -| '(' -| ')' -| '{' -| '}' -| '.' -| '&' -| '*' -| '+' -| '-' -| '~' -| '!' -| '/' -| '%' -| LEFT_SHIFT -| RIGHT_SHIFT -| '<' -| '>' -| LESS_OR_EQUAL -| GREATER_OR_EQUAL -| EQUAL -| NOT_EQUAL -| '^' -| '|' -| AND -| OR -| ';' -| ',' -| PASTE + '[' { $$ = '['; } +| ']' { $$ = ']'; } +| '(' { $$ = '('; } +| ')' { $$ = ')'; } +| '{' { $$ = '{'; } +| '}' { $$ = '}'; } +| '.' { $$ = '.'; } +| '&' { $$ = '&'; } +| '*' { $$ = '*'; } +| '+' { $$ = '+'; } +| '-' { $$ = '-'; } +| '~' { $$ = '~'; } +| '!' { $$ = '!'; } +| '/' { $$ = '/'; } +| '%' { $$ = '%'; } +| LEFT_SHIFT { $$ = LEFT_SHIFT; } +| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } +| '<' { $$ = '<'; } +| '>' { $$ = '>'; } +| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } +| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } +| EQUAL { $$ = EQUAL; } +| NOT_EQUAL { $$ = NOT_EQUAL; } +| '^' { $$ = '^'; } +| '|' { $$ = '|'; } +| AND { $$ = AND; } +| OR { $$ = OR; } +| ';' { $$ = ';'; } +| ',' { $$ = ','; } +| PASTE { $$ = PASTE; } ; - %% string_list_t * @@ -361,6 +393,77 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.str = talloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + token_list_t * _token_list_create (void *ctx) { @@ -374,13 +477,12 @@ _token_list_create (void *ctx) } void -_token_list_append (token_list_t *list, int type, const char *value) +_token_list_append (token_list_t *list, token_t *token) { token_node_t *node; node = xtalloc (list, token_node_t); - node->type = type; - node->value = xtalloc_strdup (list, value); + node->token = xtalloc_reference (list, token); node->next = NULL; @@ -405,6 +507,21 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } +void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _token_print (node->token); + if (node->next) + printf (" "); + } +} + void yyerror (void *scanner, const char *error) { @@ -598,7 +715,8 @@ _expand_function_macro (glcpp_parser_t *parser, expanded = _token_list_create (macro); for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, i->value, + if (_string_list_contains (macro->parameters, + i->token->value.str, ¶meter_index)) { token_list_t *argument; @@ -606,11 +724,10 @@ _expand_function_macro (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->type, - j->value); + _token_list_append (expanded, j->token); } } else { - _token_list_append (expanded, i->type, i->value); + _token_list_append (expanded, i->token); } } @@ -644,10 +761,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - token = replacements->value; + token = replacements->token->value.str; /* Implement token pasting. */ - if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { token_node_t *next_node; next_node = replacements->next->next; @@ -658,7 +775,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) } token = xtalloc_asprintf (parser, "%s%s", - token, next_node->value); + token, next_node->token->value.str); expansion->replacements = next_node->next; } @@ -671,7 +788,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ - if (replacements->type == IDENTIFIER_FINALIZED) + if (replacements->token->type == IDENTIFIER_FINALIZED) return IDENTIFIER_FINALIZED; switch (glcpp_parser_classify_token (parser, yylval.str, diff --git a/glcpp.h b/glcpp.h index 6171ce8b4a..261254a17c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -44,21 +44,34 @@ typedef struct string_list { string_node_t *tail; } string_list_t; -typedef struct token { +typedef struct token token_t; +typedef struct token_list token_list_t; + +typedef union YYSTYPE +{ + int ival; + char *str; + token_t *token; + token_list_t *token_list; +} YYSTYPE; + +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 + +struct token { int type; - char *value; -} token_t; + YYSTYPE value; +}; typedef struct token_node { - int type; - const char *value; + token_t *token; struct token_node *next; } token_node_t; -typedef struct token_list { +struct token_list { token_node_t *head; token_node_t *tail; -} token_list_t; +}; typedef struct argument_node { token_list_t *argument; diff --git a/tests/glcpp-test b/tests/glcpp-test index 868b03cce8..34cca88330 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -7,6 +7,7 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected -# diff -B -u $test.expected $test.out +# grep -v '^#' < $test.gcc > $test.expected + grep -v '^[ ]*#' < $test > $test.expected + diff -w -u $test.expected $test.out done -- cgit v1.2.3 From ae6517f4a83981ae363bbbfe439ec23e8deb04b1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:24:59 -0700 Subject: Implement expansion of object-like macros. For this we add an "active" string_list_t to the parser. This makes the current expansion_list_t in the parser obsolete, but we don't remove that yet. With this change we can now start passing some actual tests, so we turn on real testing in the test suite again. I expect to implement things more or less in the same order as before, so the test suite now halts on first error. With this change the first 8 tests in the suite pass, (object-like macros with chaining and recursion). --- glcpp-parse.y | 128 +++++++++++++++++++++++++++++++++++++++++++++++-------- glcpp.h | 1 + tests/glcpp-test | 5 +-- 3 files changed, 112 insertions(+), 22 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-parse.y b/glcpp-parse.y index 957421b864..b3ef177a6d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -59,6 +59,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -98,7 +104,8 @@ void _token_list_append_list (token_list_t *list, token_list_t *tail); void -_token_list_print (token_list_t *list); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -144,21 +151,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line +| input line { + printf ("\n"); + } ; line: control_line | text_line { - _token_list_print ($1); - printf ("\n"); + _glcpp_parser_print_expanded_token_list (parser, $1); talloc_free ($1); } | HASH non_directive ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + _define_object_macro (parser, $2, $3); + } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE | HASH_UNDEF IDENTIFIER NEWLINE @@ -287,6 +297,42 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + list->head = node; +} + +void +_string_list_pop (string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -507,19 +553,6 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } -void -_token_list_print (token_list_t *list) -{ - token_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - _token_print (node->token); - } -} - void yyerror (void *scanner, const char *error) { @@ -536,6 +569,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->active = _string_list_create (parser); parser->expansions = NULL; parser->just_printed_separator = 1; @@ -605,6 +639,64 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } +void +_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, + token_t *token) +{ + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + _token_print (token); + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just print directly. */ + if (macro == NULL) { + printf ("%s", identifier); + return; + } + + /* We're not (yet) supporting function-like macros. */ + if (macro->is_function) { + printf ("%s", identifier); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); +} + +void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _glcpp_parser_print_expanded_token (parser, node->token); + if (node->next) + printf (" "); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, diff --git a/glcpp.h b/glcpp.h index 261254a17c..bd599d7301 100644 --- a/glcpp.h +++ b/glcpp.h @@ -124,6 +124,7 @@ typedef struct skip_node { struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; + string_list_t *active; expansion_node_t *expansions; int just_printed_separator; int need_newline; diff --git a/tests/glcpp-test b/tests/glcpp-test index 8074e47119..6304155210 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,13 +1,10 @@ #!/bin/sh set -e -echo "Caution: These results are just verifying parse-ability, not correctness!" - for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc -# grep -v '^#' < $test.gcc > $test.expected - grep -v '^[ ]*#' < $test > $test.expected + grep -v '^#' < $test.gcc > $test.expected diff -u $test.expected $test.out done -- cgit v1.2.3 From b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:28:26 -0700 Subject: Implement simplified substitution for function-like macro invocation. This supports function-like macro invocation but without any argument substitution. This now makes test 11 through 14 pass. --- glcpp-lex.l | 14 ++++- glcpp-parse.y | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++------- glcpp.h | 1 + 3 files changed, 168 insertions(+), 21 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-lex.l b/glcpp-lex.l index 7b5cdd57a0..b1980742d3 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -144,6 +144,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return IDENTIFIER; } +"(" { + return '('; +} + +")" { + return ')'; +} + +"," { + return ','; +} + {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; @@ -151,7 +163,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? {HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; + return SPACE; } \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 830a6232d8..60b414e43a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -127,20 +127,14 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} -%union { - int ival; - char *str; - token_t *token; - token_list_t *token_list; -} - %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE %type punctuator -%type IDENTIFIER OTHER +%type IDENTIFIER OTHER SPACE +%type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -169,8 +163,12 @@ control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } -| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE -| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, NULL, $5); + } +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, $4, $6); + } | HASH_UNDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -186,8 +184,16 @@ control_line: ; identifier_list: - IDENTIFIER -| identifier_list ',' IDENTIFIER + IDENTIFIER { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + talloc_steal ($$, $1); + } +| identifier_list ',' IDENTIFIER { + $$ = $1; + _string_list_append_item ($$, $3); + talloc_steal ($$, $3); + } ; text_line: @@ -227,6 +233,9 @@ preprocessing_token: | OTHER { $$ = _token_create_str (parser, OTHER, $1); } +| SPACE { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: @@ -649,7 +658,14 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } -void +/* Print a non-macro token, or the expansion of an object-like macro. + * + * Returns 0 if this token is completely printed. + * + * Returns 1 in the case that 'token' is a function-like macro that + * needs further expansion. + */ +static int _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, token_t *token) { @@ -659,7 +675,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { _token_print (token); - return; + return 0; } /* Look up this identifier in the hash table. */ @@ -669,20 +685,135 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* Not a macro, so just print directly. */ if (macro == NULL) { printf ("%s", identifier); - return; + return 0; } - /* We're not (yet) supporting function-like macros. */ + /* For function-like macros return 1 for further processing. */ if (macro->is_function) { - printf ("%s", identifier); - return; + return 1; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return 0; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); + + return 0; +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. + * + * Return values: + * + * FUNCTION_STATUS_SUCCESS: + * + * Successfully parsed a set of function arguments. + * + * FUNCTION_NOT_A_FUNCTION: + * + * Macro name not followed by a '('. This is not an error, but + * simply that the macro name should be treated as a non-macro. + * + * FUNCTION_UNBLANCED_PARENTHESES + * + * Macro name is not followed by a balanced set of parentheses. + */ +static function_status_t +_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +{ + token_node_t *node = *node_ret, *last; + int paren_count; + int arg_count; + + last = node; + node = node->next; + + /* Ignore whitespace before first parenthesis. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != '(') + return FUNCTION_NOT_A_FUNCTION; + + paren_count = 0; + arg_count = 0; + do { + if (node->token->type == '(') + { + paren_count++; + } + else if (node->token->type == ')') + { + paren_count--; + } + else if (node->token->type == ',' && + paren_count == 1) + { + arg_count++; + } + + last = node; + node = node->next; + + } while (node && paren_count); + + if (node && paren_count) + return FUNCTION_UNBALANCED_PARENTHESES; + + *node_ret = last; + + return FUNCTION_STATUS_SUCCESS; +} + +/* Prints the expansion of *node (consuming further tokens from the + * list as necessary). Upon return *node will be the last consumed + * node, such that further processing can continue with node->next. */ +static void +_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, + token_node_t **node_ret) +{ + macro_t *macro; + token_node_t *node; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + + node = *node_ret; + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + status = _find_arguments (node_ret, &arguments); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: printf ("%s", identifier); return; + case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); } _string_list_push (parser->active, identifier); @@ -696,12 +827,15 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node; + function_status_t function_status; if (list == NULL) return; for (node = list->head; node; node = node->next) { - _glcpp_parser_print_expanded_token (parser, node->token); + if (_glcpp_parser_print_expanded_token (parser, node->token)) + _glcpp_parser_print_expanded_function (parser, &node); + if (node->next) printf (" "); } diff --git a/glcpp.h b/glcpp.h index bd599d7301..043098b134 100644 --- a/glcpp.h +++ b/glcpp.h @@ -51,6 +51,7 @@ typedef union YYSTYPE { int ival; char *str; + string_list_t *string_list; token_t *token; token_list_t *token_list; } YYSTYPE; -- cgit v1.2.3 From f34a0009dd07dbca4de5491744bd3618eae9458e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:59:02 -0700 Subject: Pass through literal space values from replacement lists. This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to only emit SPACE tokens when on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way. --- glcpp-lex.l | 85 +++++++++++++---------------------------------------------- glcpp-parse.y | 10 +++---- glcpp.h | 1 + 3 files changed, 25 insertions(+), 71 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-lex.l b/glcpp-lex.l index b1980742d3..f6d0c8b7d6 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" - /* This lexer has two states: - * - * The CONTROL state is for control lines (directives) - * It lexes exactly as specified in the C99 specification. - * - * The INITIAL state is for input lines. In this state, we - * make the OTHER token much more broad in that it now - * includes tokens consisting entirely of whitespace. This - * allows us to pass text through verbatim. It avoids the - * "inadvertent token pasting" problem that would occur if we - * just printed tokens, while also avoiding excess whitespace - * insertion in the output.*/ - -%x CONTROL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -63,116 +48,84 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; } {HASH}define { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_OBJ; } {HASH}undef { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_UNDEF; } {HASH} { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH; } -{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -"##" { +"##" { return PASTE; } -{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -{OTHER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"(" { - return '('; -} - -")" { - return ')'; -} - -"," { - return ','; -} - {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } {HSPACE}+ { - yylval.str = xtalloc_strdup (yyextra, yytext); - return SPACE; + if (yyextra->space_tokens) { + yylval.str = xtalloc_strdup (yyextra, yytext); + return SPACE; + } } \n { return NEWLINE; } -. { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 60b414e43a..a1981995fd 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -160,7 +160,7 @@ line: ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { @@ -212,6 +212,7 @@ replacement_list: pp_tokens: preprocessing_token { + parser->space_tokens = 1; $$ = _token_list_create (parser); _token_list_append ($$, $1); talloc_unlink (parser, $1); @@ -234,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, OTHER, $1); + $$ = _token_create_str (parser, SPACE, $1); } ; @@ -494,6 +495,7 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: + case SPACE: printf ("%s", token->value.str); break; case LEFT_SHIFT: @@ -589,6 +591,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->space_tokens = 1; parser->expansions = NULL; parser->just_printed_separator = 1; @@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); - - if (node->next) - printf (" "); } } diff --git a/glcpp.h b/glcpp.h index 043098b134..f3760fa7a4 100644 --- a/glcpp.h +++ b/glcpp.h @@ -126,6 +126,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int space_tokens; expansion_node_t *expansions; int just_printed_separator; int need_newline; -- cgit v1.2.3 From 5aa7ea08093f727761d424ad090f44b116c8f0bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 18:39:43 -0700 Subject: Remove a bunch of old code and give the static treatment to what's left. We're no longer using the expansion stack, so its functions can go along with most of the body of glcpp_parser_lex that was using it. --- glcpp-parse.y | 262 +++++----------------------------------------------------- glcpp.h | 7 -- 2 files changed, 21 insertions(+), 248 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-parse.y b/glcpp-parse.y index 131102fab9..02286cd8e0 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,88 +28,77 @@ #include "glcpp.h" -void +static void yyerror (void *scanner, const char *error); -void +static void _define_object_macro (glcpp_parser_t *parser, const char *macro, token_list_t *replacements); -void +static void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, token_list_t *replacements); -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier); - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments); - -string_list_t * +static string_list_t * _string_list_create (void *ctx); -void +static void _string_list_append_item (string_list_t *list, const char *str); -void +static void _string_list_append_list (string_list_t *list, string_list_t *tail); -void +static void _string_list_push (string_list_t *list, const char *str); -void +static void _string_list_pop (string_list_t *list); -int +static int _string_list_contains (string_list_t *list, const char *member, int *index); -int +static int _string_list_length (string_list_t *list); -argument_list_t * +static argument_list_t * _argument_list_create (void *ctx); -void +static void _argument_list_append (argument_list_t *list, token_list_t *argument); -int +static int _argument_list_length (argument_list_t *list); -token_list_t * +static token_list_t * _argument_list_member_at (argument_list_t *list, int index); /* Note: This function talloc_steal()s the str pointer. */ -token_t * +static token_t * _token_create_str (void *ctx, int type, char *str); -token_t * +static token_t * _token_create_ival (void *ctx, int type, int ival); -token_list_t * +static token_list_t * _token_list_create (void *ctx); /* Note: This function add a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -void +static void _token_list_append (token_list_t *list, token_t *token); -void +static void _token_list_append_list (token_list_t *list, token_list_t *tail); -void +static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -591,10 +580,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; - parser->expansions = NULL; - - parser->just_printed_separator = 1; - parser->need_newline = 0; parser->skip_stack = NULL; @@ -610,8 +595,6 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - if (parser->need_newline) - printf ("\n"); if (parser->skip_stack) fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); @@ -619,47 +602,6 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -static int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -token_class_t -glcpp_parser_classify_token (glcpp_parser_t *parser, - const char *identifier, - int *parameter_index) -{ - macro_t *macro; - - /* Is this token a defined macro? */ - macro = hash_table_find (parser->defines, identifier); - - if (macro == NULL) - return TOKEN_CLASS_IDENTIFIER; - - /* Don't consider this a macro if we are already actively - * expanding this macro. */ - if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER_FINALIZED; - - /* Definitely a macro. Just need to check if it's function-like. */ - if (macro->is_function) - return TOKEN_CLASS_FUNC_MACRO; - else - return TOKEN_CLASS_OBJ_MACRO; -} - /* Print a non-macro token, or the expansion of an object-like macro. * * Returns 0 if this token is completely printed. @@ -933,172 +875,10 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_glcpp_parser_push_expansion (glcpp_parser_t *parser, - macro_t *macro, - token_node_t *replacements) -{ - expansion_node_t *node; - - node = xtalloc (parser, expansion_node_t); - - node->macro = macro; - node->replacements = replacements; - - node->next = parser->expansions; - parser->expansions = node; -} - -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser) -{ - expansion_node_t *node; - - node = parser->expansions; - - if (node == NULL) { - fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); - exit (1); - } - - parser->expansions = node->next; - - talloc_free (node); -} - -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier) -{ - macro_t *macro; - - macro = hash_table_find (parser->defines, identifier); - assert (! macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); -} - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments) -{ - macro_t *macro; - token_list_t *expanded; - token_node_t *i, *j; - int parameter_index; - - macro = hash_table_find (parser->defines, identifier); - assert (macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) - { - fprintf (stderr, - "Error: macro %s invoked with %d arguments (expected %d)\n", - identifier, - _argument_list_length (arguments), - _string_list_length (macro->parameters)); - return; - } - - expanded = _token_list_create (macro); - - for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, - i->token->value.str, - ¶meter_index)) - { - token_list_t *argument; - argument = _argument_list_member_at (arguments, - parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (expanded, j->token); - } - } else { - _token_list_append (expanded, i->token); - } - } - - _glcpp_parser_push_expansion (parser, macro, expanded->head); -} - static int glcpp_parser_lex (glcpp_parser_t *parser) { - expansion_node_t *expansion; - token_node_t *replacements; - int parameter_index; - const char *token; - token_class_t class; - - /* Who says C can't do efficient tail recursion? */ - RECURSE: - - expansion = parser->expansions; - - if (expansion == NULL) - return glcpp_lex (parser->scanner); - - replacements = expansion->replacements; - - /* Pop expansion when replacements is exhausted. */ - if (replacements == NULL) { - glcpp_parser_pop_expansion (parser); - goto RECURSE; - } - - expansion->replacements = replacements->next; - - token = replacements->token->value.str; - - /* Implement token pasting. */ - if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { - token_node_t *next_node; - - next_node = replacements->next->next; - - if (next_node == NULL) { - fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); - exit (1); - } - - token = xtalloc_asprintf (parser, "%s%s", - token, next_node->token->value.str); - expansion->replacements = next_node->next; - } - - - if (strcmp (token, "(") == 0) - return '('; - else if (strcmp (token, ")") == 0) - return ')'; - - yylval.str = xtalloc_strdup (parser, token); - - /* Carefully refuse to expand any finalized identifier. */ - if (replacements->token->type == IDENTIFIER_FINALIZED) - return IDENTIFIER_FINALIZED; - - switch (glcpp_parser_classify_token (parser, yylval.str, - ¶meter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - default: - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - } + return glcpp_lex (parser->scanner); } static void diff --git a/glcpp.h b/glcpp.h index f3760fa7a4..6bd6e66a7c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,16 +127,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; - expansion_node_t *expansions; - int just_printed_separator; - int need_newline; skip_node_t *skip_stack; }; -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - glcpp_parser_t * glcpp_parser_create (void); -- cgit v1.2.3 From 10ae438399f14367dd9e03032594c1e16c428999 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 20:35:01 -0700 Subject: Avoid getting extra trailing whitespace from macros. This trailing whitespace was coming from macro definitions and from macro arguments. We fix this with a little extra state in the token_list. It now remembers the last non-space token added, so that these can be trimmed off just before printing the list. With this fix test 23 now passes. Tests 24 and 25 are also passing, but they probbably would ahve before this fix---just that they weren't being run earlier. --- glcpp-parse.y | 30 ++++++++++++++++++++++++++++-- glcpp.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp-parse.y b/glcpp-parse.y index 02286cd8e0..60eaf215b8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -471,7 +471,7 @@ _token_create_ival (void *ctx, int type, int ival) } void -_token_print (token_t *token) +_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) { if (token->type < 256) { printf ("%c", token->type); @@ -527,6 +527,7 @@ _token_list_create (void *ctx) list = xtalloc (ctx, token_list_t); list->head = NULL; list->tail = NULL; + list->non_space_tail = NULL; return list; } @@ -548,6 +549,8 @@ _token_list_append (token_list_t *list, token_t *token) } list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; } void @@ -560,6 +563,25 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) } list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + talloc_free (tail); + tail = next; + } + } } void @@ -618,7 +640,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_print (token); + _glcpp_parser_print_token (parser, token); return 0; } @@ -719,6 +741,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { + if (argument) + _token_list_trim_trailing_space (argument); argument = NULL; } else { @@ -834,6 +858,8 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, if (list == NULL) return; + _token_list_trim_trailing_space (list); + for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); diff --git a/glcpp.h b/glcpp.h index 6bd6e66a7c..21db918cdc 100644 --- a/glcpp.h +++ b/glcpp.h @@ -72,6 +72,7 @@ typedef struct token_node { struct token_list { token_node_t *head; token_node_t *tail; + token_node_t *non_space_tail; }; typedef struct argument_node { -- cgit v1.2.3 From f6914fd37b2b66d7be1ba0c31450d89d1785ccce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:57 -0700 Subject: Implement #if and friends. With this change, tests 41 through 49 all pass. (The defined operator appears to be somehow broken so that test 50 doesn't pass yet.) --- glcpp.h | 2 +- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/glcpp-test | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) (limited to 'glcpp.h') diff --git a/glcpp.h b/glcpp.h index 21db918cdc..36ab0e7ca5 100644 --- a/glcpp.h +++ b/glcpp.h @@ -49,7 +49,7 @@ typedef struct token_list token_list_t; typedef union YYSTYPE { - int ival; + intmax_t ival; char *str; string_list_t *string_list; token_t *token; diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220f..833ea03882 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d..34f0f95140 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/glcpp-test b/tests/glcpp-test index 6304155210..bf88d4462e 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -6,5 +6,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done -- cgit v1.2.3 From 8e82fcb070d5fae0ec2c763cee4cea225b459664 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 11:15:21 -0700 Subject: Implement (and test) support for macro expansion within conditional expressions. To do this we have split the existing "HASH_IF expression" into two productions: First is HASH_IF pp_tokens which simply constructs a list of tokens. Then, with that resulting token list, we first evaluate all DEFINED operator tokens, then expand all macros, and finally start lexing from the resulting token list. This brings us to the second production, IF_EXPANDED expression This final production works just like our previous "HASH_IF expression", evaluating a constant integer expression. The new test (54) added for this case now passes. --- glcpp-parse.y | 155 ++++++++++++++++++++++++++++++++++++++------- glcpp.h | 2 + tests/054-if-with-macros.c | 34 ++++++++++ 3 files changed, 169 insertions(+), 22 deletions(-) create mode 100644 tests/054-if-with-macros.c (limited to 'glcpp.h') diff --git a/glcpp-parse.y b/glcpp-parse.y index 58e1e655fd..cce8a70156 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,6 +96,10 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list); + static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); @@ -120,14 +124,17 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); static int glcpp_parser_lex (glcpp_parser_t *parser); +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + %} %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE -%type expression INTEGER punctuator SPACE +%type expression INTEGER operator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token @@ -148,28 +155,39 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line { +| input line +; + +line: + control_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { printf ("\n"); } } -; - -line: - control_line | text_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); } talloc_free ($1); } +| expanded_line | HASH non_directive ; +expanded_line: + IF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| ELIF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "elif", $2); + } +; + control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); @@ -191,8 +209,17 @@ control_line: } talloc_free ($2); } -| HASH_IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); +| HASH_IF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); @@ -204,8 +231,17 @@ control_line: talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } -| HASH_ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); +| HASH_ELIF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { _glcpp_parser_skip_stack_change_if (parser, "else", 1); @@ -286,14 +322,6 @@ expression: | '+' expression %prec UNARY { $$ = + $2; } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } | '(' expression ')' { $$ = $2; } @@ -347,7 +375,7 @@ preprocessing_token: | INTEGER { $$ = _token_create_ival (parser, INTEGER, $1); } -| punctuator { +| operator { $$ = _token_create_ival (parser, $1, $1); } | OTHER { @@ -358,7 +386,7 @@ preprocessing_token: } ; -punctuator: +operator: '[' { $$ = '['; } | ']' { $$ = ']'; } | '(' { $$ = '('; } @@ -389,6 +417,7 @@ punctuator: | ';' { $$ = ';'; } | ',' { $$ = ','; } | PASTE { $$ = PASTE; } +| DEFINED { $$ = DEFINED; } ; %% @@ -830,6 +859,9 @@ glcpp_parser_create (void) parser->skip_stack = NULL; + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + return parser; } @@ -849,6 +881,39 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +/* Replace any occurences of DEFINED tokens in 'list' with either a + * '0' or '1' INTEGER token depending on whether the next token in the + * list is defined or not. */ +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node, *next; + string_list_t *macro; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + if (node->token->type != DEFINED) + continue; + next = node->next; + while (next && next->token->type == SPACE) + next = next->next; + if (next == NULL || next->token->type != IDENTIFIER) { + fprintf (stderr, "Error: operator \"defined\" requires an identifier\n"); + exit (1); + } + macro = hash_table_find (parser->defines, + next->token->value.str); + + node->token->type = INTEGER; + node->token->value.ival = (macro != NULL); + node->next = next->next; + } +} + + /* Appends onto 'expansion' a non-macro token or the expansion of an * object-like macro. * @@ -1206,7 +1271,53 @@ _define_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + token_node_t *node; + int ret; + + if (parser->lex_from_list == NULL) + return glcpp_lex (parser->scanner); + + node = parser->lex_from_node; + + if (node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + return NEWLINE; + } + + yylval = node->token->value; + ret = node->token->type; + + parser->lex_from_node = node->next; + + return ret; +} + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + assert (parser->lex_from_list == NULL); + + /* Copy list, eliminating any space tokens. */ + parser->lex_from_list = _token_list_create (parser); + + for (node = list->head; node; node = node->next) { + if (node->token->type == SPACE) + continue; + _token_list_append (parser->lex_from_list, node->token); + } + + talloc_free (list); + + parser->lex_from_node = parser->lex_from_list->head; + + /* It's possible the list consisted of nothing but whitespace. */ + if (parser->lex_from_node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + } } static void diff --git a/glcpp.h b/glcpp.h index 36ab0e7ca5..e5be1a6cd6 100644 --- a/glcpp.h +++ b/glcpp.h @@ -129,6 +129,8 @@ struct glcpp_parser { string_list_t *active; int space_tokens; skip_node_t *skip_stack; + token_list_t *lex_from_list; + token_node_t *lex_from_node; }; glcpp_parser_t * diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 0000000000..3da79a0d96 --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif -- cgit v1.2.3 From 95951ea7bb8728cf54ae4136cb59d0af9e8a06bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:57:10 -0700 Subject: Treat newlines as space when invoking a function-like macro invocation. This adds three new pieces of state to the parser, (is_control_line, newline_as_space, and paren_count), and a large amount of messy code. I'd definitely like to see a cleaner solution for this. With this fix, the "define-func-extra-newlines" now passes so we put it back to test #26 where it was originally (lately it has been known as test #55). Also, we tweak test 25 slightly. Previously this test was ending a file function-like macro name that was not actually a macro (not followed by a left parenthesis). As is, this fix was making that test fail because the text_line production expects to see a terminating NEWLINE, but that NEWLINE is now getting turned into a SPACE here. This seems unlikely to be a problem in the wild, (function macros being used in a non-macro sense seems rare enough---but more than likely they won't happen at the end of a file). Still, we document this shortcoming in the README. --- README | 4 +++ glcpp-parse.y | 61 ++++++++++++++++++++++++++++++++-- glcpp.h | 3 ++ tests/025-func-macro-as-non-macro.c | 2 +- tests/026-define-func-extra-newlines.c | 6 ++++ tests/055-define-func-extra-newlines.c | 6 ---- 6 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 tests/026-define-func-extra-newlines.c delete mode 100644 tests/055-define-func-extra-newlines.c (limited to 'glcpp.h') diff --git a/README b/README index f0f64c2644..ab42a3ffe1 100644 --- a/README +++ b/README @@ -24,3 +24,7 @@ parentheses. The #error, #pragma, #extension, #version, and #line macros are not yet supported. + +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is). \ No newline at end of file diff --git a/glcpp-parse.y b/glcpp-parse.y index a809ebf3af..1346b65aff 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -856,6 +856,9 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; parser->skip_stack = NULL; @@ -1274,8 +1277,62 @@ glcpp_parser_lex (glcpp_parser_t *parser) token_node_t *node; int ret; - if (parser->lex_from_list == NULL) - return glcpp_lex (parser->scanner); + if (parser->lex_from_list == NULL) { + ret = glcpp_lex (parser->scanner); + + /* XXX: This ugly block of code exists for the sole + * purpose of converting a NEWLINE token into a SPACE + * token, but only in the case where we have seen a + * function-like macro name, but have not yet seen its + * closing parenthesis. + * + * There's perhaps a more compact way to do this with + * mid-rule actions in the grammar. + * + * I'm definitely not pleased with the complexity of + * this code here. + */ + if (parser->newline_as_space) + { + if (ret == '(') { + parser->paren_count++; + } else if (ret == ')') { + parser->paren_count--; + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } else if (ret == NEWLINE) { + ret = SPACE; + } else if (ret != SPACE) { + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } + } + else if (parser->in_control_line) + { + if (ret == NEWLINE) + parser->in_control_line = 0; + } + else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC || + ret == HASH_UNDEF || ret == HASH_IF || + ret == HASH_IFDEF || ret == HASH_IFNDEF || + ret == HASH_ELIF || ret == HASH_ELSE || + ret == HASH_ENDIF || ret == HASH) + { + parser->in_control_line = 1; + } + else if (ret == IDENTIFIER) + { + macro_t *macro; + macro = hash_table_find (parser->defines, + yylval.str); + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } + } + + return ret; + } node = parser->lex_from_node; diff --git a/glcpp.h b/glcpp.h index e5be1a6cd6..5c8c304a9c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,6 +128,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; + int newline_as_space; + int in_control_line; + int paren_count; skip_node_t *skip_stack; token_list_t *lex_from_list; token_node_t *lex_from_node; diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9d..b433671d1b 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c new file mode 100644 index 0000000000..0d83740530 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c deleted file mode 100644 index 0d83740530..0000000000 --- a/tests/055-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) -- cgit v1.2.3