From 3a37b8701cd3e0a86fef59910b20b2af7e4573f6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 11:44:09 -0700 Subject: Add the tiniest shell of a flex/bison-based parser. It doesn't really *do* anything yet---merely parsing a stream of whitespace-separated tokens, (and not interpreting them at all). --- Makefile | 12 ++++++++++++ glcpp-lex.l | 41 +++++++++++++++++++++++++++++++++++++++++ glcpp-parse.y | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ glcpp.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+) create mode 100644 Makefile create mode 100644 glcpp-lex.l create mode 100644 glcpp-parse.y create mode 100644 glcpp.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..d8357dda0f --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o + +%.c %.h: %.y + bison --defines=$*.h --output=$*.c $^ + +%.c: %.l + flex --outfile=$@ $< + +glcpp-lex.c: glcpp-parse.h + +clean: + rm -f glcpp-lex.c glcpp-parse.c *.o *~ diff --git a/glcpp-lex.l b/glcpp-lex.l new file mode 100644 index 0000000000..9779f2b92e --- /dev/null +++ b/glcpp-lex.l @@ -0,0 +1,41 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "glcpp-parse.h" +%} + +%option noyywrap + +%% + + /* Silently eat all whitespace. */ +[[:space:]]+ + + /* Any non-whitespace is a token. */ +[^[:space:]]+ { return TOKEN; } + +%% diff --git a/glcpp-parse.y b/glcpp-parse.y new file mode 100644 index 0000000000..739b2935b3 --- /dev/null +++ b/glcpp-parse.y @@ -0,0 +1,57 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#define YYSTYPE int + +void +yyerror (const char *error); + +%} + +%token TOKEN + +%% + +input: /* empty */ + | tokens +; + + +tokens: token + | tokens token +; + +token: TOKEN +; + +%% + +void +yyerror (const char *error) +{ + fprintf (stderr, "Parse error: %s\n", error); +} diff --git a/glcpp.c b/glcpp.c new file mode 100644 index 0000000000..09641ceead --- /dev/null +++ b/glcpp.c @@ -0,0 +1,28 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +int +main (void) +{ + return yyparse (); +} -- cgit v1.2.3 From 38aa83560be3368b4e9784b3ef8f73144171ca45 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 11:52:29 -0700 Subject: Make the lexer reentrant (to avoid "still reachable" memory). This allows the final program to be 100% "valgrind clean", (freeing all memory that it allocates). This will make it much easier to ensure that any allocations that parser actions perform are also cleaned up. --- glcpp-lex.l | 2 +- glcpp-parse.y | 7 +++++-- glcpp.c | 9 ++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 9779f2b92e..276f50ddfe 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -28,7 +28,7 @@ #include "glcpp-parse.h" %} -%option noyywrap +%option reentrant noyywrap %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 739b2935b3..9acd549b24 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,10 +28,13 @@ #define YYSTYPE int void -yyerror (const char *error); +yyerror (const char *error, void *scanner); %} +%parse-param {void *scanner} +%lex-param {void *scanner} + %token TOKEN %% @@ -51,7 +54,7 @@ token: TOKEN %% void -yyerror (const char *error) +yyerror (const char *error, void *scanner) { fprintf (stderr, "Parse error: %s\n", error); } diff --git a/glcpp.c b/glcpp.c index 09641ceead..90a0e89cfa 100644 --- a/glcpp.c +++ b/glcpp.c @@ -24,5 +24,12 @@ int main (void) { - return yyparse (); + int ret; + void *scanner; + + yylex_init (&scanner); + ret = yyparse (scanner); + yylex_destroy (scanner); + + return ret; } -- cgit v1.2.3 From a1e32bcff0a04dbff61f28c8e725cf2bf120bf85 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:17:25 -0700 Subject: Add some compiler warnings and corresponding fixes. Most of the current problems were (mostly) harmless things like missing declarations, but there was at least one real error, (reversed argument order for yyerror).
--- Makefile | 2 ++ glcpp-lex.l | 1 + glcpp-parse.y | 6 ++++-- glcpp.c | 2 ++ glcpp.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 glcpp.h diff --git a/Makefile b/Makefile index d8357dda0f..d0ca78de74 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ +override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused + glcpp: glcpp.o glcpp-lex.o glcpp-parse.o %.c %.h: %.y diff --git a/glcpp-lex.l b/glcpp-lex.l index 276f50ddfe..747e24056f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -25,6 +25,7 @@ #include #include +#include "glcpp.h" #include "glcpp-parse.h" %} diff --git a/glcpp-parse.y b/glcpp-parse.y index 9acd549b24..a2d1094253 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,10 +25,12 @@ #include #include +#include "glcpp.h" + #define YYSTYPE int void -yyerror (const char *error, void *scanner); +yyerror (void *scanner, const char *error); %} @@ -54,7 +56,7 @@ token: TOKEN %% void -yyerror (const char *error, void *scanner) +yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } diff --git a/glcpp.c b/glcpp.c index 90a0e89cfa..eefac74be9 100644 --- a/glcpp.c +++ b/glcpp.c @@ -21,6 +21,8 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include "glcpp.h" + int main (void) { diff --git a/glcpp.h b/glcpp.h new file mode 100644 index 0000000000..485387b8a5 --- /dev/null +++ b/glcpp.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef GLCPP_H +#define GLCPP_H + +/* Generated by glcpp-lex.l to glcpp-lex.c */ + +#define yyscan_t void* + +int +yylex_init (yyscan_t *scanner); + +int +yylex (yyscan_t scanner); + +int +yylex_destroy (yyscan_t scanner); + +/* Generated by glcpp-parse.y to glcpp-parse.c */ + +int +yyparse (void *scanner); + +#endif -- cgit v1.2.3 From a70e7bab2b492f64455c74f2222b363f37dc8dfa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:32:42 -0700 Subject: Add .gitignore file. To ignore generated source files (and glcpp binary). 
--- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..5bbd660f22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +glcpp +glcpp-lex.c +glcpp-parse.c +glcpp-parse.h +*.o +*~ -- cgit v1.2.3 From 633a692225fcdad15ce84776a7a18d7d008d52b3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 13:36:26 -0700 Subject: Add hash table implementation from glsl2 project. The preprocessor here is intended to become part of the glsl2 codebase eventually anyway. --- Makefile | 2 +- hash_table.c | 159 ++++++++++++++++++++++++++++++++++++ hash_table.h | 125 ++++++++++++++++++++++++++++ main/imports.h | 6 ++ main/simple_list.h | 235 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 hash_table.c create mode 100644 hash_table.h create mode 100644 main/imports.h create mode 100644 main/simple_list.h diff --git a/Makefile b/Makefile index d0ca78de74..0af7e05d1b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused -glcpp: glcpp.o glcpp-lex.o glcpp-parse.o +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o %.c %.h: %.y bison --defines=$*.h --output=$*.c $^ diff --git a/hash_table.c b/hash_table.c new file mode 100644 index 0000000000..e89a2564d7 --- /dev/null +++ b/hash_table.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file hash_table.c + * \brief Implementation of a generic, opaque hash table data type. + * + * \author Ian Romanick + */ + +#include "main/imports.h" +#include "main/simple_list.h" +#include "hash_table.h" + +struct node { + struct node *next; + struct node *prev; +}; + +struct hash_table { + hash_func_t hash; + hash_compare_func_t compare; + + unsigned num_buckets; + struct node buckets[1]; +}; + + +struct hash_node { + struct node link; + const void *key; + void *data; +}; + + +struct hash_table * +hash_table_ctor(unsigned num_buckets, hash_func_t hash, + hash_compare_func_t compare) +{ + struct hash_table *ht; + unsigned i; + + + if (num_buckets < 16) { + num_buckets = 16; + } + + ht = _mesa_malloc(sizeof(*ht) + ((num_buckets - 1) + * sizeof(ht->buckets[0]))); + if (ht != NULL) { + ht->hash = hash; + ht->compare = compare; + ht->num_buckets = num_buckets; + + for (i = 0; i < num_buckets; i++) { + make_empty_list(& ht->buckets[i]); + } + } + + return ht; +} + + +void +hash_table_dtor(struct hash_table *ht) +{ + hash_table_clear(ht); + _mesa_free(ht); +} + + +void +hash_table_clear(struct hash_table *ht) +{ + struct node *node; + struct node *temp; + unsigned i; + + + for (i = 0; i < ht->num_buckets; i++) { + foreach_s(node, temp, & ht->buckets[i]) { + remove_from_list(node); + _mesa_free(node); + } + + 
assert(is_empty_list(& ht->buckets[i])); + } +} + + +void * +hash_table_find(struct hash_table *ht, const void *key) +{ + const unsigned hash_value = (*ht->hash)(key); + const unsigned bucket = hash_value % ht->num_buckets; + struct node *node; + + foreach(node, & ht->buckets[bucket]) { + struct hash_node *hn = (struct hash_node *) node; + + if ((*ht->compare)(hn->key, key) == 0) { + return hn->data; + } + } + + return NULL; +} + + +void +hash_table_insert(struct hash_table *ht, void *data, const void *key) +{ + const unsigned hash_value = (*ht->hash)(key); + const unsigned bucket = hash_value % ht->num_buckets; + struct hash_node *node; + + node = _mesa_calloc(sizeof(*node)); + + node->data = data; + node->key = key; + + insert_at_head(& ht->buckets[bucket], & node->link); +} + + +unsigned +hash_table_string_hash(const void *key) +{ + const char *str = (const char *) key; + unsigned hash = 5381; + + + while (*str != '\0') { + hash = (hash * 33) + *str; + str++; + } + + return hash; +} diff --git a/hash_table.h b/hash_table.h new file mode 100644 index 0000000000..b9dd343dee --- /dev/null +++ b/hash_table.h @@ -0,0 +1,125 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file hash_table.h + * \brief Implementation of a generic, opaque hash table data type. + * + * \author Ian Romanick + */ + +#ifndef HASH_TABLE_H +#define HASH_TABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +struct hash_table; + +typedef unsigned (*hash_func_t)(const void *key); +typedef int (*hash_compare_func_t)(const void *key1, const void *key2); + +/** + * Hash table constructor + * + * Creates a hash table with the specified number of buckets. The supplied + * \c hash and \c compare routines are used when adding elements to the table + * and when searching for elements in the table. + * + * \param num_buckets Number of buckets (bins) in the hash table. + * \param hash Function used to compute hash value of input keys. + * \param compare Function used to compare keys. + */ +extern struct hash_table *hash_table_ctor(unsigned num_buckets, + hash_func_t hash, hash_compare_func_t compare); + + +/** + * Release all memory associated with a hash table + * + * \warning + * This function cannot release memory occupied either by keys or data. + */ +extern void hash_table_dtor(struct hash_table *ht); + + +/** + * Flush all entries from a hash table + * + * \param ht Table to be cleared of its entries. 
+ */ +extern void hash_table_clear(struct hash_table *ht); + + +/** + * Search a hash table for a specific element + * + * \param ht Table to be searched + * \param key Key of the desired element + * + * \return + * The \c data value supplied to \c hash_table_insert when the element with + * the matching key was added. If no matching key exists in the table, + * \c NULL is returned. + */ +extern void *hash_table_find(struct hash_table *ht, const void *key); + + +/** + * Add an element to a hash table + */ +extern void hash_table_insert(struct hash_table *ht, void *data, + const void *key); + + +/** + * Compute hash value of a string + * + * Computes the hash value of a string using the DJB2 algorithm developed by + * Professor Daniel J. Bernstein. It was published on comp.lang.c once upon + * a time. I was unable to find the original posting in the archives. + * + * \param key Pointer to a NUL terminated string to be hashed. + * + * \sa hash_table_string_compare + */ +extern unsigned hash_table_string_hash(const void *key); + + +/** + * Compare two strings used as keys + * + * This is just a macro wrapper around \c strcmp. + * + * \sa hash_table_string_hash + */ +#define hash_table_string_compare ((hash_compare_func_t) strcmp) + +#ifdef __cplusplus +}; +#endif + +#endif /* HASH_TABLE_H */ diff --git a/main/imports.h b/main/imports.h new file mode 100644 index 0000000000..d2197342c0 --- /dev/null +++ b/main/imports.h @@ -0,0 +1,6 @@ +#include +#include + +#define _mesa_malloc(x) malloc(x) +#define _mesa_free(x) free(x) +#define _mesa_calloc(x) calloc(1,x) diff --git a/main/simple_list.h b/main/simple_list.h new file mode 100644 index 0000000000..5ef39e14cc --- /dev/null +++ b/main/simple_list.h @@ -0,0 +1,235 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). 
+ * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _SIMPLE_LIST_H +#define _SIMPLE_LIST_H + +struct simple_node { + struct simple_node *next; + struct simple_node *prev; +}; + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. 
+ */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Consatinate a cyclic list to a list + * + * Appends the sequence of nodes starting with \c tail to the list \c head. + * A "cyclic list" is a list that does not have a sentinal node. This means + * that the data pointed to by \c tail is an actual node, not a dataless + * sentinal. Note that if \c tail constist of a single node, this macro + * behaves identically to \c insert_at_tail + * + * \param head Head of the list to be appended to. This may or may not + * be a cyclic list. + * \param tail Head of the cyclic list to be appended to \c head. + * \param temp Temporary \c simple_list used by the macro + * + * \sa insert_at_tail + */ +#define concat_list_and_cycle(head, tail, temp) \ +do { \ + (head)->prev->next = (tail); \ + (tail)->prev->next = (head); \ + (temp) = (head)->prev; \ + (head)->prev = (tail)->prev; \ + (tail)->prev = (temp); \ +} while (0) + +#define concat_list(head, next_list) \ +do { \ + (next_list)->next->prev = (head)->prev; \ + (next_list)->prev->next = (head); \ + (head)->prev->next = (next_list)->next; \ + (head)->prev = (next_list)->prev; \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. 
+ * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif -- cgit v1.2.3 From 725c17a9266c1141508da623c8781412853b70e4 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:14:59 -0700 Subject: Makefile: Enable debugging of parser. 
This compiles the debugging code for the parser. It's not active unless the yydebug variable is set to a non-zero value. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0af7e05d1b..d37e9233ec 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o %.c %.h: %.y - bison --defines=$*.h --output=$*.c $^ + bison --debug --defines=$*.h --output=$*.c $^ %.c: %.l flex --outfile=$@ $< -- cgit v1.2.3 From 0b27b5f05191f07ed31e65ff07e5233672f3c33a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:16:06 -0700 Subject: Implement #define By using the recently-imported hash_table implementation. --- glcpp-lex.l | 23 +++++++++++++++++++---- glcpp-parse.y | 49 +++++++++++++++++++++++++++++++++++++++++++------ glcpp.c | 10 ++++++---- glcpp.h | 21 +++++++++++++++++++-- 4 files changed, 87 insertions(+), 16 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 747e24056f..a220fef76b 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -31,12 +31,27 @@ %option reentrant noyywrap +%x ST_DEFINE +%x ST_DEFVAL + +SPACE [[:space:]] +NONSPACE [^[:space:]] +NOTNEWLINE [^\n] +HSPACE [ \t] +HASH ^{HSPACE}*# +IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* +DEFVAL {NONSPACE}{NOTNEWLINE}* %% - /* Silently eat all whitespace. */ -[[:space:]]+ +{HASH}define { BEGIN ST_DEFINE; return DEFINE; } + +{HSPACE}+ +{IDENTIFIER} { BEGIN ST_DEFVAL; yylval = strdup (yytext); return IDENTIFIER; } + +{SPACE}+ +{DEFVAL} { BEGIN INITIAL; yylval = strdup (yytext); return DEFVAL; } - /* Any non-whitespace is a token.
*/ -[^[:space:]]+ { return TOKEN; } + /* Anything we don't specifically recognize is a stream of tokens */ +{NONSPACE}+ { yylval = strdup (yytext); return TOKEN; } %% diff --git a/glcpp-parse.y b/glcpp-parse.y index a2d1094253..89dc46497f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -27,30 +27,46 @@ #include "glcpp.h" -#define YYSTYPE int +#define YYLEX_PARAM parser->scanner void yyerror (void *scanner, const char *error); %} -%parse-param {void *scanner} +%parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} +%token DEFINE +%token DEFVAL +%token IDENTIFIER %token TOKEN %% input: /* empty */ - | tokens + | content ; +content: token + | directive + | content token + | content directive +; -tokens: token - | tokens token +directive: DEFINE IDENTIFIER DEFVAL { + hash_table_insert (parser->defines, $3, $2); +} ; -token: TOKEN +token: TOKEN { + char *value = hash_table_find (parser->defines, $1); + if (value) + printf ("%s", value); + else + printf ("%s", $1); + free ($1); +} ; %% @@ -60,3 +76,24 @@ yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } + +void +glcpp_parser_init (glcpp_parser_t *parser) +{ + yylex_init (&parser->scanner); + parser->defines = hash_table_ctor (32, hash_table_string_hash, + hash_table_string_compare); +} + +int +glcpp_parser_parse (glcpp_parser_t *parser) +{ + return yyparse (parser); +} + +void +glcpp_parser_fini (glcpp_parser_t *parser) +{ + yylex_destroy (parser->scanner); + hash_table_dtor (parser->defines); +} diff --git a/glcpp.c b/glcpp.c index eefac74be9..d6c89df2f9 100644 --- a/glcpp.c +++ b/glcpp.c @@ -26,12 +26,14 @@ int main (void) { + glcpp_parser_t parser; int ret; - void *scanner; - yylex_init (&scanner); - ret = yyparse (scanner); - yylex_destroy (scanner); + glcpp_parser_init (&parser); + + ret = glcpp_parser_parse (&parser); + + glcpp_parser_fini (&parser); return ret; } diff --git a/glcpp.h b/glcpp.h index 485387b8a5..5278e1b971 100644 --- a/glcpp.h +++ b/glcpp.h @@ 
-24,10 +24,27 @@ #ifndef GLCPP_H #define GLCPP_H -/* Generated by glcpp-lex.l to glcpp-lex.c */ +#include "hash_table.h" +#define YYSTYPE char * #define yyscan_t void* +typedef struct { + yyscan_t scanner; + struct hash_table *defines; +} glcpp_parser_t; + +void +glcpp_parser_init (glcpp_parser_t *parser); + +int +glcpp_parser_parse (glcpp_parser_t *parser); + +void +glcpp_parser_fini (glcpp_parser_t *parser); + +/* Generated by glcpp-lex.l to glcpp-lex.c */ + int yylex_init (yyscan_t *scanner); @@ -40,6 +57,6 @@ yylex_destroy (yyscan_t scanner); /* Generated by glcpp-parse.y to glcpp-parse.c */ int -yyparse (void *scanner); +yyparse (glcpp_parser_t *parser); #endif -- cgit v1.2.3 From e8c790b3ceab06eb0433c3a234d3e16980f7ef19 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 10 May 2010 16:21:10 -0700 Subject: Add a very simple test for the pre-processor. Validate desired test cases by ensuring the output of glcpp matches the output of the gcc preprocessor, (ignoring any lines of the gcc output beginning with '#'). Only one test case so far with a trivial #define. 
--- .gitignore | 3 +++ Makefile | 4 ++++ tests/001-define.c | 2 ++ tests/glcpp-test | 9 +++++++++ 4 files changed, 18 insertions(+) create mode 100644 tests/001-define.c create mode 100755 tests/glcpp-test diff --git a/.gitignore b/.gitignore index 5bbd660f22..d67bd38c93 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ glcpp-parse.c glcpp-parse.h *.o *~ +tests/*.expected +tests/*.gcc +tests/*.out diff --git a/Makefile b/Makefile index d37e9233ec..38cc1f314a 100644 --- a/Makefile +++ b/Makefile @@ -10,5 +10,9 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o glcpp-lex.c: glcpp-parse.h +test: + @(cd tests; ./glcpp-test) + clean: rm -f glcpp-lex.c glcpp-parse.c *.o *~ + rm -f tests/*.out tests/*.gcc tests/*.expected diff --git a/tests/001-define.c b/tests/001-define.c new file mode 100644 index 0000000000..cbf2fee0e7 --- /dev/null +++ b/tests/001-define.c @@ -0,0 +1,2 @@ +#define foo 1 +foo diff --git a/tests/glcpp-test b/tests/glcpp-test new file mode 100755 index 0000000000..25685eeabe --- /dev/null +++ b/tests/glcpp-test @@ -0,0 +1,9 @@ +#!/bin/sh + +for test in *.c; do + echo "Testing $test" + ../glcpp < $test > $test.out + gcc -E $test -o $test.gcc + grep -v '^#' < $test.gcc > $test.expected + diff -u $test.expected $test.out +done -- cgit v1.2.3 From beb26e8ac3152c4a7be43d7ee068b50e17b3ba18 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:04:42 -0700 Subject: Add README file describing glcpp. Mostly this is a place for me to write down the URLs of the GLSL and C99 specifications that I need to write this code. --- README | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 README diff --git a/README b/README new file mode 100644 index 0000000000..ba833a49ff --- /dev/null +++ b/README @@ -0,0 +1,14 @@ +glcpp -- GLSL "C" preprocessor + +This is a simple preprocessor designed to provide the preprocessing +needs of the GLSL language. 
The requirements for this preprocessor are +specified in the GLSL 1.30 specification available from: + +http://www.opengl.org/registry/doc/GLSLangSpec.Full.1.30.08.pdf + +This specification is not precise on some semantics, (for example, +#define and #if), defining these merely "as is standard for C++ +preprocessors". To fill in these details, I've been using the C99 +standard (for which I had a convenient copy) as available from: + +http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf -- cgit v1.2.3 From 49206ef4c8adba5427e9d9b5e0dfc11345262890 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:29:22 -0700 Subject: Add test for chained #define directives. Where one macro is defined in terms of another macro. The current implementation does not yet deal with this correctly. --- tests/002-define-chain.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/002-define-chain.c diff --git a/tests/002-define-chain.c b/tests/002-define-chain.c new file mode 100644 index 0000000000..87d75c6875 --- /dev/null +++ b/tests/002-define-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar foo +bar -- cgit v1.2.3 From c6d5af335121f6027cc46ef9c5aa77aa4e5906ca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:30:09 -0700 Subject: Fix to handle chained #define directives. The fix is as simple as adding a loop to continue to lookup values in the hash table until one of the following termination conditions: 1. The token we look up has no definition 2. We get back the original symbol we started with This second termination condition prevents infinite iteration. 
--- glcpp-parse.y | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 89dc46497f..a3a661b8be 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -32,6 +32,9 @@ void yyerror (void *scanner, const char *error); +const char * +_resolve_token (glcpp_parser_t *parser, const char *token); + %} %parse-param {glcpp_parser_t *parser} @@ -59,14 +62,7 @@ directive: DEFINE IDENTIFIER DEFVAL { } ; -token: TOKEN { - char *value = hash_table_find (parser->defines, $1); - if (value) - printf ("%s", value); - else - printf ("%s", $1); - free ($1); -} +token: TOKEN { printf ("%s", _resolve_token (parser, $1)); free ($1); } ; %% @@ -97,3 +93,22 @@ glcpp_parser_fini (glcpp_parser_t *parser) yylex_destroy (parser->scanner); hash_table_dtor (parser->defines); } + +const char * +_resolve_token (glcpp_parser_t *parser, const char *token) +{ + const char *orig = token; + const char *replacement; + + while (1) { + replacement = hash_table_find (parser->defines, token); + if (replacement == NULL) + break; + token = replacement; + if (strcmp (token, orig) == 0) + break; + } + + return token; +} + -- cgit v1.2.3 From 34db0d332e0a1477971b7c29c18899e7264f9bce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:35:06 -0700 Subject: Add a couple more tests for chained #define directives. One with the chained defines in the opposite order, and one with the potential to trigger an infinite-loop bug through mutual recursion. Each of these tests pass already. 
--- tests/003-define-chain-reverse.c | 3 +++ tests/004-define-recursive.c | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/003-define-chain-reverse.c create mode 100644 tests/004-define-recursive.c diff --git a/tests/003-define-chain-reverse.c b/tests/003-define-chain-reverse.c new file mode 100644 index 0000000000..a18b724eca --- /dev/null +++ b/tests/003-define-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar foo +#define foo 1 +bar diff --git a/tests/004-define-recursive.c b/tests/004-define-recursive.c new file mode 100644 index 0000000000..2ac56ea3dc --- /dev/null +++ b/tests/004-define-recursive.c @@ -0,0 +1,6 @@ +#define foo bar +#define bar baz +#define baz foo +foo +bar +baz -- cgit v1.2.3 From df2ab5b99237ab0b6760226554b133a5ccd11579 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 11 May 2010 12:39:29 -0700 Subject: Add tests defining a macro to be a literal and another macro. These 3 new tests are modeled after 3 existing tests but made slightly more complex since now instead of defining a new macro to be an existing macro, we define it to be replaced with two tokens, (one a literal, and one an existing macro). These tests all fail currently because the replacement lookup is currently happening on the basis of the entire replacement string rather than on a list of tokens. 
--- tests/005-define-composite-chain.c | 3 +++ tests/006-define-composite-chain-reverse.c | 3 +++ tests/007-define-composite-recursive.c | 6 ++++++ 3 files changed, 12 insertions(+) create mode 100644 tests/005-define-composite-chain.c create mode 100644 tests/006-define-composite-chain-reverse.c create mode 100644 tests/007-define-composite-recursive.c diff --git a/tests/005-define-composite-chain.c b/tests/005-define-composite-chain.c new file mode 100644 index 0000000000..f5521df968 --- /dev/null +++ b/tests/005-define-composite-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar a foo +bar diff --git a/tests/006-define-composite-chain-reverse.c b/tests/006-define-composite-chain-reverse.c new file mode 100644 index 0000000000..4bb91a1221 --- /dev/null +++ b/tests/006-define-composite-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar a foo +#define foo 1 +bar diff --git a/tests/007-define-composite-recursive.c b/tests/007-define-composite-recursive.c new file mode 100644 index 0000000000..5784565bdf --- /dev/null +++ b/tests/007-define-composite-recursive.c @@ -0,0 +1,6 @@ +#define foo a bar +#define bar b baz +#define baz c foo +foo +bar +baz -- cgit v1.2.3 From 33cc400714f379ef13e876b4aedd0de8cb5d033d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:17:10 -0700 Subject: Fix defines involving both literals and other defined macros. We now store a list of tokens in our hash-table rather than a single string. This lets us replace each macro in the value as necessary. This code adds a link dependency on talloc which does exactly what we want in terms of memory management for a parser. The 3 tests added in the previous commit now pass. 
--- Makefile | 7 +++ glcpp-lex.l | 34 ++++++++--- glcpp-parse.y | 179 +++++++++++++++++++++++++++++++++++++++++++++++----------- glcpp.c | 10 ++-- glcpp.h | 25 +++++--- 5 files changed, 202 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index 38cc1f314a..83519328bf 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,13 @@ +# Debug symbols by default, but let the user avoid that with something +# like "make CFLAGS=-O2" +CFLAGS = -g + +# But we use 'override' here so that "make CFLAGS=-O2" will still have +# all the warnings enabled. override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o + gcc -o $@ -ltalloc $^ %.c %.h: %.y bison --debug --defines=$*.h --output=$*.c $^ diff --git a/glcpp-lex.l b/glcpp-lex.l index a220fef76b..f1a3560779 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -36,22 +36,40 @@ SPACE [[:space:]] NONSPACE [^[:space:]] -NOTNEWLINE [^\n] +NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*# IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -DEFVAL {NONSPACE}{NOTNEWLINE}* +TOKEN {NONSPACE}+ + %% -{HASH}define { BEGIN ST_DEFINE; return DEFINE; } +{HASH}define{HSPACE}* { + BEGIN ST_DEFINE; + return DEFINE; +} + +{IDENTIFIER} { + yylval.str = strdup (yytext); + return IDENTIFIER; +} + +{TOKEN} { + yylval.str = strdup (yytext); + return TOKEN; +} -{HSPACE}+ -{IDENTIFIER} { BEGIN ST_DEFVAL; yylval = strdup (yytext); return IDENTIFIER; } +\n { + BEGIN INITIAL; + return NEWLINE; +} -{SPACE}+ -{DEFVAL} { BEGIN INITIAL; yylval = strdup (yytext); return DEFVAL; } +{SPACE}+ /* Anything we don't specifically recognize is a stream of tokens */ -{NONSPACE}+ { yylval = strdup (yytext); return TOKEN; } +{NONSPACE}+ { + yylval.str = strdup (yytext); + return TOKEN; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index a3a661b8be..eae96efb30 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -24,61 +24,158 @@ #include #include +#include #include "glcpp.h" #define YYLEX_PARAM parser->scanner +struct 
glcpp_parser { + yyscan_t scanner; + struct hash_table *defines; +}; + void yyerror (void *scanner, const char *error); -const char * -_resolve_token (glcpp_parser_t *parser, const char *token); +void +_print_resolved_token (glcpp_parser_t *parser, const char *token); + +list_t * +_list_create (void *ctx); + +void +_list_append (list_t *list, const char *str); %} +%union { + char *str; + list_t *list; +} + %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE -%token DEFVAL -%token IDENTIFIER -%token TOKEN +%token DEFINE IDENTIFIER NEWLINE TOKEN +%type token IDENTIFIER TOKEN +%type replacement_list %% -input: /* empty */ - | content +input: + /* empty */ +| content ; -content: token - | directive - | content token - | content directive +content: + token { + _print_resolved_token (parser, $1); + free ($1); + } +| directive +| content token { + _print_resolved_token (parser, $2); + free ($2); + } +| content directive ; -directive: DEFINE IDENTIFIER DEFVAL { - hash_table_insert (parser->defines, $3, $2); -} +directive: + DEFINE IDENTIFIER replacement_list NEWLINE { + char *key = talloc_strdup ($3, $2); + free ($2); + hash_table_insert (parser->defines, $3, key); + printf ("\n"); + } ; -token: TOKEN { printf ("%s", _resolve_token (parser, $1)); free ($1); } +replacement_list: + /* empty */ { + $$ = _list_create (parser); + } + +| replacement_list token { + _list_append ($1, $2); + free ($2); + $$ = $1; + } +; + +token: + TOKEN { $$ = $1; } +| IDENTIFIER { $$ = $1; } ; %% +list_t * +_list_create (void *ctx) +{ + list_t *list; + + list = talloc (ctx, list_t); + if (list == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_list_append (list_t *list, const char *str) +{ + node_t *node; + + node = talloc (list, node_t); + if (node == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + node->str = talloc_strdup (node, str); + if (node->str == 
NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + void yyerror (void *scanner, const char *error) { fprintf (stderr, "Parse error: %s\n", error); } -void -glcpp_parser_init (glcpp_parser_t *parser) +glcpp_parser_t * +glcpp_parser_create (void) { + glcpp_parser_t *parser; + + parser = talloc (NULL, glcpp_parser_t); + if (parser == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + yylex_init (&parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + + return parser; } int @@ -88,27 +185,43 @@ glcpp_parser_parse (glcpp_parser_t *parser) } void -glcpp_parser_fini (glcpp_parser_t *parser) +glcpp_parser_destroy (glcpp_parser_t *parser) { yylex_destroy (parser->scanner); hash_table_dtor (parser->defines); + talloc_free (parser); } -const char * -_resolve_token (glcpp_parser_t *parser, const char *token) +static void +_print_resolved_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + int *first) { - const char *orig = token; - const char *replacement; - - while (1) { - replacement = hash_table_find (parser->defines, token); - if (replacement == NULL) - break; - token = replacement; - if (strcmp (token, orig) == 0) - break; + list_t *replacement; + node_t *node; + + replacement = hash_table_find (parser->defines, token); + if (replacement == NULL) { + printf ("%s%s", *first ? "" : " ", token); + *first = 0; + } else { + for (node = replacement->head ; node ; node = node->next) { + token = node->str; + if (strcmp (token, orig) == 0) { + printf ("%s%s", *first ? 
"" : " ", token); + *first = 0; + } else { + _print_resolved_recursive (parser, token, orig, first); + } + } } - - return token; } +void +_print_resolved_token (glcpp_parser_t *parser, const char *token) +{ + int first = 1; + + _print_resolved_recursive (parser, token, token, &first); +} diff --git a/glcpp.c b/glcpp.c index d6c89df2f9..fcdc4ed8a0 100644 --- a/glcpp.c +++ b/glcpp.c @@ -23,17 +23,19 @@ #include "glcpp.h" +extern int yydebug; + int main (void) { - glcpp_parser_t parser; + glcpp_parser_t *parser; int ret; - glcpp_parser_init (&parser); + parser = glcpp_parser_create (); - ret = glcpp_parser_parse (&parser); + ret = glcpp_parser_parse (parser); - glcpp_parser_fini (&parser); + glcpp_parser_destroy (parser); return ret; } diff --git a/glcpp.h b/glcpp.h index 5278e1b971..6fea9333e8 100644 --- a/glcpp.h +++ b/glcpp.h @@ -26,22 +26,31 @@ #include "hash_table.h" -#define YYSTYPE char * #define yyscan_t void* -typedef struct { - yyscan_t scanner; - struct hash_table *defines; -} glcpp_parser_t; +/* Some data types used for parser value. */ -void -glcpp_parser_init (glcpp_parser_t *parser); + +typedef struct node { + const char *str; + struct node *next; +} node_t; + +typedef struct list { + node_t *head; + node_t *tail; +} list_t; + +typedef struct glcpp_parser glcpp_parser_t; + +glcpp_parser_t * +glcpp_parser_create (void); int glcpp_parser_parse (glcpp_parser_t *parser); void -glcpp_parser_fini (glcpp_parser_t *parser); +glcpp_parser_destroy (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ -- cgit v1.2.3 From 5070a20cd1e65d52856bd74558f9a34f8dca114f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:45:33 -0700 Subject: Convert lexer to talloc and add xtalloc wrappers. The lexer was previously using strdup (expecting the parser to free), but is now more consistent, easier to use, and slightly more efficent by using talloc along with the parser. 
Also, we add xtalloc and xtalloc_strdup wrappers around talloc and talloc_strdup to put all of the out-of-memory-checking code in one place. --- Makefile | 2 +- glcpp-lex.l | 7 ++++--- glcpp-parse.y | 39 ++++++++++----------------------------- glcpp.h | 12 +++++++++++- xtalloc.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 xtalloc.c diff --git a/Makefile b/Makefile index 83519328bf..7233150a80 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CFLAGS = -g # all the warnings enabled. override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused -glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o +glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o gcc -o $@ -ltalloc $^ %.c %.h: %.y diff --git a/glcpp-lex.l b/glcpp-lex.l index f1a3560779..ec91538a73 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -30,6 +30,7 @@ %} %option reentrant noyywrap +%option extra-type="glcpp_parser_t *" %x ST_DEFINE %x ST_DEFVAL @@ -50,12 +51,12 @@ TOKEN {NONSPACE}+ } {IDENTIFIER} { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } {TOKEN} { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } @@ -68,7 +69,7 @@ TOKEN {NONSPACE}+ /* Anything we don't specifically recognize is a stream of tokens */ {NONSPACE}+ { - yylval.str = strdup (yytext); + yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } diff --git a/glcpp-parse.y b/glcpp-parse.y index eae96efb30..1a7ec4970d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -71,21 +71,20 @@ input: content: token { _print_resolved_token (parser, $1); - free ($1); + talloc_free ($1); } | directive | content token { _print_resolved_token (parser, $2); - free ($2); + talloc_free ($2); } | content directive ; directive: DEFINE IDENTIFIER replacement_list NEWLINE { - char *key = talloc_strdup ($3, $2); - free ($2); - hash_table_insert 
(parser->defines, $3, key); + talloc_steal ($3, $2); + hash_table_insert (parser->defines, $3, $2); printf ("\n"); } ; @@ -97,7 +96,7 @@ replacement_list: | replacement_list token { _list_append ($1, $2); - free ($2); + talloc_free ($2); $$ = $1; } ; @@ -114,12 +113,7 @@ _list_create (void *ctx) { list_t *list; - list = talloc (ctx, list_t); - if (list == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } - + list = xtalloc (ctx, list_t); list->head = NULL; list->tail = NULL; @@ -131,17 +125,8 @@ _list_append (list_t *list, const char *str) { node_t *node; - node = talloc (list, node_t); - if (node == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } - - node->str = talloc_strdup (node, str); - if (node->str == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } + node = xtalloc (list, node_t); + node->str = xtalloc_strdup (node, str); node->next = NULL; @@ -165,13 +150,9 @@ glcpp_parser_create (void) { glcpp_parser_t *parser; - parser = talloc (NULL, glcpp_parser_t); - if (parser == NULL) { - fprintf (stderr, "Out of memory.\n"); - exit (1); - } + parser = xtalloc (NULL, glcpp_parser_t); - yylex_init (&parser->scanner); + yylex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); diff --git a/glcpp.h b/glcpp.h index 6fea9333e8..8472570ccb 100644 --- a/glcpp.h +++ b/glcpp.h @@ -55,7 +55,7 @@ glcpp_parser_destroy (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ int -yylex_init (yyscan_t *scanner); +yylex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); int yylex (yyscan_t scanner); @@ -68,4 +68,14 @@ yylex_destroy (yyscan_t scanner); int yyparse (glcpp_parser_t *parser); +/* xtalloc - wrappers around talloc to check for out-of-memory */ + +#define xtalloc(ctx, type) (type *)xtalloc_named_const(ctx, sizeof(type), #type) + +void * +xtalloc_named_const (const void *context, size_t size, const char *name); + +char * 
+xtalloc_strdup (const void *t, const char *p); + #endif diff --git a/xtalloc.c b/xtalloc.c new file mode 100644 index 0000000000..849e12d349 --- /dev/null +++ b/xtalloc.c @@ -0,0 +1,52 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <talloc.h> + +void * +xtalloc_named_const (const void *context, size_t size, const char *name) +{ + void *ret; + + ret = talloc_named_const (context, size, name); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} + +char * +xtalloc_strdup (const void *t, const char *p) +{ + char *ret; + + ret = talloc_strdup (t, p); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} -- cgit v1.2.3 From 39cd7c2f2e2d27a93ad63191f02adb56be31c0ce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:49:07 -0700 Subject: Add test for an empty definition. Happily this one passes without needing any additional code. --- tests/008-define-empty.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/008-define-empty.c diff --git a/tests/008-define-empty.c b/tests/008-define-empty.c new file mode 100644 index 0000000000..b1bd17ec21 --- /dev/null +++ b/tests/008-define-empty.c @@ -0,0 +1,2 @@ +#define foo +foo -- cgit v1.2.3 From 7bdd1f36d9f238e6af4846d46b9dd30fffc772a5 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 12:51:31 -0700 Subject: Add test for #undef. Which hasn't been implemented yet, so this test fails. --- tests/009-undef.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/009-undef.c diff --git a/tests/009-undef.c b/tests/009-undef.c new file mode 100644 index 0000000000..3fc1fb4424 --- /dev/null +++ b/tests/009-undef.c @@ -0,0 +1,4 @@ +#define foo 1 +foo +#undef foo +foo -- cgit v1.2.3 From cd27e6413a683d3ba1763ec68edfb1ff13193fc3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:11:50 -0700 Subject: Add support for the #undef macro. This isn't ideal for two reasons: 1. There's a bunch of stateful redundancy in the lexer that should be cleaned up. 2. The hash table does not provide a mechanism to delete an entry, so we waste memory to add a new NULL entry in front of the existing entry with the same key. 
But this does at least work, (it passes the recently added undef test case). --- glcpp-lex.l | 19 ++++++++++++++++++- glcpp-parse.y | 26 +++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index ec91538a73..9ec4deb718 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -33,7 +33,7 @@ %option extra-type="glcpp_parser_t *" %x ST_DEFINE -%x ST_DEFVAL +%x ST_UNDEF SPACE [[:space:]] NONSPACE [^[:space:]] @@ -67,6 +67,23 @@ TOKEN {NONSPACE}+ {SPACE}+ +{HASH}undef{HSPACE}* { + BEGIN ST_UNDEF; + return UNDEF; +} + +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{SPACE}+ + /* Anything we don't specifically recognize is a stream of tokens */ {NONSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); diff --git a/glcpp-parse.y b/glcpp-parse.y index 1a7ec4970d..29614fb1a4 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -57,7 +57,7 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER NEWLINE TOKEN +%token DEFINE IDENTIFIER NEWLINE TOKEN UNDEF %type token IDENTIFIER TOKEN %type replacement_list @@ -73,19 +73,35 @@ content: _print_resolved_token (parser, $1); talloc_free ($1); } -| directive +| directive_with_newline | content token { _print_resolved_token (parser, $2); talloc_free ($2); } -| content directive +| content directive_with_newline +; + +directive_with_newline: + directive NEWLINE { + printf ("\n"); + } ; directive: - DEFINE IDENTIFIER replacement_list NEWLINE { + DEFINE IDENTIFIER replacement_list { talloc_steal ($3, $2); hash_table_insert (parser->defines, $3, $2); - printf ("\n"); + } +| UNDEF IDENTIFIER { + list_t *replacement = hash_table_find (parser->defines, $2); + if (replacement) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. 
*/ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (replacement); + } + talloc_free ($2); } ; -- cgit v1.2.3 From a68e668b17a00ed5714cdb1e7809b7ba4522d89d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:14:08 -0700 Subject: Add test case to define, undef, and then again define a macro. Happily, this is another test case that works just fine without any additional code. --- tests/010-undef-re-define.c | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/010-undef-re-define.c diff --git a/tests/010-undef-re-define.c b/tests/010-undef-re-define.c new file mode 100644 index 0000000000..32ff73798b --- /dev/null +++ b/tests/010-undef-re-define.c @@ -0,0 +1,6 @@ +#define foo 1 +foo +#undef foo +foo +#define foo 2 +foo -- cgit v1.2.3 From 012295f94c4b02d2683072d9aa6ab56f81409507 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:19:23 -0700 Subject: Simplify lexer significantly (remove all stateful lexing). We are able to remove all state by simply passing NEWLINE through as a token unconditionally (as opposed to only passing newline when on a directive line as we did previously). 
--- glcpp-lex.l | 41 +++++++++-------------------------------- glcpp-parse.y | 6 ++++++ 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 9ec4deb718..18d9050d71 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,9 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_UNDEF - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -46,48 +43,28 @@ TOKEN {NONSPACE}+ %% {HASH}define{HSPACE}* { - BEGIN ST_DEFINE; return DEFINE; } -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{SPACE}+ - {HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; return UNDEF; } -{IDENTIFIER} { + +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{SPACE}+ - - /* Anything we don't specifically recognize is a stream of tokens */ -{NONSPACE}+ { +{TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; } +\n { + return NEWLINE; +} + +{SPACE}+ + %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 29614fb1a4..9883a6f953 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -74,11 +74,17 @@ content: talloc_free ($1); } | directive_with_newline +| NEWLINE { + printf ("\n"); + } | content token { _print_resolved_token (parser, $2); talloc_free ($2); } | content directive_with_newline +| content NEWLINE { + printf ("\n"); + } ; directive_with_newline: -- cgit v1.2.3 From 8bcb6f1777ff8f763c67552c111ce8e637d78410 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 12 May 2010 13:21:20 -0700 Subject: Remove some redundancy in the top-level production. Previously we had two copies of all top-level actions, (once in a list context and once in a non-list context). 
Much simpler to instead have a single list-context production with no action and then only have the actions in their own non-list contexts. --- glcpp-parse.y | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 9883a6f953..91fc5b98fc 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -65,7 +65,7 @@ _list_append (list_t *list, const char *str); input: /* empty */ -| content +| input content ; content: @@ -77,14 +77,6 @@ content: | NEWLINE { printf ("\n"); } -| content token { - _print_resolved_token (parser, $2); - talloc_free ($2); - } -| content directive_with_newline -| content NEWLINE { - printf ("\n"); - } ; directive_with_newline: -- cgit v1.2.3 From 9f62a7e9e25efd79ebf46c64166876436f88f08a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 07:38:29 -0700 Subject: Make the lexer distinguish between identifiers and defined macros. This is just a minor style improvement for now. But the same mechanism, (having the lexer peek into the table of defined macros), will be essential when we add function-like macros in addition to the current object-like macros. 
--- glcpp-lex.l | 5 ++++- glcpp-parse.y | 51 ++++++++++++++++++++++++++++++++++----------------- glcpp.h | 4 ++++ 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 18d9050d71..3622db939e 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,10 @@ TOKEN {NONSPACE}+ {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; + if (glcpp_parser_macro_defined (yyextra, yylval.str)) + return MACRO; + else + return IDENTIFIER; } {TOKEN} { diff --git a/glcpp-parse.y b/glcpp-parse.y index 91fc5b98fc..4d6475497b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -39,7 +39,7 @@ void yyerror (void *scanner, const char *error); void -_print_resolved_token (glcpp_parser_t *parser, const char *token); +_print_expanded_macro (glcpp_parser_t *parser, const char *macro); list_t * _list_create (void *ctx); @@ -57,8 +57,8 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER NEWLINE TOKEN UNDEF -%type token IDENTIFIER TOKEN +%token DEFINE IDENTIFIER MACRO NEWLINE TOKEN UNDEF +%type IDENTIFIER MACRO TOKEN string %type replacement_list %% @@ -69,8 +69,16 @@ input: ; content: - token { - _print_resolved_token (parser, $1); + IDENTIFIER { + printf ("%s", $1); + talloc_free ($1); + } +| TOKEN { + printf ("%s", $1); + talloc_free ($1); + } +| MACRO { + _print_expanded_macro (parser, $1); talloc_free ($1); } | directive_with_newline @@ -90,7 +98,7 @@ directive: talloc_steal ($3, $2); hash_table_insert (parser->defines, $3, $2); } -| UNDEF IDENTIFIER { +| UNDEF MACRO { list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way @@ -108,16 +116,17 @@ replacement_list: $$ = _list_create (parser); } -| replacement_list token { +| replacement_list string { _list_append ($1, $2); talloc_free ($2); $$ = $1; } ; -token: - TOKEN { $$ = $1; } -| IDENTIFIER { $$ = $1; } +string: + 
IDENTIFIER { $$ = $1; } +| MACRO { $$ = $1; } +| TOKEN { $$ = $1; } ; %% @@ -187,11 +196,17 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +int +glcpp_parser_macro_defined (glcpp_parser_t *parser, const char *identifier) +{ + return (hash_table_find (parser->defines, identifier) != NULL); +} + static void -_print_resolved_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - int *first) +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + int *first) { list_t *replacement; node_t *node; @@ -207,16 +222,18 @@ _print_resolved_recursive (glcpp_parser_t *parser, printf ("%s%s", *first ? "" : " ", token); *first = 0; } else { - _print_resolved_recursive (parser, token, orig, first); + _print_expanded_macro_recursive (parser, + token, orig, + first); } } } } void -_print_resolved_token (glcpp_parser_t *parser, const char *token) +_print_expanded_macro (glcpp_parser_t *parser, const char *macro) { int first = 1; - _print_resolved_recursive (parser, token, token, &first); + _print_expanded_macro_recursive (parser, macro, macro, &first); } diff --git a/glcpp.h b/glcpp.h index 8472570ccb..39d6d5d0eb 100644 --- a/glcpp.h +++ b/glcpp.h @@ -52,6 +52,10 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); +int +glcpp_parser_macro_defined (glcpp_parser_t *parser, + const char *identifier); + /* Generated by glcpp-lex.l to glcpp-lex.c */ int -- cgit v1.2.3 From 4abc3dec720933e78a266417cffb2ea7b16d497f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 09:34:21 -0700 Subject: Add tests for the structure of function-like macros. These test only the most basic aspect of parsing of function-like macros. Specifically, none of the definitions of these function like macros use the arguments of the function. No function-like macros are implemented yet, so all of these fail for now. 
--- tests/011-define-func-empty.c | 2 ++ tests/012-define-func-no-args.c | 2 ++ tests/013-define-func-1-arg-unused.c | 2 ++ tests/014-define-func-2-arg-unused.c | 2 ++ 4 files changed, 8 insertions(+) create mode 100644 tests/011-define-func-empty.c create mode 100644 tests/012-define-func-no-args.c create mode 100644 tests/013-define-func-1-arg-unused.c create mode 100644 tests/014-define-func-2-arg-unused.c diff --git a/tests/011-define-func-empty.c b/tests/011-define-func-empty.c new file mode 100644 index 0000000000..d9ce13c228 --- /dev/null +++ b/tests/011-define-func-empty.c @@ -0,0 +1,2 @@ +#define foo() +foo() diff --git a/tests/012-define-func-no-args.c b/tests/012-define-func-no-args.c new file mode 100644 index 0000000000..c2bb730b11 --- /dev/null +++ b/tests/012-define-func-no-args.c @@ -0,0 +1,2 @@ +#define foo() bar +foo() diff --git a/tests/013-define-func-1-arg-unused.c b/tests/013-define-func-1-arg-unused.c new file mode 100644 index 0000000000..f78fb8b118 --- /dev/null +++ b/tests/013-define-func-1-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x) 1 +foo(bar) diff --git a/tests/014-define-func-2-arg-unused.c b/tests/014-define-func-2-arg-unused.c new file mode 100644 index 0000000000..11feb2624b --- /dev/null +++ b/tests/014-define-func-2-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x,y) 1 +foo(bar,baz) -- cgit v1.2.3 From fcbbb4688641e46270ba0cd531639df9b964f697 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 09:36:23 -0700 Subject: Add support for the structure of function-like macros. We accept the structure of arguments in both macro definition and macro invocation, but we don't yet expand those arguments. This is just enough code to pass the recently-added tests, but does not yet provide any sort of useful function-like macro. 
--- Makefile | 2 +- glcpp-lex.l | 20 ++++-- glcpp-parse.y | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++------- glcpp.h | 12 +++- 4 files changed, 214 insertions(+), 34 deletions(-) diff --git a/Makefile b/Makefile index 7233150a80..c5472a86b3 100644 --- a/Makefile +++ b/Makefile @@ -22,4 +22,4 @@ test: clean: rm -f glcpp-lex.c glcpp-parse.c *.o *~ - rm -f tests/*.out tests/*.gcc tests/*.expected + rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ diff --git a/glcpp-lex.l b/glcpp-lex.l index 3622db939e..c6e545aa8e 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -38,7 +38,7 @@ NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*# IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN {NONSPACE}+ +TOKEN [^[:space:](),]+ %% @@ -53,12 +53,22 @@ TOKEN {NONSPACE}+ {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); - if (glcpp_parser_macro_defined (yyextra, yylval.str)) - return MACRO; - else - return IDENTIFIER; + switch (glcpp_parser_macro_type (yyextra, yylval.str)) + { + case MACRO_TYPE_UNDEFINED: + return IDENTIFIER; + break; + case MACRO_TYPE_OBJECT: + return OBJ_MACRO; + break; + case MACRO_TYPE_FUNCTION: + return FUNC_MACRO; + break; + } } +[(),] { return yytext[0]; } + {TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); return TOKEN; diff --git a/glcpp-parse.y b/glcpp-parse.y index 4d6475497b..2e40db525b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -24,12 +24,19 @@ #include #include +#include #include #include "glcpp.h" #define YYLEX_PARAM parser->scanner +typedef struct { + int is_function; + list_t *parameter_list; + list_t *replacement_list; +} macro_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; @@ -39,13 +46,32 @@ void yyerror (void *scanner, const char *error); void -_print_expanded_macro (glcpp_parser_t *parser, const char *macro); +_define_object_macro (glcpp_parser_t *parser, + const char *macro, + list_t *replacement_list); + +void +_define_function_macro (glcpp_parser_t *parser, + const char *macro, + list_t 
*parameter_list, + list_t *replacement_list); + +void +_print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); + +void +_print_expanded_function_macro (glcpp_parser_t *parser, + const char *macro, + list_t *arguments); list_t * _list_create (void *ctx); void -_list_append (list_t *list, const char *str); +_list_append_item (list_t *list, const char *str); + +void +_list_append_list (list_t *list, list_t *tail); %} @@ -57,9 +83,9 @@ _list_append (list_t *list, const char *str); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE IDENTIFIER MACRO NEWLINE TOKEN UNDEF -%type IDENTIFIER MACRO TOKEN string -%type replacement_list +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN string +%type argument argument_list parameter_list replacement_list %% @@ -77,16 +103,48 @@ content: printf ("%s", $1); talloc_free ($1); } -| MACRO { - _print_expanded_macro (parser, $1); - talloc_free ($1); - } +| macro | directive_with_newline | NEWLINE { printf ("\n"); } ; +macro: + FUNC_MACRO '(' argument_list ')' { + _print_expanded_function_macro (parser, $1, $3); + } +| OBJ_MACRO { + _print_expanded_object_macro (parser, $1); + talloc_free ($1); + } +; + +argument_list: + /* empty */ { + $$ = _list_create (parser); + } +| argument { + $$ = _list_create (parser); + _list_append_list ($$, $1); + } +| argument_list ',' argument { + _list_append_list ($1, $3); + $$ = $1; + } +; + +argument: + /* empty */ { + $$ = _list_create (parser); + } +| argument string { + _list_append_item ($1, $2); + talloc_free ($2); + } +| argument '(' argument ')' +; + directive_with_newline: directive NEWLINE { printf ("\n"); @@ -95,10 +153,23 @@ directive_with_newline: directive: DEFINE IDENTIFIER replacement_list { - talloc_steal ($3, $2); - hash_table_insert (parser->defines, $3, $2); + _define_object_macro (parser, $2, $3); + } +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { + 
_define_function_macro (parser, $2, $4, $6); + } +| UNDEF FUNC_MACRO { + list_t *replacement = hash_table_find (parser->defines, $2); + if (replacement) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (replacement); + } + talloc_free ($2); } -| UNDEF MACRO { +| UNDEF OBJ_MACRO { list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way @@ -115,17 +186,33 @@ replacement_list: /* empty */ { $$ = _list_create (parser); } - | replacement_list string { - _list_append ($1, $2); + _list_append_item ($1, $2); talloc_free ($2); $$ = $1; } ; +parameter_list: + /* empty */ { + $$ = _list_create (parser); + } +| IDENTIFIER { + $$ = _list_create (parser); + _list_append_item ($$, $1); + talloc_free ($1); + } +| parameter_list ',' IDENTIFIER { + _list_append_item ($1, $3); + talloc_free ($3); + $$ = $1; + } +; + string: IDENTIFIER { $$ = $1; } -| MACRO { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } | TOKEN { $$ = $1; } ; @@ -144,7 +231,19 @@ _list_create (void *ctx) } void -_list_append (list_t *list, const char *str) +_list_append_list (list_t *list, list_t *tail) +{ + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; +} + +void +_list_append_item (list_t *list, const char *str) { node_t *node; @@ -196,10 +295,20 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -int -glcpp_parser_macro_defined (glcpp_parser_t *parser, const char *identifier) +macro_type_t +glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) { - return (hash_table_find (parser->defines, identifier) != NULL); + macro_t *macro; + + macro = hash_table_find (parser->defines, identifier); + + if (macro == NULL) + return MACRO_TYPE_UNDEFINED; + + if 
(macro->is_function) + return MACRO_TYPE_FUNCTION; + else + return MACRO_TYPE_OBJECT; } static void @@ -208,15 +317,17 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *orig, int *first) { - list_t *replacement; + macro_t *macro; node_t *node; - replacement = hash_table_find (parser->defines, token); - if (replacement == NULL) { + macro = hash_table_find (parser->defines, token); + if (macro == NULL) { printf ("%s%s", *first ? "" : " ", token); *first = 0; } else { - for (node = replacement->head ; node ; node = node->next) { + list_t *replacement_list = macro->replacement_list; + + for (node = replacement_list->head ; node ; node = node->next) { token = node->str; if (strcmp (token, orig) == 0) { printf ("%s%s", *first ? "" : " ", token); @@ -231,9 +342,62 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, } void -_print_expanded_macro (glcpp_parser_t *parser, const char *macro) +_define_object_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *replacement_list) +{ + macro_t *macro; + + macro = xtalloc (parser, macro_t); + + macro->is_function = 0; + macro->parameter_list = NULL; + macro->replacement_list = talloc_steal (macro, replacement_list); + + hash_table_insert (parser->defines, macro, identifier); +} + +void +_define_function_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *parameter_list, + list_t *replacement_list) +{ + macro_t *macro; + + macro = xtalloc (parser, macro_t); + + macro->is_function = 1; + macro->parameter_list = talloc_steal (macro, parameter_list); + macro->replacement_list = talloc_steal (macro, replacement_list); + + hash_table_insert (parser->defines, macro, identifier); +} + +void +_print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { int first = 1; + macro_t *macro; + + macro = hash_table_find (parser->defines, identifier); + assert (! 
macro->is_function); + + _print_expanded_macro_recursive (parser, identifier, identifier, &first); +} + +void +_print_expanded_function_macro (glcpp_parser_t *parser, + const char *identifier, + list_t *arguments) +{ + int first = 1; + macro_t *macro; + + macro = hash_table_find (parser->defines, identifier); + assert (macro->is_function); + + /* XXX: Need to use argument list here in the expansion. */ - _print_expanded_macro_recursive (parser, macro, macro, &first); + _print_expanded_macro_recursive (parser, identifier, identifier, &first); } diff --git a/glcpp.h b/glcpp.h index 39d6d5d0eb..69b3b840ae 100644 --- a/glcpp.h +++ b/glcpp.h @@ -52,9 +52,15 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -int -glcpp_parser_macro_defined (glcpp_parser_t *parser, - const char *identifier); +typedef enum { + MACRO_TYPE_UNDEFINED, + MACRO_TYPE_OBJECT, + MACRO_TYPE_FUNCTION +} macro_type_t; + +macro_type_t +glcpp_parser_macro_type (glcpp_parser_t *parser, + const char *identifier); /* Generated by glcpp-lex.l to glcpp-lex.c */ -- cgit v1.2.3 From db35d557a40b9fb56483f77da2fb98f541808dd0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 08:47:32 -0700 Subject: Eliminate a reduce/reduce conflict in the function-like macro production. Previously, an empty argument could be parsed as either an "argument_list" directly or first as an "argument" and then an "argument_list". We fix this by removing the possibility of an empty "argument_list" directly. 
--- glcpp-parse.y | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 2e40db525b..66725db69e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -121,10 +121,7 @@ macro: ; argument_list: - /* empty */ { - $$ = _list_create (parser); - } -| argument { + argument { $$ = _list_create (parser); _list_append_list ($$, $1); } -- cgit v1.2.3 From 67c27afc168f85ce6dc66820db864aaaef67f8ed Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:26:58 -0700 Subject: Add test for an object-like macro with a definition beginning with '(' Our current parser sees "#define foo (" as an identifier token followed by a '(' token and parses this as a function-like macro. That would be correct for "#define foo(" but the preprocessor specification treats this whitespace as significant here so this test currently fails. --- tests/015-define-object-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/015-define-object-with-parens.c diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c new file mode 100644 index 0000000000..7dcadfa24f --- /dev/null +++ b/tests/015-define-object-with-parens.c @@ -0,0 +1,2 @@ +#define foo ( ) 1 +foo() -- cgit v1.2.3 From 0a93cbbe4f00e0bdd0c61119d3598e3a98a37505 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:29:07 -0700 Subject: Fix parsing of object-like macro with a definition that begins with '('. Previously our parser was incorrectly treating this case as a function-like macro. We fix this by conditionally passing a SPACE token from the lexer, (but only immediately after the identifier immediately after #define). 
--- glcpp-lex.l | 41 ++++++++++++++++++++++++++++++++++++----- glcpp-parse.y | 32 ++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index c6e545aa8e..3c9dda46d4 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,9 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" +%x ST_DEFINE +%x ST_DEFVAL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -42,16 +45,42 @@ TOKEN [^[:space:](),]+ %% -{HASH}define{HSPACE}* { +{HASH}undef{HSPACE}* { + return UNDEF; +} + + /* We use the ST_DEFINE and ST_DEFVAL states so that we can + * pass a space token, (yes, a token for whitespace!), since + * the preprocessor specification requires distinguishing + * "#define foo()" from "#define foo ()". + */ +{HASH}define{HSPACE}* { + BEGIN ST_DEFINE; return DEFINE; } -{HASH}undef{HSPACE}* { - return UNDEF; +{IDENTIFIER} { + BEGIN ST_DEFVAL; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ { + BEGIN INITIAL; + return SPACE; +} -{IDENTIFIER} { +"(" { + BEGIN INITIAL; + return '('; +} + +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) { @@ -67,7 +96,9 @@ TOKEN [^[:space:](),]+ } } -[(),] { return yytext[0]; } +[(),] { + return yytext[0]; +} {TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); diff --git a/glcpp-parse.y b/glcpp-parse.y index 66725db69e..dc352de55b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -83,8 +83,8 @@ _list_append_list (list_t *list, list_t *tail); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN string +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN word word_or_symbol %type argument argument_list parameter_list 
replacement_list %% @@ -105,9 +105,10 @@ content: } | macro | directive_with_newline -| NEWLINE { - printf ("\n"); - } +| NEWLINE { printf ("\n"); } +| '(' { printf ("("); } +| ')' { printf (")"); } +| ',' { printf (","); } ; macro: @@ -135,7 +136,7 @@ argument: /* empty */ { $$ = _list_create (parser); } -| argument string { +| argument word { _list_append_item ($1, $2); talloc_free ($2); } @@ -149,8 +150,12 @@ directive_with_newline: ; directive: - DEFINE IDENTIFIER replacement_list { - _define_object_macro (parser, $2, $3); + DEFINE IDENTIFIER { + list_t *list = _list_create (parser); + _define_object_macro (parser, $2, list); + } +| DEFINE IDENTIFIER SPACE replacement_list { + _define_object_macro (parser, $2, $4); } | DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { _define_function_macro (parser, $2, $4, $6); @@ -183,7 +188,7 @@ replacement_list: /* empty */ { $$ = _list_create (parser); } -| replacement_list string { +| replacement_list word_or_symbol { _list_append_item ($1, $2); talloc_free ($2); $$ = $1; @@ -206,7 +211,14 @@ parameter_list: } ; -string: +word_or_symbol: + word { $$ = $1; } +| '(' { $$ = xtalloc_strdup (parser, "("); } +| ')' { $$ = xtalloc_strdup (parser, ")"); } +| ',' { $$ = xtalloc_strdup (parser, ","); } +; + +word: IDENTIFIER { $$ = $1; } | FUNC_MACRO { $$ = $1; } | OBJ_MACRO { $$ = $1; } -- cgit v1.2.3 From 27bc8930ba9ba67f2de29a03232a948316409ded Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:41:53 -0700 Subject: Add some whitespace variations to test 15. This shows two minor failures in our current parsing (resulting in whitespace-only changes, so not that significant): 1. We are inserting extra whitespace between tokens not originally separated by whitespace in the replacement list of a macro definition. 2. We are swallowing whitespace separating tokens in the general content.
--- tests/015-define-object-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c index 7dcadfa24f..10bf7e31a3 100644 --- a/tests/015-define-object-with-parens.c +++ b/tests/015-define-object-with-parens.c @@ -1,2 +1,4 @@ #define foo ( ) 1 foo() +#define bar () 2 +bar( ) -- cgit v1.2.3 From 462cce1852c80a2d71bfec1a2ead10fe0a9e2486 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:45:32 -0700 Subject: Makefile: Make "make test" depend on the main program. Otherwise, running "make test" can run an old version of the code, (even when new changes are sitting in the source waiting to be compiled). --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5472a86b3..550945abd3 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o glcpp-lex.c: glcpp-parse.h -test: +test: glcpp @(cd tests; ./glcpp-test) clean: -- cgit v1.2.3 From 48b94da0994b44e41324a2419117dcd81facce8b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 10:46:29 -0700 Subject: Make the lexer return SPACE tokens unconditionally. It seems strange to always be returning SPACE tokens, but since we were already needing to return a SPACE token in some cases, this actually simplifies our lexer. This also allows us to fix two whitespace-handling differences compared to "gcc -E" so that now the recent modification to the test suite passes once again. 
--- glcpp-lex.l | 29 +++-------------------------- glcpp-parse.y | 37 +++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 3c9dda46d4..21b9e3530a 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,9 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFVAL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -55,31 +52,9 @@ TOKEN [^[:space:](),]+ * "#define foo()" from "#define foo ()". */ {HASH}define{HSPACE}* { - BEGIN ST_DEFINE; return DEFINE; } -{IDENTIFIER} { - BEGIN ST_DEFVAL; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HSPACE}+ { - BEGIN INITIAL; - return SPACE; -} - -"(" { - BEGIN INITIAL; - return '('; -} - {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) @@ -109,6 +84,8 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{SPACE}+ +{HSPACE}+ { + return SPACE; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index dc352de55b..7d1c3ab927 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -109,6 +109,7 @@ content: | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } +| SPACE { printf (" "); } ; macro: @@ -157,8 +158,12 @@ directive: | DEFINE IDENTIFIER SPACE replacement_list { _define_object_macro (parser, $2, $4); } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { - _define_function_macro (parser, $2, $4, $6); +| DEFINE IDENTIFIER '(' parameter_list ')' { + list_t *list = _list_create (parser); + _define_function_macro (parser, $2, $4, list); + } +| DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { + _define_function_macro (parser, $2, $4, $7); } | UNDEF FUNC_MACRO { list_t *replacement = hash_table_find (parser->defines, $2); @@ -185,8 +190,10 @@ directive: ; replacement_list: - /* empty */ { + word_or_symbol { $$ = _list_create (parser); + 
_list_append_item ($$, $1); + talloc_free ($1); } | replacement_list word_or_symbol { _list_append_item ($1, $2); @@ -216,6 +223,7 @@ word_or_symbol: | '(' { $$ = xtalloc_strdup (parser, "("); } | ')' { $$ = xtalloc_strdup (parser, ")"); } | ',' { $$ = xtalloc_strdup (parser, ","); } +| SPACE { $$ = xtalloc_strdup (parser, " "); } ; word: @@ -323,29 +331,24 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, - int *first) + const char *orig) { macro_t *macro; node_t *node; macro = hash_table_find (parser->defines, token); if (macro == NULL) { - printf ("%s%s", *first ? "" : " ", token); - *first = 0; + printf ("%s", token); } else { list_t *replacement_list = macro->replacement_list; for (node = replacement_list->head ; node ; node = node->next) { token = node->str; - if (strcmp (token, orig) == 0) { - printf ("%s%s", *first ? "" : " ", token); - *first = 0; - } else { + if (strcmp (token, orig) == 0) + printf ("%s", token); + else _print_expanded_macro_recursive (parser, - token, orig, - first); - } + token, orig); } } } @@ -386,13 +389,12 @@ _define_function_macro (glcpp_parser_t *parser, void _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { - int first = 1; macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier, &first); + _print_expanded_macro_recursive (parser, identifier, identifier); } void @@ -400,7 +402,6 @@ _print_expanded_function_macro (glcpp_parser_t *parser, const char *identifier, list_t *arguments) { - int first = 1; macro_t *macro; macro = hash_table_find (parser->defines, identifier); @@ -408,5 +409,5 @@ _print_expanded_function_macro (glcpp_parser_t *parser, /* XXX: Need to use argument list here in the expansion. 
*/ - _print_expanded_macro_recursive (parser, identifier, identifier, &first); + _print_expanded_macro_recursive (parser, identifier, identifier); } -- cgit v1.2.3 From af71ba41bdecbe9f971752c32c514ca7b319f588 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:54:17 -0700 Subject: Add tests exercising substitution of arguments in function-like macros. This capability is the only thing that makes function-like macros interesting. This isn't supported yet so these tests fail for now. --- tests/016-define-func-1-arg.c | 2 ++ tests/017-define-func-2-args.c | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/016-define-func-1-arg.c create mode 100644 tests/017-define-func-2-args.c diff --git a/tests/016-define-func-1-arg.c b/tests/016-define-func-1-arg.c new file mode 100644 index 0000000000..dea38d1fed --- /dev/null +++ b/tests/016-define-func-1-arg.c @@ -0,0 +1,2 @@ +#define foo(x) ((x) + 1) +foo(bar) diff --git a/tests/017-define-func-2-args.c b/tests/017-define-func-2-args.c new file mode 100644 index 0000000000..c725383527 --- /dev/null +++ b/tests/017-define-func-2-args.c @@ -0,0 +1,2 @@ +#define foo(x,y) ((x)*(y)) +foo(bar,baz) -- cgit v1.2.3 From dcc2ecd30d2ff68792f192c867b301a10872d86d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:56:42 -0700 Subject: Implement substitution of macro arguments. Making the two recently-added tests for this functionality now pass. 
--- glcpp-parse.y | 169 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 141 insertions(+), 28 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 7d1c3ab927..4b4a754f82 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -73,6 +73,15 @@ _list_append_item (list_t *list, const char *str); void _list_append_list (list_t *list, list_t *tail); +int +_list_contains (list_t *list, const char *member, int *index); + +const char * +_list_member_at (list_t *list, int index); + +int +_list_length (list_t *list); + %} %union { @@ -277,6 +286,62 @@ _list_append_item (list_t *list, const char *str) list->tail = node; } + +int +_list_contains (list_t *list, const char *member, int *index) +{ + node_t *node; + int i; + + if (list == NULL) + return 0; + + for (i = 0, node = list->head; node; i++, node = node->next) { + if (strcmp (node->str, member) == 0) { + *index = i; + return 1; + } + } + + return 0; +} + +int +_list_length (list_t *list) +{ + int length = 0; + node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +const char * +_list_member_at (list_t *list, int index) +{ + node_t *node; + int i; + + if (list == NULL) + return NULL; + + node = list->head; + for (i = 0; i < index; i++) { + node = node->next; + if (node == NULL) + break; + } + + if (node) + return node->str; + + return NULL; +} void yyerror (void *scanner, const char *error) @@ -328,31 +393,6 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) return MACRO_TYPE_OBJECT; } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig) -{ - macro_t *macro; - node_t *node; - - macro = hash_table_find (parser->defines, token); - if (macro == NULL) { - printf ("%s", token); - } else { - list_t *replacement_list = macro->replacement_list; - - for (node = replacement_list->head ; node ; node = node->next) { - token = node->str; - if 
(strcmp (token, orig) == 0) - printf ("%s", token); - else - _print_expanded_macro_recursive (parser, - token, orig); - } - } -} - void _define_object_macro (glcpp_parser_t *parser, const char *identifier, @@ -386,6 +426,70 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } +static void +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + list_t *parameters, + list_t *arguments); + +static void +_print_expanded_list_recursive (glcpp_parser_t *parser, + list_t *list, + const char *orig, + list_t *parameters, + list_t *arguments) +{ + const char *token; + node_t *node; + int index; + + for (node = list->head ; node ; node = node->next) { + token = node->str; + + if (strcmp (token, orig) == 0) { + printf ("%s", token); + continue; + } + + if (_list_contains (parameters, token, &index)) { + const char *argument; + + argument = _list_member_at (arguments, index); + _print_expanded_macro_recursive (parser, argument, + orig, parameters, + arguments); + } else { + _print_expanded_macro_recursive (parser, token, + orig, parameters, + arguments); + } + } +} + + +static void +_print_expanded_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + list_t *parameters, + list_t *arguments) +{ + macro_t *macro; + list_t *replacement_list; + + macro = hash_table_find (parser->defines, token); + if (macro == NULL) { + printf ("%s", token); + return; + } + + replacement_list = macro->replacement_list; + + _print_expanded_list_recursive (parser, replacement_list, + orig, parameters, arguments); +} + void _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) { @@ -394,7 +498,8 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! 
macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier); + _print_expanded_macro_recursive (parser, identifier, identifier, + NULL, NULL); } void @@ -407,7 +512,15 @@ _print_expanded_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - /* XXX: Need to use argument list here in the expansion. */ + if (_list_length (arguments) != _list_length (macro->parameter_list)) { + fprintf (stderr, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _list_length (arguments), + _list_length (macro->parameter_list)); + return; + } - _print_expanded_macro_recursive (parser, identifier, identifier); + _print_expanded_macro_recursive (parser, identifier, identifier, + macro->parameter_list, arguments); } -- cgit v1.2.3 From 30140733112b09d531d949a9bfbd9daf0cae4781 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:57:34 -0700 Subject: Add test where a macro formal parameter is the same as an existing macro. This is a well-defined condition, but something that currently trips up the implementation. Should be easy to fix. --- tests/018-define-func-macro-as-parameter.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/018-define-func-macro-as-parameter.c diff --git a/tests/018-define-func-macro-as-parameter.c b/tests/018-define-func-macro-as-parameter.c new file mode 100644 index 0000000000..668130b8f9 --- /dev/null +++ b/tests/018-define-func-macro-as-parameter.c @@ -0,0 +1,3 @@ +#define x 0 +#define foo(x) x +foo(1) -- cgit v1.2.3 From 7f9aa36bbcf457e1a221ab6447de3bec30908000 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 13 May 2010 12:58:49 -0700 Subject: Fix case of a macro formal parameter matching a defined macro. Simply need to allow for a macro name to appear in the parameter list. This makes the recently-added test pass. 
--- glcpp-parse.y | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 4b4a754f82..1b6c939a26 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -93,7 +93,7 @@ _list_length (list_t *list); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN word word_or_symbol +%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol %type argument argument_list parameter_list replacement_list %% @@ -215,18 +215,24 @@ parameter_list: /* empty */ { $$ = _list_create (parser); } -| IDENTIFIER { +| identifier_perhaps_macro { $$ = _list_create (parser); _list_append_item ($$, $1); talloc_free ($1); } -| parameter_list ',' IDENTIFIER { +| parameter_list ',' identifier_perhaps_macro { _list_append_item ($1, $3); talloc_free ($3); $$ = $1; } ; +identifier_perhaps_macro: + IDENTIFIER { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } +; + word_or_symbol: word { $$ = $1; } | '(' { $$ = xtalloc_strdup (parser, "("); } -- cgit v1.2.3 From 610053b2c63fe6bc1d11347dc87e63d958b04dd8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:05:11 -0700 Subject: Rename list_t and node_t to string_list_t and string_node_t. We'll soon be adding other types of lists, so it will be helpful to have a qualified name here. 
--- glcpp-parse.y | 128 +++++++++++++++++++++++++++++----------------------------- glcpp.h | 14 +++---- 2 files changed, 71 insertions(+), 71 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 1b6c939a26..3b97743085 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -33,8 +33,8 @@ typedef struct { int is_function; - list_t *parameter_list; - list_t *replacement_list; + string_list_t *parameter_list; + string_list_t *replacement_list; } macro_t; struct glcpp_parser { @@ -48,13 +48,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - list_t *replacement_list); + string_list_t *replacement_list); void _define_function_macro (glcpp_parser_t *parser, const char *macro, - list_t *parameter_list, - list_t *replacement_list); + string_list_t *parameter_list, + string_list_t *replacement_list); void _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); @@ -62,31 +62,31 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); void _print_expanded_function_macro (glcpp_parser_t *parser, const char *macro, - list_t *arguments); + string_list_t *arguments); -list_t * -_list_create (void *ctx); +string_list_t * +_string_list_create (void *ctx); void -_list_append_item (list_t *list, const char *str); +_string_list_append_item (string_list_t *list, const char *str); void -_list_append_list (list_t *list, list_t *tail); +_string_list_append_list (string_list_t *list, string_list_t *tail); int -_list_contains (list_t *list, const char *member, int *index); +_string_list_contains (string_list_t *list, const char *member, int *index); const char * -_list_member_at (list_t *list, int index); +_string_list_member_at (string_list_t *list, int index); int -_list_length (list_t *list); +_string_list_length (string_list_t *list); %} %union { char *str; - list_t *list; + string_list_t *list; } %parse-param {glcpp_parser_t *parser} @@ -133,21 +133,21 @@ macro: argument_list: 
argument { - $$ = _list_create (parser); - _list_append_list ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_list ($$, $1); } | argument_list ',' argument { - _list_append_list ($1, $3); + _string_list_append_list ($1, $3); $$ = $1; } ; argument: /* empty */ { - $$ = _list_create (parser); + $$ = _string_list_create (parser); } | argument word { - _list_append_item ($1, $2); + _string_list_append_item ($1, $2); talloc_free ($2); } | argument '(' argument ')' @@ -161,21 +161,21 @@ directive_with_newline: directive: DEFINE IDENTIFIER { - list_t *list = _list_create (parser); + string_list_t *list = _string_list_create (parser); _define_object_macro (parser, $2, list); } | DEFINE IDENTIFIER SPACE replacement_list { _define_object_macro (parser, $2, $4); } | DEFINE IDENTIFIER '(' parameter_list ')' { - list_t *list = _list_create (parser); + string_list_t *list = _string_list_create (parser); _define_function_macro (parser, $2, $4, list); } | DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { _define_function_macro (parser, $2, $4, $7); } | UNDEF FUNC_MACRO { - list_t *replacement = hash_table_find (parser->defines, $2); + string_list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -186,7 +186,7 @@ directive: talloc_free ($2); } | UNDEF OBJ_MACRO { - list_t *replacement = hash_table_find (parser->defines, $2); + string_list_t *replacement = hash_table_find (parser->defines, $2); if (replacement) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -200,12 +200,12 @@ directive: replacement_list: word_or_symbol { - $$ = _list_create (parser); - _list_append_item ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); talloc_free ($1); } | replacement_list word_or_symbol { - _list_append_item ($1, $2); + _string_list_append_item ($1, $2); 
talloc_free ($2); $$ = $1; } @@ -213,15 +213,15 @@ replacement_list: parameter_list: /* empty */ { - $$ = _list_create (parser); + $$ = _string_list_create (parser); } | identifier_perhaps_macro { - $$ = _list_create (parser); - _list_append_item ($$, $1); + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); talloc_free ($1); } | parameter_list ',' identifier_perhaps_macro { - _list_append_item ($1, $3); + _string_list_append_item ($1, $3); talloc_free ($3); $$ = $1; } @@ -250,12 +250,12 @@ word: %% -list_t * -_list_create (void *ctx) +string_list_t * +_string_list_create (void *ctx) { - list_t *list; + string_list_t *list; - list = xtalloc (ctx, list_t); + list = xtalloc (ctx, string_list_t); list->head = NULL; list->tail = NULL; @@ -263,7 +263,7 @@ _list_create (void *ctx) } void -_list_append_list (list_t *list, list_t *tail) +_string_list_append_list (string_list_t *list, string_list_t *tail) { if (list->head == NULL) { list->head = tail->head; @@ -275,11 +275,11 @@ _list_append_list (list_t *list, list_t *tail) } void -_list_append_item (list_t *list, const char *str) +_string_list_append_item (string_list_t *list, const char *str) { - node_t *node; + string_node_t *node; - node = xtalloc (list, node_t); + node = xtalloc (list, string_node_t); node->str = xtalloc_strdup (node, str); node->next = NULL; @@ -294,9 +294,9 @@ _list_append_item (list_t *list, const char *str) } int -_list_contains (list_t *list, const char *member, int *index) +_string_list_contains (string_list_t *list, const char *member, int *index) { - node_t *node; + string_node_t *node; int i; if (list == NULL) @@ -313,10 +313,10 @@ _list_contains (list_t *list, const char *member, int *index) } int -_list_length (list_t *list) +_string_list_length (string_list_t *list) { int length = 0; - node_t *node; + string_node_t *node; if (list == NULL) return 0; @@ -328,9 +328,9 @@ _list_length (list_t *list) } const char * -_list_member_at (list_t *list, int index) 
+_string_list_member_at (string_list_t *list, int index) { - node_t *node; + string_node_t *node; int i; if (list == NULL) @@ -402,7 +402,7 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - list_t *replacement_list) + string_list_t *replacement_list) { macro_t *macro; @@ -418,8 +418,8 @@ _define_object_macro (glcpp_parser_t *parser, void _define_function_macro (glcpp_parser_t *parser, const char *identifier, - list_t *parameter_list, - list_t *replacement_list) + string_list_t *parameter_list, + string_list_t *replacement_list) { macro_t *macro; @@ -436,18 +436,18 @@ static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, - list_t *parameters, - list_t *arguments); + string_list_t *parameters, + string_list_t *arguments); static void -_print_expanded_list_recursive (glcpp_parser_t *parser, - list_t *list, +_print_expanded_string_list_recursive (glcpp_parser_t *parser, + string_list_t *list, const char *orig, - list_t *parameters, - list_t *arguments) + string_list_t *parameters, + string_list_t *arguments) { const char *token; - node_t *node; + string_node_t *node; int index; for (node = list->head ; node ; node = node->next) { @@ -458,10 +458,10 @@ _print_expanded_list_recursive (glcpp_parser_t *parser, continue; } - if (_list_contains (parameters, token, &index)) { + if (_string_list_contains (parameters, token, &index)) { const char *argument; - argument = _list_member_at (arguments, index); + argument = _string_list_member_at (arguments, index); _print_expanded_macro_recursive (parser, argument, orig, parameters, arguments); @@ -478,11 +478,11 @@ static void _print_expanded_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, - list_t *parameters, - list_t *arguments) + string_list_t *parameters, + string_list_t *arguments) { macro_t *macro; - list_t *replacement_list; + string_list_t 
*replacement_list; macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -492,7 +492,7 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, replacement_list = macro->replacement_list; - _print_expanded_list_recursive (parser, replacement_list, + _print_expanded_string_list_recursive (parser, replacement_list, orig, parameters, arguments); } @@ -511,19 +511,19 @@ _print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) void _print_expanded_function_macro (glcpp_parser_t *parser, const char *identifier, - list_t *arguments) + string_list_t *arguments) { macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_list_length (arguments) != _list_length (macro->parameter_list)) { + if (_string_list_length (arguments) != _string_list_length (macro->parameter_list)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, - _list_length (arguments), - _list_length (macro->parameter_list)); + _string_list_length (arguments), + _string_list_length (macro->parameter_list)); return; } diff --git a/glcpp.h b/glcpp.h index 69b3b840ae..cee08faa98 100644 --- a/glcpp.h +++ b/glcpp.h @@ -31,15 +31,15 @@ /* Some data types used for parser value. */ -typedef struct node { +typedef struct string_node { const char *str; - struct node *next; -} node_t; + struct string_node *next; +} string_node_t; -typedef struct list { - node_t *head; - node_t *tail; -} list_t; +typedef struct string_list { + string_node_t *head; + string_node_t *tail; +} string_list_t; typedef struct glcpp_parser glcpp_parser_t; -- cgit v1.2.3 From c5e9855f130b928b480c18c913135a411ee921e7 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:12:21 -0700 Subject: Remove _list suffix from several identifiers. Instead of "parameter_list" and "replacement_list" just use "parameters" and "replacements". 
This is consistent with the existing "arguments" and keeps the line length down in the face of the now-longer "string_list_t" rather than "list_t". --- glcpp-parse.y | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 3b97743085..4e5de8254d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -33,8 +33,8 @@ typedef struct { int is_function; - string_list_t *parameter_list; - string_list_t *replacement_list; + string_list_t *parameters; + string_list_t *replacements; } macro_t; struct glcpp_parser { @@ -48,13 +48,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacement_list); + string_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *parameter_list, - string_list_t *replacement_list); + string_list_t *parameters, + string_list_t *replacements); void _print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); @@ -402,15 +402,15 @@ glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacement_list) + string_list_t *replacements) { macro_t *macro; macro = xtalloc (parser, macro_t); macro->is_function = 0; - macro->parameter_list = NULL; - macro->replacement_list = talloc_steal (macro, replacement_list); + macro->parameters = NULL; + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -418,16 +418,16 @@ _define_object_macro (glcpp_parser_t *parser, void _define_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *parameter_list, - string_list_t *replacement_list) + string_list_t *parameters, + string_list_t *replacements) { macro_t *macro; macro = xtalloc (parser, macro_t); macro->is_function = 1; - 
macro->parameter_list = talloc_steal (macro, parameter_list); - macro->replacement_list = talloc_steal (macro, replacement_list); + macro->parameters = talloc_steal (macro, parameters); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -482,7 +482,7 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, string_list_t *arguments) { macro_t *macro; - string_list_t *replacement_list; + string_list_t *replacements; macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -490,10 +490,10 @@ _print_expanded_macro_recursive (glcpp_parser_t *parser, return; } - replacement_list = macro->replacement_list; + replacements = macro->replacements; - _print_expanded_string_list_recursive (parser, replacement_list, - orig, parameters, arguments); + _print_expanded_string_list_recursive (parser, replacements, + orig, parameters, arguments); } void @@ -518,15 +518,15 @@ _print_expanded_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != _string_list_length (macro->parameter_list)) { + if (_string_list_length (arguments) != _string_list_length (macro->parameters)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, _string_list_length (arguments), - _string_list_length (macro->parameter_list)); + _string_list_length (macro->parameters)); return; } _print_expanded_macro_recursive (parser, identifier, identifier, - macro->parameter_list, arguments); + macro->parameters, arguments); } -- cgit v1.2.3 From 04af13539a7a4bc72b566c111914b103d9e851a6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:17:38 -0700 Subject: Move most printing to the action in the content production. Previously, printing was occurring all over the place. 
Here we document that it should all be happening at the top-level content production, and we move the printing of directive newlines. The printing of expanded macros is still happening in lower-level productions, but we plan to fix that soon. --- glcpp-parse.y | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 4e5de8254d..8dc7897511 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -103,6 +103,7 @@ input: | input content ; + /* We do all printing at the content level */ content: IDENTIFIER { printf ("%s", $1); @@ -113,7 +114,7 @@ content: talloc_free ($1); } | macro -| directive_with_newline +| directive_with_newline { printf ("\n"); } | NEWLINE { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } @@ -154,9 +155,7 @@ argument: ; directive_with_newline: - directive NEWLINE { - printf ("\n"); - } + directive NEWLINE ; directive: -- cgit v1.2.3 From 2be8be0f742a7abf410be8176f6fd6fc49a6b361 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:31:43 -0700 Subject: Make macro-expansion productions create string-list values rather than printing Then we print the final string list up at the top-level content production along with all other printing. Additionally, having macro-expansion productions that create values will make it easier to solve problems like composed function-like macro invocations in the future. 
--- glcpp-parse.y | 130 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 81 insertions(+), 49 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 8dc7897511..d0ee78e008 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -56,13 +56,16 @@ _define_function_macro (glcpp_parser_t *parser, string_list_t *parameters, string_list_t *replacements); -void -_print_expanded_object_macro (glcpp_parser_t *parser, const char *macro); +string_list_t * +_expand_object_macro (glcpp_parser_t *parser, const char *identifier); + +string_list_t * +_expand_function_macro (glcpp_parser_t *parser, + const char *identifier, + string_list_t *arguments); void -_print_expanded_function_macro (glcpp_parser_t *parser, - const char *macro, - string_list_t *arguments); +_print_string_list (string_list_t *list); string_list_t * _string_list_create (void *ctx); @@ -94,7 +97,7 @@ _string_list_length (string_list_t *list); %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF %type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol -%type argument argument_list parameter_list replacement_list +%type argument argument_list macro parameter_list replacement_list %% @@ -113,7 +116,9 @@ content: printf ("%s", $1); talloc_free ($1); } -| macro +| macro { + _print_string_list ($1); + } | directive_with_newline { printf ("\n"); } | NEWLINE { printf ("\n"); } | '(' { printf ("("); } @@ -124,10 +129,10 @@ content: macro: FUNC_MACRO '(' argument_list ')' { - _print_expanded_function_macro (parser, $1, $3); + $$ = _expand_function_macro (parser, $1, $3); } | OBJ_MACRO { - _print_expanded_object_macro (parser, $1); + $$ = _expand_object_macro (parser, $1); talloc_free ($1); } ; @@ -326,6 +331,18 @@ _string_list_length (string_list_t *list) return length; } +void +_print_string_list (string_list_t *list) +{ + string_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + printf ("%s", 
node->str); +} + const char * _string_list_member_at (string_list_t *list, int index) { @@ -431,29 +448,33 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - string_list_t *parameters, - string_list_t *arguments); - -static void -_print_expanded_string_list_recursive (glcpp_parser_t *parser, - string_list_t *list, - const char *orig, - string_list_t *parameters, - string_list_t *arguments) +static string_list_t * +_expand_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + string_list_t *parameters, + string_list_t *arguments); + +static string_list_t * +_expand_string_list_recursive (glcpp_parser_t *parser, + string_list_t *list, + const char *orig, + string_list_t *parameters, + string_list_t *arguments) { + string_list_t *result; + string_list_t *child; const char *token; string_node_t *node; int index; + result = _string_list_create (parser); + for (node = list->head ; node ; node = node->next) { token = node->str; if (strcmp (token, orig) == 0) { - printf ("%s", token); + _string_list_append_item (result, token); continue; } @@ -461,71 +482,82 @@ _print_expanded_string_list_recursive (glcpp_parser_t *parser, const char *argument; argument = _string_list_member_at (arguments, index); - _print_expanded_macro_recursive (parser, argument, - orig, parameters, - arguments); + child = _expand_macro_recursive (parser, argument, + orig, NULL, NULL); + _string_list_append_list (result, child); } else { - _print_expanded_macro_recursive (parser, token, + child = _expand_macro_recursive (parser, token, orig, parameters, arguments); + _string_list_append_list (result, child); } } + + return result; } -static void -_print_expanded_macro_recursive (glcpp_parser_t *parser, - const char *token, - const char *orig, - string_list_t *parameters, - string_list_t *arguments) 
+static string_list_t * +_expand_macro_recursive (glcpp_parser_t *parser, + const char *token, + const char *orig, + string_list_t *parameters, + string_list_t *arguments) { macro_t *macro; string_list_t *replacements; macro = hash_table_find (parser->defines, token); if (macro == NULL) { - printf ("%s", token); - return; + string_list_t *result; + + result = _string_list_create (parser); + _string_list_append_item (result, token); + return result; } replacements = macro->replacements; - _print_expanded_string_list_recursive (parser, replacements, - orig, parameters, arguments); + return _expand_string_list_recursive (parser, replacements, + orig, parameters, arguments); } -void -_print_expanded_object_macro (glcpp_parser_t *parser, const char *identifier) +string_list_t * +_expand_object_macro (glcpp_parser_t *parser, const char *identifier) { macro_t *macro; macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - _print_expanded_macro_recursive (parser, identifier, identifier, - NULL, NULL); + return _expand_macro_recursive (parser, identifier, identifier, + NULL, NULL); } -void -_print_expanded_function_macro (glcpp_parser_t *parser, - const char *identifier, - string_list_t *arguments) +string_list_t * +_expand_function_macro (glcpp_parser_t *parser, + const char *identifier, + string_list_t *arguments) { + string_list_t *result; macro_t *macro; + result = _string_list_create (parser); + macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != _string_list_length (macro->parameters)) { + if (_string_list_length (arguments) != + _string_list_length (macro->parameters)) + { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, _string_list_length (arguments), _string_list_length (macro->parameters)); - return; + return NULL; } - _print_expanded_macro_recursive (parser, identifier, identifier, - macro->parameters, arguments); + 
return _expand_macro_recursive (parser, identifier, identifier, + macro->parameters, arguments); } -- cgit v1.2.3 From db272e6e6fbfe349ea6d9877bb7715ecb2d9f0c1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:00:59 -0700 Subject: Add test for function-like macro invocations with multiple-token arguments. These are not yet parsed correctly, so these tests fail. --- tests/019-define-func-1-arg-multi.c | 2 ++ tests/020-define-func-2-arg-multi.c | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/019-define-func-1-arg-multi.c create mode 100644 tests/020-define-func-2-arg-multi.c diff --git a/tests/019-define-func-1-arg-multi.c b/tests/019-define-func-1-arg-multi.c new file mode 100644 index 0000000000..c4e62b2550 --- /dev/null +++ b/tests/019-define-func-1-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(this is more than one word) diff --git a/tests/020-define-func-2-arg-multi.c b/tests/020-define-func-2-arg-multi.c new file mode 100644 index 0000000000..253421139d --- /dev/null +++ b/tests/020-define-func-2-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x,y) x, two fish, red fish, y +foo(one fish, blue fish) -- cgit v1.2.3 From 8f6a828e4a454e1bdce359c43e1108ff0315a89c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:44:19 -0700 Subject: Support macro invocations with multiple tokens for a single argument. We provide for this by changing the value of the argument-list production from a list of strings (string_list_t) to a new data-structure that holds a list of lists of strings (argument_list_t). 
--- glcpp-parse.y | 115 ++++++++++++++++++++++++++++++++++++++++++++++------------ glcpp.h | 11 +++++- 2 files changed, 102 insertions(+), 24 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index d0ee78e008..27b5514e92 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -62,7 +62,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier); string_list_t * _expand_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *arguments); + argument_list_t *arguments); void _print_string_list (string_list_t *list); @@ -79,17 +79,27 @@ _string_list_append_list (string_list_t *list, string_list_t *tail); int _string_list_contains (string_list_t *list, const char *member, int *index); -const char * -_string_list_member_at (string_list_t *list, int index); - int _string_list_length (string_list_t *list); +argument_list_t * +_argument_list_create (void *ctx); + +void +_argument_list_append (argument_list_t *list, string_list_t *argument); + +int +_argument_list_length (argument_list_t *list); + +string_list_t * +_argument_list_member_at (argument_list_t *list, int index); + %} %union { char *str; - string_list_t *list; + string_list_t *string_list; + argument_list_t *argument_list; } %parse-param {glcpp_parser_t *parser} @@ -97,7 +107,8 @@ _string_list_length (string_list_t *list); %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF %type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol -%type argument argument_list macro parameter_list replacement_list +%type argument macro parameter_list replacement_list +%type argument_list %% @@ -139,11 +150,15 @@ macro: argument_list: argument { - $$ = _string_list_create (parser); - _string_list_append_list ($$, $1); + $$ = _argument_list_create (parser); + _argument_list_append ($$, $1); + } +| argument_list ',' SPACE argument { + _argument_list_append ($1, $4); + $$ = $1; } | argument_list ',' argument { - _string_list_append_list ($1, 
$3); + _argument_list_append ($1, $3); $$ = $1; } ; @@ -156,6 +171,11 @@ argument: _string_list_append_item ($1, $2); talloc_free ($2); } +| argument SPACE word { + _string_list_append_item ($1, " "); + _string_list_append_item ($1, $3); + talloc_free ($3); + } | argument '(' argument ')' ; @@ -343,10 +363,59 @@ _print_string_list (string_list_t *list) printf ("%s", node->str); } -const char * -_string_list_member_at (string_list_t *list, int index) +argument_list_t * +_argument_list_create (void *ctx) { - string_node_t *node; + argument_list_t *list; + + list = xtalloc (ctx, argument_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_argument_list_append (argument_list_t *list, string_list_t *argument) +{ + argument_node_t *node; + + if (argument == NULL || argument->head == NULL) + return; + + node = xtalloc (list, argument_node_t); + node->argument = argument; + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +int +_argument_list_length (argument_list_t *list) +{ + int length = 0; + argument_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +string_list_t * +_argument_list_member_at (argument_list_t *list, int index) +{ + argument_node_t *node; int i; if (list == NULL) @@ -360,7 +429,7 @@ _string_list_member_at (string_list_t *list, int index) } if (node) - return node->str; + return node->argument; return NULL; } @@ -453,14 +522,14 @@ _expand_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, string_list_t *parameters, - string_list_t *arguments); + argument_list_t *arguments); static string_list_t * _expand_string_list_recursive (glcpp_parser_t *parser, string_list_t *list, const char *orig, string_list_t *parameters, - string_list_t *arguments) + argument_list_t *arguments) { string_list_t *result; string_list_t *child; @@ 
-479,11 +548,11 @@ _expand_string_list_recursive (glcpp_parser_t *parser, } if (_string_list_contains (parameters, token, &index)) { - const char *argument; + string_list_t *argument; - argument = _string_list_member_at (arguments, index); - child = _expand_macro_recursive (parser, argument, - orig, NULL, NULL); + argument = _argument_list_member_at (arguments, index); + child = _expand_string_list_recursive (parser, argument, + orig, NULL, NULL); _string_list_append_list (result, child); } else { child = _expand_macro_recursive (parser, token, @@ -502,7 +571,7 @@ _expand_macro_recursive (glcpp_parser_t *parser, const char *token, const char *orig, string_list_t *parameters, - string_list_t *arguments) + argument_list_t *arguments) { macro_t *macro; string_list_t *replacements; @@ -537,7 +606,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) string_list_t * _expand_function_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *arguments) + argument_list_t *arguments) { string_list_t *result; macro_t *macro; @@ -547,13 +616,13 @@ _expand_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); - if (_string_list_length (arguments) != + if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", identifier, - _string_list_length (arguments), + _argument_list_length (arguments), _string_list_length (macro->parameters)); return NULL; } diff --git a/glcpp.h b/glcpp.h index cee08faa98..7966a2a3d2 100644 --- a/glcpp.h +++ b/glcpp.h @@ -30,7 +30,6 @@ /* Some data types used for parser value. 
*/ - typedef struct string_node { const char *str; struct string_node *next; @@ -41,6 +40,16 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct argument_node { + string_list_t *argument; + struct argument_node *next; +} argument_node_t; + +typedef struct argument_list { + argument_node_t *head; + argument_node_t *tail; +} argument_list_t; + typedef struct glcpp_parser glcpp_parser_t; glcpp_parser_t * -- cgit v1.2.3 From ac070e8bf5005151dd702f2cd3fbfb2d1eaaf00d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 11:33:00 -0700 Subject: Eliminate a shift/reduce conflict. By simply allowing for the argument_list production to be empty rather than the lower-level argument production to be empty. --- glcpp-parse.y | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 27b5514e92..e70b3298d8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -149,7 +149,10 @@ macro: ; argument_list: - argument { + /* empty */ { + $$ = _argument_list_create (parser); + } +| argument { $$ = _argument_list_create (parser); _argument_list_append ($$, $1); } @@ -164,8 +167,9 @@ argument_list: ; argument: - /* empty */ { + word { $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); } | argument word { _string_list_append_item ($1, $2); -- cgit v1.2.3 From 92e7bf0f50ff673b7441b2f2be9ef99a4af8cae4 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 10:01:44 -0700 Subject: Add test for composed invocation of function-like macros. This is a case like "foo(bar(x))" where both foo and bar are defined function-like macros. This is not yet parsed correctly so this test fails. 
--- tests/021-define-func-compose.c | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/021-define-func-compose.c diff --git a/tests/021-define-func-compose.c b/tests/021-define-func-compose.c new file mode 100644 index 0000000000..21ddd0e65f --- /dev/null +++ b/tests/021-define-func-compose.c @@ -0,0 +1,3 @@ +#define bar(x) (1+(x)) +#define foo(y) (2*(y)) +foo(bar(3)) -- cgit v1.2.3 From 38bd27b444f610904320b5aa9d37e43be9164697 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 12:05:37 -0700 Subject: Fix expansion of composited macros. This is a case such as "foo(bar(x))". The recently added test for this now passes. --- glcpp-parse.y | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index e70b3298d8..f972ec372b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -106,7 +106,7 @@ _argument_list_member_at (argument_list_t *list, int index); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO TOKEN word word_or_symbol +%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO replacement_word TOKEN word %type argument macro parameter_list replacement_list %type argument_list @@ -171,6 +171,9 @@ argument: $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } +| macro { + $$ = $1; + } | argument word { _string_list_append_item ($1, $2); talloc_free ($2); @@ -227,18 +230,28 @@ directive: ; replacement_list: - word_or_symbol { + replacement_word { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); talloc_free ($1); } -| replacement_list word_or_symbol { +| replacement_list replacement_word { _string_list_append_item ($1, $2); talloc_free ($2); $$ = $1; } ; +replacement_word: + word { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +| OBJ_MACRO { $$ = $1; } +| '(' { $$ = xtalloc_strdup (parser, "("); } +| ')' { $$ = 
xtalloc_strdup (parser, ")"); } +| ',' { $$ = xtalloc_strdup (parser, ","); } +| SPACE { $$ = xtalloc_strdup (parser, " "); } +; + parameter_list: /* empty */ { $$ = _string_list_create (parser); @@ -261,18 +274,8 @@ identifier_perhaps_macro: | OBJ_MACRO { $$ = $1; } ; -word_or_symbol: - word { $$ = $1; } -| '(' { $$ = xtalloc_strdup (parser, "("); } -| ')' { $$ = xtalloc_strdup (parser, ")"); } -| ',' { $$ = xtalloc_strdup (parser, ","); } -| SPACE { $$ = xtalloc_strdup (parser, " "); } -; - word: IDENTIFIER { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } | TOKEN { $$ = $1; } ; -- cgit v1.2.3 From f6ae186cfd2c7006656ac55446247b569b92a721 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:51:54 -0700 Subject: Add test invoking a macro with an argument containing (non-macro) parentheses. The macro invocation is defined to consume all text between a set of matched parentheses. We previously tested for inner parentheses from a nested function-like macro invocation. Here we test for inner parentheses occurring on their own, (not part of another macro invocation). --- tests/022-define-func-arg-with-parens.c | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/022-define-func-arg-with-parens.c diff --git a/tests/022-define-func-arg-with-parens.c b/tests/022-define-func-arg-with-parens.c new file mode 100644 index 0000000000..c20d73a4a2 --- /dev/null +++ b/tests/022-define-func-arg-with-parens.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(argument(including parens)for the win) -- cgit v1.2.3 From 3596bb149e107ad12df4fee0723caf91819c0758 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:53:52 -0700 Subject: Provide implementation for macro arguments containing parentheses. We were correctly parsing this already, but simply not returning any value (for no good reason). Fortunately the fix is quite simple. This makes the test added in the previous commit now pass. 
--- Makefile | 2 +- glcpp-parse.y | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 550945abd3..3fa863c49b 100644 --- a/Makefile +++ b/Makefile @@ -21,5 +21,5 @@ test: glcpp @(cd tests; ./glcpp-test) clean: - rm -f glcpp-lex.c glcpp-parse.c *.o *~ + rm -f glcpp glcpp-lex.c glcpp-parse.c *.o *~ rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ diff --git a/glcpp-parse.y b/glcpp-parse.y index f972ec372b..58afd724b6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -177,13 +177,20 @@ argument: | argument word { _string_list_append_item ($1, $2); talloc_free ($2); + $$ = $1; } | argument SPACE word { _string_list_append_item ($1, " "); _string_list_append_item ($1, $3); talloc_free ($3); + $$ = $1; + } +| argument '(' argument ')' { + _string_list_append_item ($1, "("); + _string_list_append_list ($1, $3); + _string_list_append_item ($1, ")"); + $$ = $1; } -| argument '(' argument ')' ; directive_with_newline: -- cgit v1.2.3 From 4eb2ccf261f739ad9b91455f28c1dece573a30d6 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 16:58:00 -0700 Subject: Add test with extra whitespace in macro definitions and invocations. This whitespace is not dealt with in an elegant way yet so this test does not pass currently. 
--- tests/023-define-extra-whitespace.c | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/023-define-extra-whitespace.c diff --git a/tests/023-define-extra-whitespace.c b/tests/023-define-extra-whitespace.c new file mode 100644 index 0000000000..375355a17d --- /dev/null +++ b/tests/023-define-extra-whitespace.c @@ -0,0 +1,8 @@ +#define noargs() 1 +# define onearg(foo) foo + # define twoargs( x , y ) x y + # define threeargs( a , b , c ) a b c +noargs ( ) + onearg ( 2 ) + twoargs ( 3 , 4 ) +threeargs ( 5 , 6 , 7 ) -- cgit v1.2.3 From 81f01432bd4aad8e8b87ae273eb05297e35eff07 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 17:08:45 -0700 Subject: Don't return SPACE tokens unless strictly needed. This reverts the unconditional return of SPACE tokens from the lexer from commit 48b94da0994b44e41324a2419117dcd81facce8b . That commit seemed useful because it kept the lexer simpler, but the presence of SPACE tokens is causing lots of extra complication for the parser itself, (redundant productions other than whitespace differences, several productions buggy in the case of extra whitespace, etc.) Of course, we'd prefer to never have any whitespace token, but that's not possible with the need to distinguish between "#define foo()" and "#define foo ()". So we'll accept a little bit of pain in the lexer, (enough state to support this special-case token), in exchange for keeping most of the parser blissfully ignorant of whether tokens are separated by whitespace or not. This change does mean that our output now differs from that of "gcc -E", but only in whitespace. So we test with "diff -w" now to ignore those differences. 
--- glcpp-lex.l | 29 ++++++++++++++++++++++++++--- glcpp-parse.y | 22 +++++++--------------- tests/glcpp-test | 2 +- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 21b9e3530a..3c9dda46d4 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,9 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" +%x ST_DEFINE +%x ST_DEFVAL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -52,9 +55,31 @@ TOKEN [^[:space:](),]+ * "#define foo()" from "#define foo ()". */ {HASH}define{HSPACE}* { + BEGIN ST_DEFINE; return DEFINE; } +{IDENTIFIER} { + BEGIN ST_DEFVAL; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ { + BEGIN INITIAL; + return SPACE; +} + +"(" { + BEGIN INITIAL; + return '('; +} + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); switch (glcpp_parser_macro_type (yyextra, yylval.str)) @@ -84,8 +109,6 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{HSPACE}+ { - return SPACE; -} +{SPACE}+ %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 58afd724b6..71ea3e5343 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -135,7 +135,6 @@ content: | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } -| SPACE { printf (" "); } ; macro: @@ -156,10 +155,6 @@ argument_list: $$ = _argument_list_create (parser); _argument_list_append ($$, $1); } -| argument_list ',' SPACE argument { - _argument_list_append ($1, $4); - $$ = $1; - } | argument_list ',' argument { _argument_list_append ($1, $3); $$ = $1; @@ -179,12 +174,6 @@ argument: talloc_free ($2); $$ = $1; } -| argument SPACE word { - _string_list_append_item ($1, " "); - _string_list_append_item ($1, $3); - talloc_free ($3); - $$ = $1; - } | argument '(' argument ')' { _string_list_append_item ($1, "("); _string_list_append_list ($1, $3); @@ -209,8 +198,8 @@ directive: string_list_t *list = _string_list_create (parser); _define_function_macro (parser, $2, 
$4, list); } -| DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list { - _define_function_macro (parser, $2, $4, $7); +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { + _define_function_macro (parser, $2, $4, $6); } | UNDEF FUNC_MACRO { string_list_t *replacement = hash_table_find (parser->defines, $2); @@ -256,7 +245,6 @@ replacement_word: | '(' { $$ = xtalloc_strdup (parser, "("); } | ')' { $$ = xtalloc_strdup (parser, ")"); } | ',' { $$ = xtalloc_strdup (parser, ","); } -| SPACE { $$ = xtalloc_strdup (parser, " "); } ; parameter_list: @@ -373,8 +361,11 @@ _print_string_list (string_list_t *list) if (list == NULL) return; - for (node = list->head; node; node = node->next) + for (node = list->head; node; node = node->next) { printf ("%s", node->str); + if (node->next) + printf (" "); + } } argument_list_t * @@ -623,6 +614,7 @@ _expand_function_macro (glcpp_parser_t *parser, argument_list_t *arguments) { string_list_t *result; + macro_t *macro; result = _string_list_create (parser); diff --git a/tests/glcpp-test b/tests/glcpp-test index 25685eeabe..bd204de1e2 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -w -u $test.expected $test.out done -- cgit v1.2.3 From e36a4d5be9a9fa3abc4fb5d0b6c3601934f7a343 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 14 May 2010 17:29:24 -0700 Subject: Fix two whitespace bugs in the lexer. The first bug was not allowing whitespace between '#' and the directive name. The second bug was swallowing a terminating newline along with any trailing whitespace on a line. With these two fixes, and the previous commit to stop emitting SPACE tokens, the recently added extra-whitespace test now passes. 
--- glcpp-lex.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 3c9dda46d4..97ff1175f1 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -39,7 +39,7 @@ SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] -HASH ^{HSPACE}*# +HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ @@ -109,6 +109,6 @@ TOKEN [^[:space:](),]+ return NEWLINE; } -{SPACE}+ +{HSPACE}+ %% -- cgit v1.2.3 From 420d05a15b90658680b87b4d83b092768590319a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 10:15:23 -0700 Subject: Add test and fix bug leading to infinite recursion. The test case here is simply "#define foo foo" and "#define bar foo" and then attempting to expand "bar". Previously, our termination condition for the recursion was overly simple---just looking for the single identifier that began the expansion. We now fix this to maintain a stack of identifiers and terminate when any one of them occurs in the replacement list. 
--- glcpp-parse.y | 87 ++++++++++++++++++++++++------ tests/024-define-chain-to-self-recursion.c | 3 ++ 2 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 tests/024-define-chain-to-self-recursion.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 71ea3e5343..16d2a28a00 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -76,6 +76,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -319,6 +325,45 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + + list->head = node; +} + +void +_string_list_pop (string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -330,7 +375,8 @@ _string_list_contains (string_list_t *list, const char *member, int *index) for (i = 0, node = list->head; node; i++, node = node->next) { if (strcmp (node->str, member) == 0) { - *index = i; + if (index) + *index = i; return 1; } } @@ -525,14 +571,14 @@ _define_function_macro (glcpp_parser_t *parser, static string_list_t * _expand_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, + string_list_t *active, string_list_t 
*parameters, argument_list_t *arguments); static string_list_t * _expand_string_list_recursive (glcpp_parser_t *parser, string_list_t *list, - const char *orig, + string_list_t *active, string_list_t *parameters, argument_list_t *arguments) { @@ -547,7 +593,10 @@ _expand_string_list_recursive (glcpp_parser_t *parser, for (node = list->head ; node ; node = node->next) { token = node->str; - if (strcmp (token, orig) == 0) { + /* Don't expand this macro if it's on the active + * stack, (meaning we're already in the process of + * expanding it). */ + if (_string_list_contains (active, token, NULL)) { _string_list_append_item (result, token); continue; } @@ -557,11 +606,11 @@ _expand_string_list_recursive (glcpp_parser_t *parser, argument = _argument_list_member_at (arguments, index); child = _expand_string_list_recursive (parser, argument, - orig, NULL, NULL); + active, NULL, NULL); _string_list_append_list (result, child); } else { child = _expand_macro_recursive (parser, token, - orig, parameters, + active, parameters, arguments); _string_list_append_list (result, child); } @@ -574,12 +623,18 @@ _expand_string_list_recursive (glcpp_parser_t *parser, static string_list_t * _expand_macro_recursive (glcpp_parser_t *parser, const char *token, - const char *orig, + string_list_t *active, string_list_t *parameters, argument_list_t *arguments) { macro_t *macro; string_list_t *replacements; + string_list_t *result; + + if (active == NULL) + active = _string_list_create (NULL); + + _string_list_push (active, token); macro = hash_table_find (parser->defines, token); if (macro == NULL) { @@ -592,8 +647,14 @@ _expand_macro_recursive (glcpp_parser_t *parser, replacements = macro->replacements; - return _expand_string_list_recursive (parser, replacements, - orig, parameters, arguments); + result = _expand_string_list_recursive (parser, replacements, + active, parameters, arguments); + + _string_list_pop (active); + if (_string_list_length (active) == 0) + talloc_free (active); + + 
return result; } string_list_t * @@ -604,7 +665,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); - return _expand_macro_recursive (parser, identifier, identifier, + return _expand_macro_recursive (parser, identifier, NULL, NULL, NULL); } @@ -613,12 +674,8 @@ _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments) { - string_list_t *result; - macro_t *macro; - result = _string_list_create (parser); - macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); @@ -633,6 +690,6 @@ _expand_function_macro (glcpp_parser_t *parser, return NULL; } - return _expand_macro_recursive (parser, identifier, identifier, + return _expand_macro_recursive (parser, identifier, NULL, macro->parameters, arguments); } diff --git a/tests/024-define-chain-to-self-recursion.c b/tests/024-define-chain-to-self-recursion.c new file mode 100644 index 0000000000..e788adce30 --- /dev/null +++ b/tests/024-define-chain-to-self-recursion.c @@ -0,0 +1,3 @@ +#define foo foo +#define bar foo +bar -- cgit v1.2.3 From acf87bc03411c4d9b818a346bc9dad858b0a2407 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 10:34:29 -0700 Subject: Fix bug (and add test) for a function-like-macro appearing as a non-macro. That is, when a function-like macro appears in the content without parentheses it should be accepted and passed on through, (previously the parser was regarding this as a syntax error). 
--- glcpp-parse.y | 4 ++++ tests/025-func-macro-as-non-macro.c | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 tests/025-func-macro-as-non-macro.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 16d2a28a00..6f158d9139 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -133,6 +133,10 @@ content: printf ("%s", $1); talloc_free ($1); } +| FUNC_MACRO { + printf ("%s", $1); + talloc_free ($1); + } | macro { _print_string_list ($1); } diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c new file mode 100644 index 0000000000..3dbe026d9d --- /dev/null +++ b/tests/025-func-macro-as-non-macro.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo -- cgit v1.2.3 From 796e1f0eadcfbbc6e4d79778b2378975204bb97c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 12:45:16 -0700 Subject: Expect 1 shift/reduce conflict. The most recent fix to the parser introduced a shift/reduce conflict. We document this conflict here, and tell bison that it need not report it (since I verified that it's being resolved in the direction desired). For the record, I did write additional lexer code to eliminate this conflict, but it was quite fragile, (would not accept a newline between a function-like macro name and the left parenthesis, for example). --- glcpp-parse.y | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index 6f158d9139..959083578e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -116,6 +116,14 @@ _argument_list_member_at (argument_list_t *list, int index); %type argument macro parameter_list replacement_list %type argument_list +/* Hard to remove shift/reduce conflicts documented as follows: + * + * 1. '(' after FUNC_MACRO name which is correctly resolved to shift + * to form macro invocation rather than reducing directly to + * content. 
+ */ +%expect 1 + %% input: -- cgit v1.2.3 From 1a29500e72ac338c1fb243742aff1c167e1059db Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 13:19:04 -0700 Subject: Fix (and add test for) function-like macro invocation with newlines. The test has a newline before the left parenthesis, and newlines to separate the parentheses from the argument. The fix involves more state in the lexer to only return a NEWLINE token when terminating a directive. This is very similar to our previous fix with extra lexer state to only return the SPACE token when it would be significant for the parser. With this change, the exact number and positioning of newlines in the output is now different compared to "gcc -E" so we add a -B option to diff when testing to ignore that. --- glcpp-lex.l | 82 +++++++++++++++++++++++++--------- glcpp-parse.y | 1 - tests/026-define-func-extra-newlines.c | 6 +++ tests/glcpp-test | 2 +- 4 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 tests/026-define-func-extra-newlines.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 97ff1175f1..4cb73c5d71 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -27,13 +27,36 @@ #include "glcpp.h" #include "glcpp-parse.h" + +/* Yes, a macro with a return statement in it is evil. But surely no + * more evil than all the code generation happening with flex in the + * first place. 
*/ +#define LEXIFY_IDENTIFIER do { \ + yylval.str = xtalloc_strdup (yyextra, yytext); \ + switch (glcpp_parser_macro_type (yyextra, yylval.str)) \ + { \ + case MACRO_TYPE_UNDEFINED: \ + return IDENTIFIER; \ + break; \ + case MACRO_TYPE_OBJECT: \ + return OBJ_MACRO; \ + break; \ + case MACRO_TYPE_FUNCTION: \ + return FUNC_MACRO; \ + break; \ + } \ + } while (0) + %} %option reentrant noyywrap %option extra-type="glcpp_parser_t *" %x ST_DEFINE +%x ST_DEFVAL_START %x ST_DEFVAL +%x ST_UNDEF +%x ST_UNDEF_END SPACE [[:space:]] NONSPACE [^[:space:]] @@ -46,9 +69,20 @@ TOKEN [^[:space:](),]+ %% {HASH}undef{HSPACE}* { + BEGIN ST_UNDEF; return UNDEF; } +{IDENTIFIER} { + BEGIN ST_UNDEF_END; + LEXIFY_IDENTIFIER; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + /* We use the ST_DEFINE and ST_DEFVAL states so that we can * pass a space token, (yes, a token for whitespace!), since * the preprocessor specification requires distinguishing @@ -60,40 +94,48 @@ TOKEN [^[:space:](),]+ } {IDENTIFIER} { - BEGIN ST_DEFVAL; + BEGIN ST_DEFVAL_START; yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -\n { +\n { BEGIN INITIAL; return NEWLINE; } -{HSPACE}+ { - BEGIN INITIAL; +{HSPACE}+ { + BEGIN ST_DEFVAL; return SPACE; } -"(" { - BEGIN INITIAL; +"(" { + BEGIN ST_DEFVAL; return '('; } -{IDENTIFIER} { +{IDENTIFIER} { + LEXIFY_IDENTIFIER; +} + +[(),] { + return yytext[0]; +} + +{TOKEN} { yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_macro_type (yyextra, yylval.str)) - { - case MACRO_TYPE_UNDEFINED: - return IDENTIFIER; - break; - case MACRO_TYPE_OBJECT: - return OBJ_MACRO; - break; - case MACRO_TYPE_FUNCTION: - return FUNC_MACRO; - break; - } + return TOKEN; +} + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HSPACE}+ + +{IDENTIFIER} { + LEXIFY_IDENTIFIER; } [(),] { @@ -106,7 +148,7 @@ TOKEN [^[:space:](),]+ } \n { - return NEWLINE; + printf ("\n"); } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 959083578e..b2eaa5ba69 100644 --- 
a/glcpp-parse.y +++ b/glcpp-parse.y @@ -149,7 +149,6 @@ content: _print_string_list ($1); } | directive_with_newline { printf ("\n"); } -| NEWLINE { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c new file mode 100644 index 0000000000..0d83740530 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/tests/glcpp-test b/tests/glcpp-test index bd204de1e2..673a4f45e9 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -w -u $test.expected $test.out + diff -B -w -u $test.expected $test.out done -- cgit v1.2.3 From d476db38fe21f5e6061a7d93dbd5a9991b91bf59 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 17 May 2010 13:33:10 -0700 Subject: Add several tests where the defined value of a macro is (or looks like) a macro Many of these look quite similar to existing tests that are handled correctly, yet none of these work. For example, in test 30 we have a simple non-function macro "foo" that is defined as "bar(baz(success))" and obviously non-function macro expansion has been working for a long time. Similarly, if we had text of "bar(baz(success))" it would be expanded correctly as well. But when this otherwise functioning text appears as the body of a macro, things don't work at all. This is pointing out a fundamental problem with the current approach. The current code does a recursive expansion of a macro definition, but this doesn't involve the parsing machinery, so it can't actually handle things like an arbitrary nesting of parentheses. The fix will require the parser to stuff macro values back into the lexer to get at all of the existing machinery when expanding macros. 
--- tests/027-define-chain-obj-to-func.c | 3 +++ tests/028-define-chain-obj-to-non-func.c | 3 +++ tests/029-define-chain-obj-to-func-with-args.c | 3 +++ tests/030-define-chain-obj-to-func-compose.c | 4 ++++ tests/031-define-chain-func-to-func-compose.c | 4 ++++ 5 files changed, 17 insertions(+) create mode 100644 tests/027-define-chain-obj-to-func.c create mode 100644 tests/028-define-chain-obj-to-non-func.c create mode 100644 tests/029-define-chain-obj-to-func-with-args.c create mode 100644 tests/030-define-chain-obj-to-func-compose.c create mode 100644 tests/031-define-chain-func-to-func-compose.c diff --git a/tests/027-define-chain-obj-to-func.c b/tests/027-define-chain-obj-to-func.c new file mode 100644 index 0000000000..5ccb52caba --- /dev/null +++ b/tests/027-define-chain-obj-to-func.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure() +foo diff --git a/tests/028-define-chain-obj-to-non-func.c b/tests/028-define-chain-obj-to-non-func.c new file mode 100644 index 0000000000..44962a7187 --- /dev/null +++ b/tests/028-define-chain-obj-to-non-func.c @@ -0,0 +1,3 @@ +#define success() failure +#define foo success +foo diff --git a/tests/029-define-chain-obj-to-func-with-args.c b/tests/029-define-chain-obj-to-func-with-args.c new file mode 100644 index 0000000000..261f7d28fc --- /dev/null +++ b/tests/029-define-chain-obj-to-func-with-args.c @@ -0,0 +1,3 @@ +#define bar(failure) failure +#define foo bar(success) +foo diff --git a/tests/030-define-chain-obj-to-func-compose.c b/tests/030-define-chain-obj-to-func-compose.c new file mode 100644 index 0000000000..e56fbefd62 --- /dev/null +++ b/tests/030-define-chain-obj-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo bar(baz(success)) +foo diff --git a/tests/031-define-chain-func-to-func-compose.c b/tests/031-define-chain-func-to-func-compose.c new file mode 100644 index 0000000000..3f4c8744df --- /dev/null +++ 
b/tests/031-define-chain-func-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo() bar(baz(success)) +foo() -- cgit v1.2.3 From a807fb72c45888b5ff915aa08d8bd10069be4a2e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 18 May 2010 22:10:04 -0700 Subject: Rewrite macro handling to support function-like macro invocation in macro values The rewrite here discards the functions that did direct, recursive expansion of macro values. Instead, the parser now pushes the macro definition string over to a stack of buffers for the lexer. This way, macro expansion gets access to all parsing machinery. This isn't a small change, but the result is simpler than before (I think). It passes the entire test suite, including the four tests added with the previous commit that were failing before. --- glcpp-lex.l | 146 ++++++++++++++++------- glcpp-parse.y | 367 ++++++++++++++++++++++++---------------------------------- glcpp.h | 77 ++++++++++-- xtalloc.c | 14 +++ 4 files changed, 338 insertions(+), 266 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 4cb73c5d71..52be1b1ea4 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -27,34 +27,15 @@ #include "glcpp.h" #include "glcpp-parse.h" - -/* Yes, a macro with a return statement in it is evil. But surely no - * more evil than all the code generation happening with flex in the - * first place. 
*/ -#define LEXIFY_IDENTIFIER do { \ - yylval.str = xtalloc_strdup (yyextra, yytext); \ - switch (glcpp_parser_macro_type (yyextra, yylval.str)) \ - { \ - case MACRO_TYPE_UNDEFINED: \ - return IDENTIFIER; \ - break; \ - case MACRO_TYPE_OBJECT: \ - return OBJ_MACRO; \ - break; \ - case MACRO_TYPE_FUNCTION: \ - return FUNC_MACRO; \ - break; \ - } \ - } while (0) - %} %option reentrant noyywrap %option extra-type="glcpp_parser_t *" %x ST_DEFINE -%x ST_DEFVAL_START -%x ST_DEFVAL +%x ST_DEFINE_OBJ_OR_FUNC +%x ST_DEFINE_PARAMETER +%x ST_DEFINE_VALUE %x ST_UNDEF %x ST_UNDEF_END @@ -75,12 +56,14 @@ TOKEN [^[:space:](),]+ {IDENTIFIER} { BEGIN ST_UNDEF_END; - LEXIFY_IDENTIFIER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } +{HSPACE}* + \n { BEGIN INITIAL; - return NEWLINE; } /* We use the ST_DEFINE and ST_DEFVAL states so that we can @@ -94,48 +77,73 @@ TOKEN [^[:space:](),]+ } {IDENTIFIER} { - BEGIN ST_DEFVAL_START; + BEGIN ST_DEFINE_OBJ_OR_FUNC; yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -\n { +\n { BEGIN INITIAL; - return NEWLINE; + yylval.str = xtalloc_strdup (yyextra, ""); + return REPLACEMENT; } -{HSPACE}+ { - BEGIN ST_DEFVAL; - return SPACE; +{HSPACE}+ { + BEGIN ST_DEFINE_VALUE; } -"(" { - BEGIN ST_DEFVAL; +"(" { + BEGIN ST_DEFINE_PARAMETER; return '('; } -{IDENTIFIER} { - LEXIFY_IDENTIFIER; +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } -[(),] { - return yytext[0]; +"," { + return ','; } -{TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return TOKEN; +")" { + BEGIN ST_DEFINE_VALUE; + return ')'; } -\n { +{HSPACE}+ + +.*\n { BEGIN INITIAL; - return NEWLINE; + yylval.str = xtalloc_strndup (yyextra, yytext, strlen (yytext) - 1); + return REPLACEMENT; } -{HSPACE}+ - {IDENTIFIER} { - LEXIFY_IDENTIFIER; + int parameter_index; + yylval.str = xtalloc_strdup (yyextra, yytext); + switch (glcpp_parser_classify_token (yyextra, yylval.str, + ¶meter_index)) + { + case 
TOKEN_CLASS_ARGUMENT: + talloc_free (yylval.str); + /* We don't return a value here since the + * current token will be replaced by new + * tokens. */ + glcpp_parser_push_expansion_argument (yyextra, + parameter_index); + break; + case TOKEN_CLASS_IDENTIFIER: + return IDENTIFIER; + break; + case TOKEN_CLASS_FUNC_MACRO: + return FUNC_MACRO; + break; + case TOKEN_CLASS_OBJ_MACRO: + return OBJ_MACRO; + break; + + } } [(),] { @@ -153,4 +161,54 @@ TOKEN [^[:space:](),]+ {HSPACE}+ +<> { + int done; + + done = glcpp_lex_stack_pop (yyextra->lex_stack); + + if (done) + yyterminate (); + + glcpp_parser_pop_expansion (yyextra); +} + %% + +void +glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string) +{ + struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; + glcpp_lex_node_t *node; + + /* Save the current buffer on the top of the stack. */ + node = xtalloc (stack, glcpp_lex_node_t); + node->buffer = YY_CURRENT_BUFFER; + + node->next = stack->head; + stack->head = node; + + /* Then switch to a new scan buffer for string. 
*/ + yy_scan_string (string, stack->parser->scanner); +} + +int +glcpp_lex_stack_pop (glcpp_lex_stack_t *stack) +{ + struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; + glcpp_lex_node_t *node; + + node = stack->head; + + if (node == NULL) + return 1; + + stack->head = node->next; + + yy_delete_buffer (YY_CURRENT_BUFFER, stack->parser->scanner); + yy_switch_to_buffer ((YY_BUFFER_STATE) node->buffer, + stack->parser->scanner); + + talloc_free (node); + + return 0; +} diff --git a/glcpp-parse.y b/glcpp-parse.y index b2eaa5ba69..9f1075aa50 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,41 +25,29 @@ #include #include #include -#include #include "glcpp.h" #define YYLEX_PARAM parser->scanner -typedef struct { - int is_function; - string_list_t *parameters; - string_list_t *replacements; -} macro_t; - -struct glcpp_parser { - yyscan_t scanner; - struct hash_table *defines; -}; - void yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacements); + const char *replacement); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - string_list_t *replacements); + const char *replacement); -string_list_t * +void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); -string_list_t * +void _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments); @@ -76,12 +64,6 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); -void -_string_list_push (string_list_t *list, const char *str); - -void -_string_list_pop (string_list_t *list); - int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -111,9 +93,9 @@ _argument_list_member_at (argument_list_t *list, int index); %parse-param {glcpp_parser_t *parser} %lex-param {void *scanner} -%token DEFINE FUNC_MACRO IDENTIFIER 
NEWLINE OBJ_MACRO SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO replacement_word TOKEN word -%type argument macro parameter_list replacement_list +%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN word +%type argument macro parameter_list %type argument_list /* Hard to remove shift/reduce conflicts documented as follows: @@ -145,21 +127,21 @@ content: printf ("%s", $1); talloc_free ($1); } -| macro { - _print_string_list ($1); +| directive { + printf ("\n"); } -| directive_with_newline { printf ("\n"); } | '(' { printf ("("); } | ')' { printf (")"); } | ',' { printf (","); } +| macro ; macro: FUNC_MACRO '(' argument_list ')' { - $$ = _expand_function_macro (parser, $1, $3); + _expand_function_macro (parser, $1, $3); } | OBJ_MACRO { - $$ = _expand_object_macro (parser, $1); + _expand_object_macro (parser, $1); talloc_free ($1); } ; @@ -184,7 +166,7 @@ argument: _string_list_append_item ($$, $1); } | macro { - $$ = $1; + $$ = _string_list_create (parser); } | argument word { _string_list_append_item ($1, $2); @@ -199,93 +181,42 @@ argument: } ; -directive_with_newline: - directive NEWLINE -; - directive: - DEFINE IDENTIFIER { - string_list_t *list = _string_list_create (parser); - _define_object_macro (parser, $2, list); + DEFINE IDENTIFIER REPLACEMENT { + _define_object_macro (parser, $2, $3); } -| DEFINE IDENTIFIER SPACE replacement_list { - _define_object_macro (parser, $2, $4); - } -| DEFINE IDENTIFIER '(' parameter_list ')' { - string_list_t *list = _string_list_create (parser); - _define_function_macro (parser, $2, $4, list); - } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list { +| DEFINE IDENTIFIER '(' parameter_list ')' REPLACEMENT { _define_function_macro (parser, $2, $4, $6); } -| UNDEF FUNC_MACRO { - string_list_t *replacement = hash_table_find (parser->defines, $2); - if (replacement) { - /* XXX: Need hash table to support 
a real way - * to remove an element rather than prefixing - * a new node with data of NULL like this. */ - hash_table_insert (parser->defines, NULL, $2); - talloc_free (replacement); - } - talloc_free ($2); - } -| UNDEF OBJ_MACRO { - string_list_t *replacement = hash_table_find (parser->defines, $2); - if (replacement) { +| UNDEF IDENTIFIER { + string_list_t *macro = hash_table_find (parser->defines, $2); + if (macro) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing * a new node with data of NULL like this. */ hash_table_insert (parser->defines, NULL, $2); - talloc_free (replacement); + talloc_free (macro); } talloc_free ($2); } ; -replacement_list: - replacement_word { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - talloc_free ($1); - } -| replacement_list replacement_word { - _string_list_append_item ($1, $2); - talloc_free ($2); - $$ = $1; - } -; - -replacement_word: - word { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } -| '(' { $$ = xtalloc_strdup (parser, "("); } -| ')' { $$ = xtalloc_strdup (parser, ")"); } -| ',' { $$ = xtalloc_strdup (parser, ","); } -; - parameter_list: /* empty */ { $$ = _string_list_create (parser); } -| identifier_perhaps_macro { +| IDENTIFIER { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); talloc_free ($1); } -| parameter_list ',' identifier_perhaps_macro { +| parameter_list ',' IDENTIFIER { _string_list_append_item ($1, $3); talloc_free ($3); $$ = $1; } ; -identifier_perhaps_macro: - IDENTIFIER { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| OBJ_MACRO { $$ = $1; } -; - word: IDENTIFIER { $$ = $1; } | TOKEN { $$ = $1; } @@ -336,45 +267,6 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } -void -_string_list_push (string_list_t *list, const char *str) -{ - string_node_t *node; - - node = xtalloc (list, string_node_t); - node->str = xtalloc_strdup (node, str); - - node->next = list->head; - - 
if (list->tail == NULL) { - list->tail = node; - } - - list->head = node; -} - -void -_string_list_pop (string_list_t *list) -{ - string_node_t *node; - - node = list->head; - - if (node == NULL) { - fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); - exit (1); - } - - list->head = node->next; - - if (list->tail == node) { - assert (node->next == NULL); - list->tail = NULL; - } - - talloc_free (node); -} - int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -512,6 +404,11 @@ glcpp_parser_create (void) yylex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->expansions = NULL; + + parser->lex_stack = xtalloc (parser, glcpp_lex_stack_t); + parser->lex_stack->parser = parser; + parser->lex_stack->head = NULL; return parser; } @@ -530,26 +427,46 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -macro_type_t -glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier) +token_class_t +glcpp_parser_classify_token (glcpp_parser_t *parser, + const char *identifier, + int *parameter_index) { macro_t *macro; + /* First we check if we are currently expanding a + * function-like macro, and if so, whether the parameter list + * contains a parameter matching this token name. */ + if (parser->expansions && + parser->expansions->macro && + parser->expansions->macro->parameters) + { + string_list_t *list; + + list = parser->expansions->macro->parameters; + + if (_string_list_contains (list, identifier, parameter_index)) + return TOKEN_CLASS_ARGUMENT; + } + + /* If not a function-like macro parameter, we next check if + * this token is a macro itself. 
*/ + macro = hash_table_find (parser->defines, identifier); if (macro == NULL) - return MACRO_TYPE_UNDEFINED; + return TOKEN_CLASS_IDENTIFIER; if (macro->is_function) - return MACRO_TYPE_FUNCTION; + return TOKEN_CLASS_FUNC_MACRO; else - return MACRO_TYPE_OBJECT; + return TOKEN_CLASS_OBJ_MACRO; } void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacements) + const char *replacement) { macro_t *macro; @@ -557,7 +474,8 @@ _define_object_macro (glcpp_parser_t *parser, macro->is_function = 0; macro->parameters = NULL; - macro->replacements = talloc_steal (macro, replacements); + macro->identifier = talloc_strdup (macro, identifier); + macro->replacement = talloc_steal (macro, replacement); hash_table_insert (parser->defines, macro, identifier); } @@ -566,7 +484,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - string_list_t *replacements) + const char *replacement) { macro_t *macro; @@ -574,101 +492,126 @@ _define_function_macro (glcpp_parser_t *parser, macro->is_function = 1; macro->parameters = talloc_steal (macro, parameters); - macro->replacements = talloc_steal (macro, replacements); + macro->identifier = talloc_strdup (macro, identifier); + macro->replacement = talloc_steal (macro, replacement); hash_table_insert (parser->defines, macro, identifier); } -static string_list_t * -_expand_macro_recursive (glcpp_parser_t *parser, - const char *token, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments); - -static string_list_t * -_expand_string_list_recursive (glcpp_parser_t *parser, - string_list_t *list, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments) +static void +_glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments, + const char * replacement) +{ + expansion_node_t *node; + + node = xtalloc (parser, expansion_node_t); + + node->macro = 
macro; + node->arguments = arguments; + + node->next = parser->expansions; + parser->expansions = node; + + glcpp_lex_stack_push (parser->lex_stack, replacement); +} + +void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments) +{ + _glcpp_parser_push_expansion_internal (parser, macro, arguments, + macro->replacement); +} + +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index) { - string_list_t *result; - string_list_t *child; - const char *token; + argument_list_t *arguments; + string_list_t *argument; string_node_t *node; - int index; + char *argument_str, *s; + int length; - result = _string_list_create (parser); + arguments = parser->expansions->arguments; - for (node = list->head ; node ; node = node->next) { - token = node->str; + argument = _argument_list_member_at (arguments, argument_index); - /* Don't expand this macro if it's on the active - * stack, (meaning we're already in the process of - * expanding it). 
*/ - if (_string_list_contains (active, token, NULL)) { - _string_list_append_item (result, token); - continue; - } + length = 0; + for (node = argument->head; node; node = node->next) + length += strlen (node->str) + 1; + + argument_str = xtalloc_size (parser, length); - if (_string_list_contains (parameters, token, &index)) { - string_list_t *argument; - - argument = _argument_list_member_at (arguments, index); - child = _expand_string_list_recursive (parser, argument, - active, NULL, NULL); - _string_list_append_list (result, child); - } else { - child = _expand_macro_recursive (parser, token, - active, parameters, - arguments); - _string_list_append_list (result, child); + *argument_str = '\0'; + s = argument_str; + for (node = argument->head; node; node = node->next) { + strcpy (s, node->str); + s += strlen (node->str); + if (node->next) { + *s = ' '; + s++; + *s = '\0'; } } - return result; + _glcpp_parser_push_expansion_internal (parser, NULL, NULL, + argument_str); } - -static string_list_t * -_expand_macro_recursive (glcpp_parser_t *parser, - const char *token, - string_list_t *active, - string_list_t *parameters, - argument_list_t *arguments) +/* The lexer calls this when it exhausts a string. 
*/ +void +glcpp_parser_pop_expansion (glcpp_parser_t *parser) { - macro_t *macro; - string_list_t *replacements; - string_list_t *result; + expansion_node_t *node; + + node = parser->expansions; - if (active == NULL) - active = _string_list_create (NULL); + if (node == NULL) { + fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); + exit (1); + } - _string_list_push (active, token); + parser->expansions = node->next; - macro = hash_table_find (parser->defines, token); - if (macro == NULL) { - string_list_t *result; + talloc_free (node); +} - result = _string_list_create (parser); - _string_list_append_item (result, token); - return result; - } +int +glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) +{ + expansion_node_t *node; - replacements = macro->replacements; + for (node = parser->expansions; node; node = node->next) { + if (node->macro && + strcmp (node->macro->identifier, member) == 0) + { + return 1; + } + } - result = _expand_string_list_recursive (parser, replacements, - active, parameters, arguments); + return 0; +} - _string_list_pop (active); - if (_string_list_length (active) == 0) - talloc_free (active); +static void +_expand_macro (glcpp_parser_t *parser, + const char *token, + macro_t *macro, + argument_list_t *arguments) +{ + /* Don't recurse if we're already actively expanding this token. */ + if (glcpp_parser_is_expanding (parser, token)) { + printf ("%s", token); + return; + } - return result; + glcpp_parser_push_expansion_macro (parser, macro, arguments); } -string_list_t * +void _expand_object_macro (glcpp_parser_t *parser, const char *identifier) { macro_t *macro; @@ -676,11 +619,10 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! 
macro->is_function); - return _expand_macro_recursive (parser, identifier, NULL, - NULL, NULL); + _expand_macro (parser, identifier, macro, NULL); } -string_list_t * +void _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments) @@ -698,9 +640,8 @@ _expand_function_macro (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return NULL; + return; } - return _expand_macro_recursive (parser, identifier, NULL, - macro->parameters, arguments); + _expand_macro (parser, identifier, macro, arguments); } diff --git a/glcpp.h b/glcpp.h index 7966a2a3d2..81f7d14c5b 100644 --- a/glcpp.h +++ b/glcpp.h @@ -24,11 +24,13 @@ #ifndef GLCPP_H #define GLCPP_H +#include + #include "hash_table.h" #define yyscan_t void* -/* Some data types used for parser value. */ +/* Some data types used for parser values. */ typedef struct string_node { const char *str; @@ -52,6 +54,56 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; +/* Support for temporarily lexing/parsing tokens from a string. 
*/ + +typedef struct glcpp_lex_node { + void *buffer; + struct glcpp_lex_node *next; +} glcpp_lex_node_t; + +typedef struct { + glcpp_parser_t *parser; + glcpp_lex_node_t *head; +} glcpp_lex_stack_t; + +void +glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string); + +int +glcpp_lex_stack_pop (glcpp_lex_stack_t *stack); + +typedef enum { + TOKEN_CLASS_ARGUMENT, + TOKEN_CLASS_IDENTIFIER, + TOKEN_CLASS_FUNC_MACRO, + TOKEN_CLASS_OBJ_MACRO +} token_class_t; + +token_class_t +glcpp_parser_classify_token (glcpp_parser_t *parser, + const char *identifier, + int *parameter_index); + +typedef struct { + int is_function; + string_list_t *parameters; + const char *identifier; + const char *replacement; +} macro_t; + +typedef struct expansion_node { + macro_t *macro; + argument_list_t *arguments; + struct expansion_node *next; +} expansion_node_t; + +struct glcpp_parser { + yyscan_t scanner; + struct hash_table *defines; + expansion_node_t *expansions; + glcpp_lex_stack_t *lex_stack; +}; + glcpp_parser_t * glcpp_parser_create (void); @@ -61,15 +113,17 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -typedef enum { - MACRO_TYPE_UNDEFINED, - MACRO_TYPE_OBJECT, - MACRO_TYPE_FUNCTION -} macro_type_t; +void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments); -macro_type_t -glcpp_parser_macro_type (glcpp_parser_t *parser, - const char *identifier); +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index); + +void +glcpp_parser_pop_expansion (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ @@ -91,10 +145,15 @@ yyparse (glcpp_parser_t *parser); #define xtalloc(ctx, type) (type *)xtalloc_named_const(ctx, sizeof(type), #type) +#define xtalloc_size(ctx, size) xtalloc_named_const(ctx, size, __location__) + void * xtalloc_named_const (const void *context, size_t size, const char *name); char * xtalloc_strdup (const 
void *t, const char *p); +char * +xtalloc_strndup (const void *t, const char *p, size_t n); + #endif diff --git a/xtalloc.c b/xtalloc.c index 849e12d349..d9893ae889 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -50,3 +50,17 @@ xtalloc_strdup (const void *t, const char *p) return ret; } + +char * +xtalloc_strndup (const void *t, const char *p, size_t n) +{ + char *ret; + + ret = talloc_strndup (t, p, n); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} -- cgit v1.2.3 From be0e2e9b2ada51be66afb6b44330acb44e0261f2 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:29:22 -0700 Subject: Fix bug (and add tests) for a function-like macro defined as itself. This case worked previously, but broke in the recent rewrite of function-like macro expansion. The recursion was still terminated correctly, but any parenthesized expression after the macro name was still being swallowed even though the identifier was not being expanded as a macro. The fix is to notice earlier that the identifier is an already-expanding macro. We let the lexer know this through the classify_token function so that an already-expanding macro is lexed as an identifier, not a FUNC_MACRO.
--- glcpp-parse.y | 59 ++++++++++++++++-------------------- tests/032-define-func-self-recurse.c | 2 ++ tests/033-define-func-self-compose.c | 2 ++ 3 files changed, 30 insertions(+), 33 deletions(-) create mode 100644 tests/032-define-func-self-recurse.c create mode 100644 tests/033-define-func-self-compose.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 9f1075aa50..8dc07483c1 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -427,6 +427,22 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +static int +glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) +{ + expansion_node_t *node; + + for (node = parser->expansions; node; node = node->next) { + if (node->macro && + strcmp (node->macro->identifier, member) == 0) + { + return 1; + } + } + + return 0; +} + token_class_t glcpp_parser_classify_token (glcpp_parser_t *parser, const char *identifier, @@ -457,6 +473,12 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, if (macro == NULL) return TOKEN_CLASS_IDENTIFIER; + /* Don't consider this a macro if we are already actively + * expanding this macro. */ + if (glcpp_parser_is_expanding (parser, identifier)) + return TOKEN_CLASS_IDENTIFIER; + + /* Definitely a macro. Just need to check if it's function-like. */ if (macro->is_function) return TOKEN_CLASS_FUNC_MACRO; else @@ -580,37 +602,6 @@ glcpp_parser_pop_expansion (glcpp_parser_t *parser) talloc_free (node); } -int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -static void -_expand_macro (glcpp_parser_t *parser, - const char *token, - macro_t *macro, - argument_list_t *arguments) -{ - /* Don't recurse if we're already actively expanding this token. 
*/ - if (glcpp_parser_is_expanding (parser, token)) { - printf ("%s", token); - return; - } - - glcpp_parser_push_expansion_macro (parser, macro, arguments); -} - void _expand_object_macro (glcpp_parser_t *parser, const char *identifier) { @@ -618,8 +609,9 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) macro = hash_table_find (parser->defines, identifier); assert (! macro->is_function); + assert (! glcpp_parser_is_expanding (parser, identifier)); - _expand_macro (parser, identifier, macro, NULL); + glcpp_parser_push_expansion_macro (parser, macro, NULL); } void @@ -631,6 +623,7 @@ _expand_function_macro (glcpp_parser_t *parser, macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); + assert (! glcpp_parser_is_expanding (parser, identifier)); if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) @@ -643,5 +636,5 @@ _expand_function_macro (glcpp_parser_t *parser, return; } - _expand_macro (parser, identifier, macro, arguments); + glcpp_parser_push_expansion_macro (parser, macro, arguments); } diff --git a/tests/032-define-func-self-recurse.c b/tests/032-define-func-self-recurse.c new file mode 100644 index 0000000000..60d8526c0a --- /dev/null +++ b/tests/032-define-func-self-recurse.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2 * (a)) +foo(3) diff --git a/tests/033-define-func-self-compose.c b/tests/033-define-func-self-compose.c new file mode 100644 index 0000000000..8abaaf6be9 --- /dev/null +++ b/tests/033-define-func-self-compose.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2 * (a)) +foo(foo(3)) -- cgit v1.2.3 From 69f390d6096c597dbe63f20fd02b2312da211de8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:42:42 -0700 Subject: Fix bug (and test) for an invocation using macro name as a non-macro argument This adds a second shift/reduce conflict to our grammar. 
It's basically the same conflict we had previously, (deciding to shift a '(' after a FUNC_MACRO) but this time in the "argument" context rather than the "content" context. It would be nice to not have these, but I think they are unavoidable (without a lot of pain at least) given the preprocessor specification. --- glcpp-parse.y | 10 +++++++++- tests/034-define-func-self-compose-non-func.c | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/034-define-func-self-compose-non-func.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 8dc07483c1..ea27184c47 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -103,8 +103,12 @@ _argument_list_member_at (argument_list_t *list, int index); * 1. '(' after FUNC_MACRO name which is correctly resolved to shift * to form macro invocation rather than reducing directly to * content. + * + * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to + * shift to form macro invocation rather than reducing directly to + * argument. */ -%expect 1 +%expect 2 %% @@ -168,6 +172,10 @@ argument: | macro { $$ = _string_list_create (parser); } +| FUNC_MACRO { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + } | argument word { _string_list_append_item ($1, $2); talloc_free ($2); diff --git a/tests/034-define-func-self-compose-non-func.c b/tests/034-define-func-self-compose-non-func.c new file mode 100644 index 0000000000..209a5f7e07 --- /dev/null +++ b/tests/034-define-func-self-compose-non-func.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(foo) -- cgit v1.2.3 From 59ca98990f814926d716a13b0201c94945133824 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:49:47 -0700 Subject: Fix bug as in previous fix, but with multi-token argument. The previous fix added FUNC_MACRO to a production one higher in the grammar than it should have. So it prevented a FUNC_MACRO from appearing as part of a multi-token argument rather than just alone as an argument.
Fix this (and add a test). --- glcpp-parse.y | 22 ++++++++++------------ ...nc-self-compose-non-func-multi-token-argument.c | 2 ++ 2 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 tests/035-define-func-self-compose-non-func-multi-token-argument.c diff --git a/glcpp-parse.y b/glcpp-parse.y index ea27184c47..400f138d17 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -94,7 +94,7 @@ _argument_list_member_at (argument_list_t *list, int index); %lex-param {void *scanner} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN word +%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN %type argument macro parameter_list %type argument_list @@ -165,18 +165,14 @@ argument_list: ; argument: - word { + argument_word { $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } | macro { $$ = _string_list_create (parser); } -| FUNC_MACRO { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - } -| argument word { +| argument argument_word { _string_list_append_item ($1, $2); talloc_free ($2); $$ = $1; @@ -189,6 +185,13 @@ argument: } ; +argument_word: + IDENTIFIER { $$ = $1; } +| TOKEN { $$ = $1; } +| FUNC_MACRO { $$ = $1; } +; + + directive: DEFINE IDENTIFIER REPLACEMENT { _define_object_macro (parser, $2, $3); @@ -225,11 +228,6 @@ parameter_list: } ; -word: - IDENTIFIER { $$ = $1; } -| TOKEN { $$ = $1; } -; - %% string_list_t * diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/tests/035-define-func-self-compose-non-func-multi-token-argument.c new file mode 100644 index 0000000000..9955219470 --- /dev/null +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(1 + foo) -- cgit v1.2.3 From 5d2114254592e03b6d554c5e2eea4ea442c3fa05 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 07:57:03 -0700 Subject: Like previous fix, 
but for object-like macros (and add a test). The support for an object-like macro within a macro-invocation argument was also implemented at one level too high in the grammar. Fortunately, this is a very simple fix. --- glcpp-parse.y | 4 +--- tests/036-define-func-non-macro-multi-token-argument.c | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 tests/036-define-func-non-macro-multi-token-argument.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 400f138d17..647532f209 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -169,9 +169,6 @@ argument: $$ = _string_list_create (parser); _string_list_append_item ($$, $1); } -| macro { - $$ = _string_list_create (parser); - } | argument argument_word { _string_list_append_item ($1, $2); talloc_free ($2); @@ -189,6 +186,7 @@ argument_word: IDENTIFIER { $$ = $1; } | TOKEN { $$ = $1; } | FUNC_MACRO { $$ = $1; } +| macro { $$ = xtalloc_strdup (parser, ""); } ; diff --git a/tests/036-define-func-non-macro-multi-token-argument.c b/tests/036-define-func-non-macro-multi-token-argument.c new file mode 100644 index 0000000000..b21ff33673 --- /dev/null +++ b/tests/036-define-func-non-macro-multi-token-argument.c @@ -0,0 +1,3 @@ +#define bar success +#define foo(x) x +foo(more bar) -- cgit v1.2.3 From 8f38aff9b5dd42ef963532fe5fc618e8bafa218a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:01:29 -0700 Subject: Add a wrapper function around the lexer. We rename the generated lexer from yylex to glcpp_lex. Then we implement our own yylex function in glcpp-parse.y that calls glcpp_lex. This doesn't change the behavior at all yet, but gives us a place where we can implement alternate lexing in the future. (We want this because instead of re-lexing from strings for macro expansion, we want to lex from pre-parsed token lists. We need this so that when we terminate recursion due to an already active macro expansion, we can ensure that that symbol never gets expanded again later.)
--- Makefile | 2 +- glcpp-parse.y | 13 +++++++++++-- glcpp.h | 6 +++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 3fa863c49b..88116128f8 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o bison --debug --defines=$*.h --output=$*.c $^ %.c: %.l - flex --outfile=$@ $< + flex --prefix=glcpp_ --outfile=$@ $< glcpp-lex.c: glcpp-parse.h diff --git a/glcpp-parse.y b/glcpp-parse.y index 647532f209..6ef1cae0ec 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -82,6 +82,9 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +static int +yylex (yyscan_t scanner); + %} %union { @@ -405,7 +408,7 @@ glcpp_parser_create (void) parser = xtalloc (NULL, glcpp_parser_t); - yylex_init_extra (parser, &parser->scanner); + glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->expansions = NULL; @@ -426,7 +429,7 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - yylex_destroy (parser->scanner); + glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); } @@ -642,3 +645,9 @@ _expand_function_macro (glcpp_parser_t *parser, glcpp_parser_push_expansion_macro (parser, macro, arguments); } + +static int +yylex (yyscan_t scanner) +{ + return glcpp_lex (scanner); +} diff --git a/glcpp.h b/glcpp.h index 81f7d14c5b..6aabf6f182 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,13 +128,13 @@ glcpp_parser_pop_expansion (glcpp_parser_t *parser); /* Generated by glcpp-lex.l to glcpp-lex.c */ int -yylex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); +glcpp_lex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner); int -yylex (yyscan_t scanner); +glcpp_lex (yyscan_t scanner); int -yylex_destroy (yyscan_t scanner); +glcpp_lex_destroy 
(yyscan_t scanner); /* Generated by glcpp-parse.y to glcpp-parse.c */ -- cgit v1.2.3 From 0293b2e2dd81fabd3ecb71e036a99621801e1c94 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:05:40 -0700 Subject: Rename yylex to glcpp_parser_lex and give it a glcpp_parser_t* argument. Much cleaner this way, (and now our custom lex function has access to all the parser state which it will need). --- glcpp-parse.y | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 6ef1cae0ec..04bac00e9f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -82,8 +82,10 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +#define yylex glcpp_parser_lex + static int -yylex (yyscan_t scanner); +glcpp_parser_lex (glcpp_parser_t *parser); %} @@ -94,7 +96,7 @@ yylex (yyscan_t scanner); } %parse-param {glcpp_parser_t *parser} -%lex-param {void *scanner} +%lex-param {glcpp_parser_t *parser} %token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN @@ -647,7 +649,7 @@ _expand_function_macro (glcpp_parser_t *parser, } static int -yylex (yyscan_t scanner) +glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (scanner); + return glcpp_lex (parser->scanner); } -- cgit v1.2.3 From 66df1c262a0c816b28b21457fc499fadfcc0dbee Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:06:56 -0700 Subject: Remove unneeded YYLEX_PARAM define. I'm not sure where this came from, but it's clearly not needed. 
--- glcpp-parse.y | 2 -- 1 file changed, 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 04bac00e9f..bca22cec86 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,8 +28,6 @@ #include "glcpp.h" -#define YYLEX_PARAM parser->scanner - void yyerror (void *scanner, const char *error); -- cgit v1.2.3 From 71c59ec66bc258be6a641b26f793060f6d9522c8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 10:07:31 -0700 Subject: Remove unused NEWLINE token. We fixed the lexer a while back to never return a NEWLINE token, but neglected to clean up this declaration. --- glcpp-parse.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index bca22cec86..bb57b30098 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,7 +96,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF +%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN %type argument macro parameter_list %type argument_list -- cgit v1.2.3 From aaa9acbf10b7a8e7dac061885ef95823ad27f80e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 13:28:24 -0700 Subject: Perform "re lexing" on string list values rather than on text. Previously, we would pass original strings back to the original lexer whenever we needed to re-lex something, (such as an expanded macro or a macro argument). Now, we instead parse the macro or argument originally to a string list, and then re-lex by simply returning each string from this list in turn. We do this in the recently added glcpp_parser_lex function that sits on top of the lower-level glcpp_lex that only deals with text.
This doesn't change any behavior (at least according to the existing test suite which all still passes) but it brings us much closer to being able to "finalize" an unexpanded macro as required by the specification. --- glcpp-lex.l | 71 +++++++--------------------- glcpp-parse.y | 148 ++++++++++++++++++++++++++++++++++++++++------------------ glcpp.h | 38 +++------------ 3 files changed, 124 insertions(+), 133 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 52be1b1ea4..aec967964b 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -84,12 +84,12 @@ TOKEN [^[:space:](),]+ \n { BEGIN INITIAL; - yylval.str = xtalloc_strdup (yyextra, ""); - return REPLACEMENT; + return NEWLINE; } {HSPACE}+ { BEGIN ST_DEFINE_VALUE; + return SPACE; } "(" { @@ -113,10 +113,21 @@ TOKEN [^[:space:](),]+ {HSPACE}+ -.*\n { +{TOKEN} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return TOKEN; +} + +[(),] { + yylval.str = xtalloc_strdup (yyextra, yytext); + return TOKEN; +} + +{HSPACE}+ + +\n { BEGIN INITIAL; - yylval.str = xtalloc_strndup (yyextra, yytext, strlen (yytext) - 1); - return REPLACEMENT; + return NEWLINE; } {IDENTIFIER} { @@ -161,54 +172,4 @@ TOKEN [^[:space:](),]+ {HSPACE}+ -<> { - int done; - - done = glcpp_lex_stack_pop (yyextra->lex_stack); - - if (done) - yyterminate (); - - glcpp_parser_pop_expansion (yyextra); -} - %% - -void -glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string) -{ - struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; - glcpp_lex_node_t *node; - - /* Save the current buffer on the top of the stack. */ - node = xtalloc (stack, glcpp_lex_node_t); - node->buffer = YY_CURRENT_BUFFER; - - node->next = stack->head; - stack->head = node; - - /* Then switch to a new scan buffer for string. 
*/ - yy_scan_string (string, stack->parser->scanner); -} - -int -glcpp_lex_stack_pop (glcpp_lex_stack_t *stack) -{ - struct yyguts_t *yyg = (struct yyguts_t*) stack->parser->scanner; - glcpp_lex_node_t *node; - - node = stack->head; - - if (node == NULL) - return 1; - - stack->head = node->next; - - yy_delete_buffer (YY_CURRENT_BUFFER, stack->parser->scanner); - yy_switch_to_buffer ((YY_BUFFER_STATE) node->buffer, - stack->parser->scanner); - - talloc_free (node); - - return 0; -} diff --git a/glcpp-parse.y b/glcpp-parse.y index bb57b30098..2383c93117 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -34,13 +34,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - const char *replacement); + string_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - const char *replacement); + string_list_t *replacements); void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); @@ -80,6 +80,14 @@ _argument_list_length (argument_list_t *list); string_list_t * _argument_list_member_at (argument_list_t *list, int index); +static void +glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, + macro_t *macro, + argument_list_t *arguments); + +static void +glcpp_parser_pop_expansion (glcpp_parser_t *parser); + #define yylex glcpp_parser_lex static int @@ -96,9 +104,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN UNDEF -%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN -%type argument macro parameter_list +%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF +%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN +%type argument macro parameter_list replacement_list pp_tokens %type argument_list /* Hard to remove shift/reduce conflicts documented as 
follows: @@ -194,10 +202,14 @@ argument_word: directive: - DEFINE IDENTIFIER REPLACEMENT { - _define_object_macro (parser, $2, $3); + DEFINE IDENTIFIER NEWLINE { + string_list_t *list = _string_list_create (parser); + _define_object_macro (parser, $2, list); } -| DEFINE IDENTIFIER '(' parameter_list ')' REPLACEMENT { +| DEFINE IDENTIFIER SPACE replacement_list NEWLINE { + _define_object_macro (parser, $2, $4); + } +| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { _define_function_macro (parser, $2, $4, $6); } | UNDEF IDENTIFIER { @@ -229,6 +241,27 @@ parameter_list: } ; +replacement_list: + /* empty */ { + $$ = _string_list_create (parser); + } +| pp_tokens { + $$ = $1; + } +; + + +pp_tokens: + TOKEN { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + } +| pp_tokens TOKEN { + _string_list_append_item ($1, $2); + $$ = $1; + } +; + %% string_list_t * @@ -413,10 +446,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->expansions = NULL; - parser->lex_stack = xtalloc (parser, glcpp_lex_stack_t); - parser->lex_stack->parser = parser; - parser->lex_stack->head = NULL; - return parser; } @@ -495,7 +524,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - const char *replacement) + string_list_t *replacements) { macro_t *macro; @@ -504,7 +533,7 @@ _define_object_macro (glcpp_parser_t *parser, macro->is_function = 0; macro->parameters = NULL; macro->identifier = talloc_strdup (macro, identifier); - macro->replacement = talloc_steal (macro, replacement); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -513,7 +542,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - const char *replacement) + string_list_t *replacements) { macro_t *macro; @@ -522,7 +551,7 @@ _define_function_macro (glcpp_parser_t *parser, 
macro->is_function = 1; macro->parameters = talloc_steal (macro, parameters); macro->identifier = talloc_strdup (macro, identifier); - macro->replacement = talloc_steal (macro, replacement); + macro->replacements = talloc_steal (macro, replacements); hash_table_insert (parser->defines, macro, identifier); } @@ -531,7 +560,7 @@ static void _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments, - const char * replacement) + string_node_t *replacements) { expansion_node_t *node; @@ -539,20 +568,19 @@ _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, node->macro = macro; node->arguments = arguments; + node->replacements = replacements; node->next = parser->expansions; parser->expansions = node; - - glcpp_lex_stack_push (parser->lex_stack, replacement); } -void +static void glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments) { _glcpp_parser_push_expansion_internal (parser, macro, arguments, - macro->replacement); + macro->replacements->head); } void @@ -561,38 +589,16 @@ glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, { argument_list_t *arguments; string_list_t *argument; - string_node_t *node; - char *argument_str, *s; - int length; arguments = parser->expansions->arguments; argument = _argument_list_member_at (arguments, argument_index); - length = 0; - for (node = argument->head; node; node = node->next) - length += strlen (node->str) + 1; - - argument_str = xtalloc_size (parser, length); - - *argument_str = '\0'; - s = argument_str; - for (node = argument->head; node; node = node->next) { - strcpy (s, node->str); - s += strlen (node->str); - if (node->next) { - *s = ' '; - s++; - *s = '\0'; - } - } - _glcpp_parser_push_expansion_internal (parser, NULL, NULL, - argument_str); + argument->head); } -/* The lexer calls this when it exhausts a string. 
*/ -void +static void glcpp_parser_pop_expansion (glcpp_parser_t *parser) { expansion_node_t *node; @@ -649,5 +655,55 @@ _expand_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + expansion_node_t *expansion; + string_node_t *replacements; + int parameter_index; + + /* Who says C can't do efficient tail recursion? */ + RECURSE: + + expansion = parser->expansions; + + if (expansion == NULL) + return glcpp_lex (parser->scanner); + + replacements = expansion->replacements; + + /* Pop expansion when replacements is exhausted. */ + if (replacements == NULL) { + glcpp_parser_pop_expansion (parser); + goto RECURSE; + } + + expansion->replacements = replacements->next; + + if (strcmp (replacements->str, "(") == 0) + return '('; + else if (strcmp (replacements->str, ")") == 0) + return ')'; + else if (strcmp (replacements->str, ",") == 0) + return ','; + + yylval.str = xtalloc_strdup (parser, replacements->str); + + switch (glcpp_parser_classify_token (parser, yylval.str, + &parameter_index)) + { + case TOKEN_CLASS_ARGUMENT: + talloc_free (yylval.str); + glcpp_parser_push_expansion_argument (parser, + parameter_index); + goto RECURSE; + break; + case TOKEN_CLASS_IDENTIFIER: + return IDENTIFIER; + break; + case TOKEN_CLASS_FUNC_MACRO: + return FUNC_MACRO; + break; + default: + case TOKEN_CLASS_OBJ_MACRO: + return OBJ_MACRO; + break; + } } diff --git a/glcpp.h b/glcpp.h index 6aabf6f182..ef821a7637 100644 --- a/glcpp.h +++ b/glcpp.h @@ -54,24 +54,6 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; -/* Support for temporarily lexing/parsing tokens from a string.
*/ - -typedef struct glcpp_lex_node { - void *buffer; - struct glcpp_lex_node *next; -} glcpp_lex_node_t; - -typedef struct { - glcpp_parser_t *parser; - glcpp_lex_node_t *head; -} glcpp_lex_stack_t; - -void -glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string); - -int -glcpp_lex_stack_pop (glcpp_lex_stack_t *stack); - typedef enum { TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, @@ -88,12 +70,13 @@ typedef struct { int is_function; string_list_t *parameters; const char *identifier; - const char *replacement; + string_list_t *replacements; } macro_t; typedef struct expansion_node { macro_t *macro; argument_list_t *arguments; + string_node_t *replacements; struct expansion_node *next; } expansion_node_t; @@ -101,9 +84,12 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; - glcpp_lex_stack_t *lex_stack; }; +void +glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, + int argument_index); + glcpp_parser_t * glcpp_parser_create (void); @@ -113,18 +99,6 @@ glcpp_parser_parse (glcpp_parser_t *parser); void glcpp_parser_destroy (glcpp_parser_t *parser); -void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments); - -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - -void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - /* Generated by glcpp-lex.l to glcpp-lex.c */ int -- cgit v1.2.3 From 472524413d004680dbdb89602617f32da8f42f56 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 19 May 2010 13:54:37 -0700 Subject: Use new token_list_t rather than string_list_t for macro values. There's not yet any change in functionality here, (at least according to the test suite). But we now have the option of specifying a type for each string in the token list. This will allow us to finalize an unexpanded macro name so that it won't be subjected to excess expansion later. 
--- glcpp-parse.y | 111 +++++++++++++++++++++++++++++++++++++++++++--------------- glcpp.h | 17 +++++++-- 2 files changed, 97 insertions(+), 31 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 2383c93117..c8d1919d9c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -34,13 +34,13 @@ yyerror (void *scanner, const char *error); void _define_object_macro (glcpp_parser_t *parser, const char *macro, - string_list_t *replacements); + token_list_t *replacements); void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, - string_list_t *replacements); + token_list_t *replacements); void _expand_object_macro (glcpp_parser_t *parser, const char *identifier); @@ -72,14 +72,23 @@ argument_list_t * _argument_list_create (void *ctx); void -_argument_list_append (argument_list_t *list, string_list_t *argument); +_argument_list_append (argument_list_t *list, token_list_t *argument); int _argument_list_length (argument_list_t *list); -string_list_t * +token_list_t * _argument_list_member_at (argument_list_t *list, int index); +token_list_t * +_token_list_create (void *ctx); + +void +_token_list_append (token_list_t *list, int type, const char *value); + +void +_token_list_append_list (token_list_t *list, token_list_t *tail); + static void glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, macro_t *macro, @@ -97,8 +106,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %union { char *str; - string_list_t *string_list; argument_list_t *argument_list; + string_list_t *string_list; + token_list_t *token_list; } %parse-param {glcpp_parser_t *parser} @@ -106,8 +116,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF %type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN -%type argument macro parameter_list replacement_list pp_tokens %type argument_list +%type macro parameter_list +%type argument replacement_list pp_tokens /* Hard to remove shift/reduce 
conflicts documented as follows: * @@ -177,18 +188,18 @@ argument_list: argument: argument_word { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); + $$ = _token_list_create (parser); + _token_list_append ($$, IDENTIFIER, $1); } | argument argument_word { - _string_list_append_item ($1, $2); + _token_list_append ($1, IDENTIFIER, $2); talloc_free ($2); $$ = $1; } | argument '(' argument ')' { - _string_list_append_item ($1, "("); - _string_list_append_list ($1, $3); - _string_list_append_item ($1, ")"); + _token_list_append ($1, '(', "("); + _token_list_append_list ($1, $3); + _token_list_append ($1, ')', ")"); $$ = $1; } ; @@ -203,7 +214,7 @@ argument_word: directive: DEFINE IDENTIFIER NEWLINE { - string_list_t *list = _string_list_create (parser); + token_list_t *list = _token_list_create (parser); _define_object_macro (parser, $2, list); } | DEFINE IDENTIFIER SPACE replacement_list NEWLINE { @@ -243,7 +254,7 @@ parameter_list: replacement_list: /* empty */ { - $$ = _string_list_create (parser); + $$ = _token_list_create (parser); } | pp_tokens { $$ = $1; @@ -253,11 +264,11 @@ replacement_list: pp_tokens: TOKEN { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); + $$ = _token_list_create (parser); + _token_list_append ($$, TOKEN, $1); } | pp_tokens TOKEN { - _string_list_append_item ($1, $2); + _token_list_append ($1, TOKEN, $2); $$ = $1; } ; @@ -370,7 +381,7 @@ _argument_list_create (void *ctx) } void -_argument_list_append (argument_list_t *list, string_list_t *argument) +_argument_list_append (argument_list_t *list, token_list_t *argument) { argument_node_t *node; @@ -406,7 +417,7 @@ _argument_list_length (argument_list_t *list) return length; } -string_list_t * +token_list_t * _argument_list_member_at (argument_list_t *list, int index) { argument_node_t *node; @@ -427,6 +438,50 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } + +token_list_t * +_token_list_create (void *ctx) +{ + 
token_list_t *list; + + list = xtalloc (ctx, token_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_token_list_append (token_list_t *list, int type, const char *value) +{ + token_node_t *node; + + node = xtalloc (list, token_node_t); + node->type = type; + node->value = xtalloc_strdup (list, value); + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +void +_token_list_append_list (token_list_t *list, token_list_t *tail) +{ + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; +} void yyerror (void *scanner, const char *error) @@ -524,7 +579,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, void _define_object_macro (glcpp_parser_t *parser, const char *identifier, - string_list_t *replacements) + token_list_t *replacements) { macro_t *macro; @@ -542,7 +597,7 @@ void _define_function_macro (glcpp_parser_t *parser, const char *identifier, string_list_t *parameters, - string_list_t *replacements) + token_list_t *replacements) { macro_t *macro; @@ -560,7 +615,7 @@ static void _glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, macro_t *macro, argument_list_t *arguments, - string_node_t *replacements) + token_node_t *replacements) { expansion_node_t *node; @@ -588,7 +643,7 @@ glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, int argument_index) { argument_list_t *arguments; - string_list_t *argument; + token_list_t *argument; arguments = parser->expansions->arguments; @@ -656,7 +711,7 @@ static int glcpp_parser_lex (glcpp_parser_t *parser) { expansion_node_t *expansion; - string_node_t *replacements; + token_node_t *replacements; int parameter_index; /* Who says C can't do efficient tail recursion? 
*/ @@ -677,14 +732,14 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - if (strcmp (replacements->str, "(") == 0) + if (strcmp (replacements->value, "(") == 0) return '('; - else if (strcmp (replacements->str, ")") == 0) + else if (strcmp (replacements->value, ")") == 0) return ')'; - else if (strcmp (replacements->str, ",") == 0) + else if (strcmp (replacements->value, ",") == 0) return ','; - yylval.str = xtalloc_strdup (parser, replacements->str); + yylval.str = xtalloc_strdup (parser, replacements->value); switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) diff --git a/glcpp.h b/glcpp.h index ef821a7637..c647e2a72e 100644 --- a/glcpp.h +++ b/glcpp.h @@ -42,8 +42,19 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct token_node { + int type; + const char *value; + struct token_node *next; +} token_node_t; + +typedef struct token_list { + token_node_t *head; + token_node_t *tail; +} token_list_t; + typedef struct argument_node { - string_list_t *argument; + token_list_t *argument; struct argument_node *next; } argument_node_t; @@ -70,13 +81,13 @@ typedef struct { int is_function; string_list_t *parameters; const char *identifier; - string_list_t *replacements; + token_list_t *replacements; } macro_t; typedef struct expansion_node { macro_t *macro; argument_list_t *arguments; - string_node_t *replacements; + token_node_t *replacements; struct expansion_node *next; } expansion_node_t; -- cgit v1.2.3 From b569383bbdfa22ed591255f56fb93832633a95ae Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 08:01:44 -0700 Subject: Avoid re-expanding a macro name that has once been rejected from expansion.
The specification of the preprocessor in C99 says that when we see a macro name that we are already expanding that we refuse to expand it now, (which we've done for a while), but also that we refuse to ever expand it later if seen in other contexts at which it would be legitimate to expand. We add a test case for that here, and fix it to work. The fix takes advantage of a new token_t value for tokens and argument words along with the recently added IDENTIFIER_FINALIZED token type which instructs the parser to not even look for another expansion. --- glcpp-lex.l | 12 ++++++++--- glcpp-parse.y | 38 ++++++++++++++++++++++++----------- glcpp.h | 6 ++++++ tests/037-finalize-unexpanded-macro.c | 3 +++ 4 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 tests/037-finalize-unexpanded-macro.c diff --git a/glcpp-lex.l b/glcpp-lex.l index aec967964b..8e3ab661e6 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -114,12 +114,14 @@ TOKEN [^[:space:](),]+ {HSPACE}+ {TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } [(),] { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } @@ -147,6 +149,9 @@ TOKEN [^[:space:](),]+ case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; + case TOKEN_CLASS_IDENTIFIER_FINALIZED: + return IDENTIFIER_FINALIZED; + break; case TOKEN_CLASS_FUNC_MACRO: return FUNC_MACRO; break; @@ -162,7 +167,8 @@ TOKEN [^[:space:](),]+ } {TOKEN} { - yylval.str = xtalloc_strdup (yyextra, yytext); + yylval.token.type = TOKEN; + yylval.token.value = xtalloc_strdup (yyextra, yytext); return TOKEN; } diff --git a/glcpp-parse.y b/glcpp-parse.y index c8d1919d9c..28e79ebf9f 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -108,16 +108,18 @@ glcpp_parser_lex (glcpp_parser_t *parser); char *str; argument_list_t *argument_list; string_list_t *string_list; + 
token_t token; token_list_t *token_list; } %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type argument_word FUNC_MACRO IDENTIFIER OBJ_MACRO TOKEN +%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SPACE TOKEN UNDEF +%type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list +%type TOKEN argument_word %type argument replacement_list pp_tokens /* Hard to remove shift/reduce conflicts documented as follows: @@ -145,10 +147,14 @@ content: printf ("%s", $1); talloc_free ($1); } -| TOKEN { +| IDENTIFIER_FINALIZED { printf ("%s", $1); talloc_free ($1); } +| TOKEN { + printf ("%s", $1.value); + talloc_free ($1.value); + } | FUNC_MACRO { printf ("%s", $1); talloc_free ($1); @@ -189,11 +195,11 @@ argument_list: argument: argument_word { $$ = _token_list_create (parser); - _token_list_append ($$, IDENTIFIER, $1); + _token_list_append ($$, $1.type, $1.value); } | argument argument_word { - _token_list_append ($1, IDENTIFIER, $2); - talloc_free ($2); + _token_list_append ($1, $2.type, $2.value); + talloc_free ($2.value); $$ = $1; } | argument '(' argument ')' { @@ -205,10 +211,11 @@ argument: ; argument_word: - IDENTIFIER { $$ = $1; } + IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } +| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } | TOKEN { $$ = $1; } -| FUNC_MACRO { $$ = $1; } -| macro { $$ = xtalloc_strdup (parser, ""); } +| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } +| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } ; @@ -265,10 +272,10 @@ replacement_list: pp_tokens: TOKEN { $$ = _token_list_create (parser); - _token_list_append ($$, TOKEN, $1); + _token_list_append ($$, $1.type, $1.value); } | pp_tokens TOKEN { - _token_list_append ($1, TOKEN, $2); + _token_list_append ($1, $2.type, $2.value); $$ = $1; } ; @@ -567,7 +574,7 @@ 
glcpp_parser_classify_token (glcpp_parser_t *parser, /* Don't consider this a macro if we are already actively * expanding this macro. */ if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER; + return TOKEN_CLASS_IDENTIFIER_FINALIZED; /* Definitely a macro. Just need to check if it's function-like. */ if (macro->is_function) @@ -741,6 +748,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, replacements->value); + /* Carefully refuse to expand any finalized identifier. */ + if (replacements->type == IDENTIFIER_FINALIZED) + return IDENTIFIER_FINALIZED; + switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) { @@ -753,6 +764,9 @@ glcpp_parser_lex (glcpp_parser_t *parser) case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; + case TOKEN_CLASS_IDENTIFIER_FINALIZED: + return IDENTIFIER_FINALIZED; + break; case TOKEN_CLASS_FUNC_MACRO: return FUNC_MACRO; break; diff --git a/glcpp.h b/glcpp.h index c647e2a72e..5432a31817 100644 --- a/glcpp.h +++ b/glcpp.h @@ -42,6 +42,11 @@ typedef struct string_list { string_node_t *tail; } string_list_t; +typedef struct token { + int type; + char *value; +} token_t; + typedef struct token_node { int type; const char *value; @@ -68,6 +73,7 @@ typedef struct glcpp_parser glcpp_parser_t; typedef enum { TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, + TOKEN_CLASS_IDENTIFIER_FINALIZED, TOKEN_CLASS_FUNC_MACRO, TOKEN_CLASS_OBJ_MACRO } token_class_t; diff --git a/tests/037-finalize-unexpanded-macro.c b/tests/037-finalize-unexpanded-macro.c new file mode 100644 index 0000000000..b3a2f37f1b --- /dev/null +++ b/tests/037-finalize-unexpanded-macro.c @@ -0,0 +1,3 @@ +#define expand(x) expand(x once) +#define foo(x) x +foo(expand(just)) -- cgit v1.2.3 From 9f3d2c4e3dff3eb4f5820a034426056bf66b3015 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 08:42:02 -0700 Subject: Add support for commas within parenthesized groups in function arguments.
The specification says that commas within a parenthesized group, (that's not a function-like macro invocation), are passed through literally and not considered argument separators in any outer macro invocation. Add support and a test for this case. This support makes a third occurrence of the same "FUNC_MACRO (" shift/reduce conflict appear, so expect that. This change does introduce a fairly large copy/paste block in the grammar which is unfortunate. Perhaps if I were more clever I'd find a way to share the common pieces between argument and argument_or_comma. --- glcpp-parse.y | 44 ++++++++++++++++++++++++++++++++++++---- tests/038-func-arg-with-commas.c | 2 ++ 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 tests/038-func-arg-with-commas.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 28e79ebf9f..c9edc5c304 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -119,8 +119,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list -%type TOKEN argument_word -%type argument replacement_list pp_tokens +%type TOKEN argument_word argument_word_or_comma +%type argument argument_or_comma replacement_list pp_tokens /* Hard to remove shift/reduce conflicts documented as follows: * @@ -131,8 +131,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to * shift to form macro invocation rather than reducing directly to * argument. + * + * 3. Similarly again now that we added argument_or_comma as well. 
*/ -%expect 2 +%expect 3 %% @@ -202,7 +204,7 @@ argument: talloc_free ($2.value); $$ = $1; } -| argument '(' argument ')' { +| argument '(' argument_or_comma ')' { _token_list_append ($1, '(', "("); _token_list_append_list ($1, $3); _token_list_append ($1, ')', ")"); @@ -218,6 +220,40 @@ argument_word: | macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } ; + /* XXX: The body of argument_or_comma is the same as the body + * of argument, but with "argument" and "argument_word" + * changed to "argument_or_comma" and + * "argument_word_or_comma". It would be nice to have less + * redundancy here, but I'm not sure how. + * + * It would also be nice to have a less ugly grammar to have + * to implement, but such is the C preprocessor. + */ +argument_or_comma: + argument_word_or_comma { + $$ = _token_list_create (parser); + _token_list_append ($$, $1.type, $1.value); + } +| argument_or_comma argument_word_or_comma { + _token_list_append ($1, $2.type, $2.value); + $$ = $1; + } +| argument_or_comma '(' argument_or_comma ')' { + _token_list_append ($1, '(', "("); + _token_list_append_list ($1, $3); + _token_list_append ($1, ')', ")"); + $$ = $1; + } +; + +argument_word_or_comma: + IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } +| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } +| TOKEN { $$ = $1; } +| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } +| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } +| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); } +; directive: DEFINE IDENTIFIER NEWLINE { diff --git a/tests/038-func-arg-with-commas.c b/tests/038-func-arg-with-commas.c new file mode 100644 index 0000000000..1407c7d6e3 --- /dev/null +++ b/tests/038-func-arg-with-commas.c @@ -0,0 +1,2 @@ +#define foo(x) success +foo(argument (with,embedded , commas) -- tricky) -- cgit v1.2.3 From 805ea6afe66f52476094256914b7319b29972a16 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 12:06:33 
-0700 Subject: Add test (and fix) for a function argument of a macro that expands with a comma. The fix here is quite simple (and actually only deletes code). When expanding a macro, we don't return a ',' as a unique token type, but simply let it fall through to the generic case. --- glcpp-parse.y | 2 -- tests/039-func-arg-obj-macro-with-comma.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 tests/039-func-arg-obj-macro-with-comma.c diff --git a/glcpp-parse.y b/glcpp-parse.y index c9edc5c304..773db93e54 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -779,8 +779,6 @@ glcpp_parser_lex (glcpp_parser_t *parser) return '('; else if (strcmp (replacements->value, ")") == 0) return ')'; - else if (strcmp (replacements->value, ",") == 0) - return ','; yylval.str = xtalloc_strdup (parser, replacements->value); diff --git a/tests/039-func-arg-obj-macro-with-comma.c b/tests/039-func-arg-obj-macro-with-comma.c new file mode 100644 index 0000000000..0f7fe632b5 --- /dev/null +++ b/tests/039-func-arg-obj-macro-with-comma.c @@ -0,0 +1,3 @@ +#define foo(a) (a) +#define bar two,words +foo(bar) -- cgit v1.2.3 From 660bda057a0f9c83625e798c0f719080d11e9431 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:00:28 -0700 Subject: Stop ignoring whitespace while testing. Sometime back the output of glcpp started differing from the output of "gcc -E" in the amount of whitespace it emitted. At the time, I switched the test suite to use "diff -w" to ignore this. This was a mistake since it ignores whitespace entirely. (I meant to use "diff -b" which ignores only changes in the amount of whitespace.) So bugs have since been introduced that the test suite doesn't notice. For example, glcpp is producing "twotokens" where it should be producing "two tokens". Let's stop ignoring whitespace in the test suite, which currently introduces lots of failures---some real and some spurious.
--- tests/glcpp-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/glcpp-test b/tests/glcpp-test index 673a4f45e9..25685eeabe 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -B -w -u $test.expected $test.out + diff -u $test.expected $test.out done -- cgit v1.2.3 From 323421db6567f3402e0ff9dcf548269e6d7b5497 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:05:37 -0700 Subject: Remove "unnecessary" whitespace from some tests. This whitespace was not part of anything being tested, and it introduces differences (that we don't actually care about) between the output of "gcc -E" and glcpp. Just eliminate this extra whitespace to reduce spurious test-case failures. --- tests/015-define-object-with-parens.c | 6 +++--- tests/016-define-func-1-arg.c | 2 +- tests/020-define-func-2-arg-multi.c | 2 +- tests/023-define-extra-whitespace.c | 4 ++-- tests/032-define-func-self-recurse.c | 2 +- tests/033-define-func-self-compose.c | 2 +- tests/035-define-func-self-compose-non-func-multi-token-argument.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/015-define-object-with-parens.c b/tests/015-define-object-with-parens.c index 10bf7e31a3..558da9c617 100644 --- a/tests/015-define-object-with-parens.c +++ b/tests/015-define-object-with-parens.c @@ -1,4 +1,4 @@ -#define foo ( ) 1 +#define foo ()1 foo() -#define bar () 2 -bar( ) +#define bar ()2 +bar() diff --git a/tests/016-define-func-1-arg.c b/tests/016-define-func-1-arg.c index dea38d1fed..a2e2404c7c 100644 --- a/tests/016-define-func-1-arg.c +++ b/tests/016-define-func-1-arg.c @@ -1,2 +1,2 @@ -#define foo(x) ((x) + 1) +#define foo(x) ((x)+1) foo(bar) diff --git a/tests/020-define-func-2-arg-multi.c b/tests/020-define-func-2-arg-multi.c index 253421139d..3049ad1546 100644 --- a/tests/020-define-func-2-arg-multi.c +++ 
b/tests/020-define-func-2-arg-multi.c @@ -1,2 +1,2 @@ -#define foo(x,y) x, two fish, red fish, y +#define foo(x,y) x,two fish,red fish,y foo(one fish, blue fish) diff --git a/tests/023-define-extra-whitespace.c b/tests/023-define-extra-whitespace.c index 375355a17d..7ebfed6516 100644 --- a/tests/023-define-extra-whitespace.c +++ b/tests/023-define-extra-whitespace.c @@ -3,6 +3,6 @@ # define twoargs( x , y ) x y # define threeargs( a , b , c ) a b c noargs ( ) - onearg ( 2 ) - twoargs ( 3 , 4 ) +onearg ( 2 ) +twoargs ( 3 , 4 ) threeargs ( 5 , 6 , 7 ) diff --git a/tests/032-define-func-self-recurse.c b/tests/032-define-func-self-recurse.c index 60d8526c0a..b3ac70f499 100644 --- a/tests/032-define-func-self-recurse.c +++ b/tests/032-define-func-self-recurse.c @@ -1,2 +1,2 @@ -#define foo(a) foo(2 * (a)) +#define foo(a) foo(2*(a)) foo(3) diff --git a/tests/033-define-func-self-compose.c b/tests/033-define-func-self-compose.c index 8abaaf6be9..f65e48286c 100644 --- a/tests/033-define-func-self-compose.c +++ b/tests/033-define-func-self-compose.c @@ -1,2 +1,2 @@ -#define foo(a) foo(2 * (a)) +#define foo(a) foo(2*(a)) foo(foo(3)) diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/tests/035-define-func-self-compose-non-func-multi-token-argument.c index 9955219470..c307fbe830 100644 --- a/tests/035-define-func-self-compose-non-func-multi-token-argument.c +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo(1 + foo) +foo(1+foo) -- cgit v1.2.3 From ff13cfed81132eaaa8859f25f87ea5398d4864ba Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:08:19 -0700 Subject: Remove unused function _print_string_list The only good dead code is non-existing dead code. 
--- glcpp-parse.y | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 773db93e54..79a8ec2cf2 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -50,9 +50,6 @@ _expand_function_macro (glcpp_parser_t *parser, const char *identifier, argument_list_t *arguments); -void -_print_string_list (string_list_t *list); - string_list_t * _string_list_create (void *ctx); @@ -396,21 +393,6 @@ _string_list_length (string_list_t *list) return length; } -void -_print_string_list (string_list_t *list) -{ - string_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - printf ("%s", node->str); - if (node->next) - printf (" "); - } -} - argument_list_t * _argument_list_create (void *ctx) { -- cgit v1.2.3 From 005b32061f77008530a290ed991980a579095002 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:19:57 -0700 Subject: Fix bug of consuming excess whitespace. We fix this by moving printing up to the top-level "input" action and tracking whether a space is needed between one token and the next. This fixes all actual bugs in test-suite output, but does leave some tests failing due to differences in the amount of whitespace produced, (which aren't actual bugs per se). 
--- glcpp-parse.y | 71 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 79a8ec2cf2..c6d64176b2 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -102,6 +102,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { + int ival; char *str; argument_list_t *argument_list; string_list_t *string_list; @@ -112,8 +113,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO +%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%type input punctuator +%type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list %type TOKEN argument_word argument_word_or_comma @@ -135,38 +137,71 @@ glcpp_parser_lex (glcpp_parser_t *parser); %% + /* We do all printing at the input level. + * + * The value for "input" is simply TOKEN or SEPARATOR so we + * can decide whether it's necessary to print a space + * character between any two. 
*/ input: - /* empty */ -| input content + /* empty */ { + $$ = SEPARATOR; + } +| input content { + int is_token; + + if ($2 && strlen ($2)) { + int c = $2[0]; + int is_not_separator = ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_')); + + if ($1 == TOKEN && is_not_separator) + printf (" "); + printf ("%s", $2); + if (is_not_separator) + $$ = TOKEN; + else + $$ = SEPARATOR; + } else { + $$ = $1; + } + if ($2) + talloc_free ($2); + } ; - /* We do all printing at the content level */ content: IDENTIFIER { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | IDENTIFIER_FINALIZED { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | TOKEN { - printf ("%s", $1.value); - talloc_free ($1.value); + $$ = $1.value; } | FUNC_MACRO { - printf ("%s", $1); - talloc_free ($1); + $$ = $1; } | directive { - printf ("\n"); + $$ = talloc_strdup (parser, "\n"); + } +| punctuator { + $$ = talloc_asprintf (parser, "%c", $1); + } +| macro { + $$ = NULL; } -| '(' { printf ("("); } -| ')' { printf (")"); } -| ',' { printf (","); } -| macro ; +punctuator: + '(' { $$ = '('; } +| ')' { $$ = ')'; } +| ',' { $$ = ','; } + ; + macro: FUNC_MACRO '(' argument_list ')' { _expand_function_macro (parser, $1, $3); -- cgit v1.2.3 From 5a6b9a27fdb2ac66aaadd90b15b1889fea8f08d0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:29:43 -0700 Subject: Avoid printing a space at the beginning of lines in the output. This fixes more differences compared to "gcc -E" so removes several cases of erroneously failing test cases. The implementation isn't very elegant, but it is functional. 
--- glcpp-lex.l | 5 +++++ glcpp-parse.y | 18 +++++++++++------- glcpp.h | 1 + 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 8e3ab661e6..13e4d6f0ef 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -173,7 +173,12 @@ TOKEN [^[:space:](),]+ } \n { + /* XXX: Printing here (rather than in a parser production) + * *and* frobbing a bit of the parser state here are both ugly + * things. But all my attempts to avoid this by returning a + * NEWLINE token here have led to even more ugly things. */ printf ("\n"); + yyextra->just_printed_separator = 1; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index c6d64176b2..93713a3f0c 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -114,7 +114,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %lex-param {glcpp_parser_t *parser} %token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF -%type input punctuator +%type punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list @@ -144,7 +144,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); * character between any two. */ input: /* empty */ { - $$ = SEPARATOR; + parser->just_printed_separator = 1; } | input content { int is_token; @@ -157,16 +157,18 @@ input: (c >= '0' && c <= '9') || (c == '_')); - if ($1 == TOKEN && is_not_separator) + if (! 
parser->just_printed_separator && is_not_separator) + { printf (" "); + } printf ("%s", $2); + if (is_not_separator) - $$ = TOKEN; + parser->just_printed_separator = 0; else - $$ = SEPARATOR; - } else { - $$ = $1; + parser->just_printed_separator = 1; } + if ($2) talloc_free ($2); } @@ -561,6 +563,8 @@ glcpp_parser_create (void) hash_table_string_compare); parser->expansions = NULL; + parser->just_printed_separator = 1; + return parser; } diff --git a/glcpp.h b/glcpp.h index 5432a31817..c25e29c688 100644 --- a/glcpp.h +++ b/glcpp.h @@ -101,6 +101,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; + int just_printed_separator; }; void -- cgit v1.2.3 From 876e510bdab96574c4ca5ee94c580fe6ad7f0106 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 14:38:06 -0700 Subject: Finish cleaning up whitespace differences. The last remaining thing here was that when a line ended with a macro, and the parser looked ahead to the newline token, the lexer was printing that newline before the parser printed the expansion of the macro. The fix is simple, just make the lexer tell the parser that a newline is needed, and the parser can wait until reducing a production to print that newline. With this, we now pass the entire test suite with simply "diff -u", so we no longer have any diff options hiding whitespace bugs from us. Hurrah! --- glcpp-lex.l | 7 +------ glcpp-parse.y | 9 +++++++++ glcpp.h | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 13e4d6f0ef..114b59f045 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -173,12 +173,7 @@ TOKEN [^[:space:](),]+ } \n { - /* XXX: Printing here (rather than in a parser production) - * *and* frobbing a bit of the parser state here are both ugly - * things. But all my attempts to avoid this by returning a - * NEWLINE token here have led to even more ugly things. 
*/ - printf ("\n"); - yyextra->just_printed_separator = 1; + yyextra->need_newline = 1; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 93713a3f0c..ddc2a258cd 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -171,6 +171,12 @@ input: if ($2) talloc_free ($2); + + if (parser->need_newline) { + printf ("\n"); + parser->just_printed_separator = 1; + parser->need_newline = 0; + } } ; @@ -564,6 +570,7 @@ glcpp_parser_create (void) parser->expansions = NULL; parser->just_printed_separator = 1; + parser->need_newline = 0; return parser; } @@ -577,6 +584,8 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { + if (parser->need_newline) + printf ("\n"); glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); diff --git a/glcpp.h b/glcpp.h index c25e29c688..2e93cb981d 100644 --- a/glcpp.h +++ b/glcpp.h @@ -102,6 +102,7 @@ struct glcpp_parser { struct hash_table *defines; expansion_node_t *expansions; int just_printed_separator; + int need_newline; }; void -- cgit v1.2.3 From b894583fd0246060d908a0cc7b5f3ef72a5a2112 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:02:03 -0700 Subject: Add xtalloc_asprintf I expect this to be useful in the upcoming implementation of token pasting. --- glcpp.h | 3 +++ xtalloc.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/glcpp.h b/glcpp.h index 2e93cb981d..048a9be76b 100644 --- a/glcpp.h +++ b/glcpp.h @@ -149,4 +149,7 @@ xtalloc_strdup (const void *t, const char *p); char * xtalloc_strndup (const void *t, const char *p, size_t n); +char * +xtalloc_asprintf (const void *t, const char *fmt, ...); + #endif diff --git a/xtalloc.c b/xtalloc.c index d9893ae889..e52d12ac6b 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -64,3 +64,21 @@ xtalloc_strndup (const void *t, const char *p, size_t n) return ret; } + +char * +xtalloc_asprintf (const void *t, const char *fmt, ...) 
+{ + va_list ap; + char *ret; + + va_start(ap, fmt); + + ret = talloc_vasprintf(t, fmt, ap); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + va_end(ap); + return ret; +} -- cgit v1.2.3 From c10a51ba13272dc48407b885d8684be99bba120d Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:15:26 -0700 Subject: Pre-expand macro arguments at time of invocation. Previously, we were using the same lexing stack as we use for macro expansion to also expand macro arguments. Instead, we now do this earlier by simply recursing over the macro invocation's replacement list and constructing a new expanded list, (and pushing only *that* onto the stack). This is simpler, and also allows us to more easily implement token pasting in the future. --- glcpp-lex.l | 8 ------ glcpp-parse.y | 88 +++++++++++++++++++---------------------------------------- glcpp.h | 2 -- 3 files changed, 28 insertions(+), 70 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 114b59f045..6138a9de12 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -138,14 +138,6 @@ TOKEN [^[:space:](),]+ switch (glcpp_parser_classify_token (yyextra, yylval.str, &parameter_index)) { - case TOKEN_CLASS_ARGUMENT: - talloc_free (yylval.str); - /* We don't return a value here since the - * current token will be replaced by new - * tokens. 
*/ - glcpp_parser_push_expansion_argument (yyextra, - parameter_index); - break; case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; diff --git a/glcpp-parse.y b/glcpp-parse.y index ddc2a258cd..0691619acf 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -86,11 +86,6 @@ _token_list_append (token_list_t *list, int type, const char *value); void _token_list_append_list (token_list_t *list, token_list_t *tail); -static void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments); - static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -614,24 +609,7 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, { macro_t *macro; - /* First we check if we are currently expanding a - * function-like macro, and if so, whether the parameter list - * contains a parameter matching this token name. */ - if (parser->expansions && - parser->expansions->macro && - parser->expansions->macro->parameters) - { - string_list_t *list; - - list = parser->expansions->macro->parameters; - - if (_string_list_contains (list, identifier, parameter_index)) - return TOKEN_CLASS_ARGUMENT; - } - - /* If not a function-like macro parameter, we next check if - * this token is a macro itself. */ - + /* Is this token a defined macro? 
*/ macro = hash_table_find (parser->defines, identifier); if (macro == NULL) @@ -685,47 +663,21 @@ _define_function_macro (glcpp_parser_t *parser, } static void -_glcpp_parser_push_expansion_internal (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments, - token_node_t *replacements) +_glcpp_parser_push_expansion (glcpp_parser_t *parser, + macro_t *macro, + token_node_t *replacements) { expansion_node_t *node; node = xtalloc (parser, expansion_node_t); node->macro = macro; - node->arguments = arguments; node->replacements = replacements; node->next = parser->expansions; parser->expansions = node; } -static void -glcpp_parser_push_expansion_macro (glcpp_parser_t *parser, - macro_t *macro, - argument_list_t *arguments) -{ - _glcpp_parser_push_expansion_internal (parser, macro, arguments, - macro->replacements->head); -} - -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index) -{ - argument_list_t *arguments; - token_list_t *argument; - - arguments = parser->expansions->arguments; - - argument = _argument_list_member_at (arguments, argument_index); - - _glcpp_parser_push_expansion_internal (parser, NULL, NULL, - argument->head); -} - static void glcpp_parser_pop_expansion (glcpp_parser_t *parser) { @@ -752,7 +704,7 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier) assert (! macro->is_function); assert (! 
glcpp_parser_is_expanding (parser, identifier)); - glcpp_parser_push_expansion_macro (parser, macro, NULL); + _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); } void @@ -761,6 +713,9 @@ _expand_function_macro (glcpp_parser_t *parser, argument_list_t *arguments) { macro_t *macro; + token_list_t *expanded; + token_node_t *i, *j; + int parameter_index; macro = hash_table_find (parser->defines, identifier); assert (macro->is_function); @@ -777,7 +732,26 @@ _expand_function_macro (glcpp_parser_t *parser, return; } - glcpp_parser_push_expansion_macro (parser, macro, arguments); + expanded = _token_list_create (macro); + + for (i = macro->replacements->head; i; i = i->next) { + if (_string_list_contains (macro->parameters, i->value, + &parameter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + for (j = argument->head; j; j = j->next) + { + _token_list_append (expanded, j->type, + j->value); + } + } else { + _token_list_append (expanded, i->type, i->value); + } + } + + _glcpp_parser_push_expansion (parser, macro, expanded->head); } static int @@ -819,12 +793,6 @@ glcpp_parser_lex (glcpp_parser_t *parser) switch (glcpp_parser_classify_token (parser, yylval.str, &parameter_index)) { - case TOKEN_CLASS_ARGUMENT: - talloc_free (yylval.str); - glcpp_parser_push_expansion_argument (parser, - parameter_index); - goto RECURSE; - break; case TOKEN_CLASS_IDENTIFIER: return IDENTIFIER; break; diff --git a/glcpp.h b/glcpp.h index 048a9be76b..1537109ada 100644 --- a/glcpp.h +++ b/glcpp.h @@ -71,7 +71,6 @@ typedef struct argument_list { typedef struct glcpp_parser glcpp_parser_t; typedef enum { - TOKEN_CLASS_ARGUMENT, TOKEN_CLASS_IDENTIFIER, TOKEN_CLASS_IDENTIFIER_FINALIZED, TOKEN_CLASS_FUNC_MACRO, @@ -92,7 +91,6 @@ typedef struct { typedef struct expansion_node { macro_t *macro; - argument_list_t *arguments; token_node_t *replacements; struct expansion_node *next; } expansion_node_t; -- cgit v1.2.3 From 
d8327e575dd20fe696f3a44ada4bd4001b15db27 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 15:18:54 -0700 Subject: Implement (and add test) for token pasting. This is *very* easy to implement now that macro arguments are pre-expanded. --- glcpp-parse.y | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0691619acf..aa758f7e43 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -760,6 +760,8 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion_node_t *expansion; token_node_t *replacements; int parameter_index; + const char *token; + token_class_t class; /* Who says C can't do efficient tail recursion? */ RECURSE: @@ -779,12 +781,31 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - if (strcmp (replacements->value, "(") == 0) + token = replacements->value; + + /* Implement token pasting. */ + if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + token_node_t *next_node; + + next_node = replacements->next->next; + + if (next_node == NULL) { + fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); + exit (1); + } + + token = xtalloc_asprintf (parser, "%s%s", + token, next_node->value); + expansion->replacements = next_node->next; + } + + + if (strcmp (token, "(") == 0) return '('; - else if (strcmp (replacements->value, ")") == 0) + else if (strcmp (token, ")") == 0) return ')'; - yylval.str = xtalloc_strdup (parser, replacements->value); + yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ if (replacements->type == IDENTIFIER_FINALIZED) -- cgit v1.2.3 From b20d33c5c6fea8e392c26e9ab060efd14034f1f9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 20 May 2010 22:27:07 -0700 Subject: Implement #if, #else, #elif, and #endif with tests. 
So far the only expression implemented is a single integer literal, but obviously that's easy to extend. Various things including nesting are tested here. --- glcpp-lex.l | 32 ++++++++++++++ glcpp-parse.y | 109 ++++++++++++++++++++++++++++++++++++++++++++-- glcpp.h | 12 +++++ tests/040-token-pasting.c | 2 + tests/041-if-0.c | 5 +++ tests/042-if-1.c | 5 +++ tests/043-if-0-else.c | 7 +++ tests/044-if-1-else.c | 7 +++ tests/045-if-0-elif.c | 11 +++++ tests/046-if-1-elsif.c | 11 +++++ tests/047-if-elif-else.c | 11 +++++ tests/048-if-nested.c | 11 +++++ tests/glcpp-test | 2 +- 13 files changed, 221 insertions(+), 4 deletions(-) create mode 100644 tests/040-token-pasting.c create mode 100644 tests/041-if-0.c create mode 100644 tests/042-if-1.c create mode 100644 tests/043-if-0-else.c create mode 100644 tests/044-if-1-else.c create mode 100644 tests/045-if-0-elif.c create mode 100644 tests/046-if-1-elsif.c create mode 100644 tests/047-if-elif-else.c create mode 100644 tests/048-if-nested.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 6138a9de12..825ce3d370 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -36,6 +36,7 @@ %x ST_DEFINE_OBJ_OR_FUNC %x ST_DEFINE_PARAMETER %x ST_DEFINE_VALUE +%x ST_IF %x ST_UNDEF %x ST_UNDEF_END @@ -44,11 +45,42 @@ NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* +INTEGER [0-9]+ IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ %% +{HASH}if{HSPACE}* { + BEGIN ST_IF; + return IF; +} + +{HASH}elif{HSPACE}* { + BEGIN ST_IF; + return ELIF; +} + +{INTEGER} { + yylval.ival = atoi (yytext); + return INTEGER; +} + +{HSPACE}+ + +\n { + BEGIN INITIAL; + return NEWLINE; +} + +{HASH}endif{HSPACE}* { + return ENDIF; +} + +{HASH}else{HSPACE}* { + return ELSE; +} + {HASH}undef{HSPACE}* { BEGIN ST_UNDEF; return UNDEF; diff --git a/glcpp-parse.y b/glcpp-parse.y index aa758f7e43..26432f2032 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -89,6 +89,16 @@ _token_list_append_list (token_list_t *list, token_list_t *tail); static void 
glcpp_parser_pop_expansion (glcpp_parser_t *parser); +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, + int condition); + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); + #define yylex glcpp_parser_lex static int @@ -108,8 +118,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF -%type punctuator +%token DEFINE ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%type expression INTEGER punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list @@ -143,8 +153,12 @@ input: } | input content { int is_token; + int skipping = 0; + + if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP) + skipping = 1; - if ($2 && strlen ($2)) { + if ($2 && strlen ($2) && ! 
skipping) { int c = $2[0]; int is_not_separator = ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || @@ -301,6 +315,28 @@ directive: | DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { _define_function_macro (parser, $2, $4, $6); } +| IF expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| IFDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro != NULL); + } +| IFNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro == NULL); + } +| ELIF expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); + } +| ELSE { + _glcpp_parser_skip_stack_change_if (parser, "else", 1); + } +| ENDIF { + _glcpp_parser_skip_stack_pop (parser); + } | UNDEF IDENTIFIER { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -314,6 +350,13 @@ directive: } ; +/* XXX: Need to fill out with all operators. 
*/ +expression: + INTEGER { + $$ = $1; + } +; + parameter_list: /* empty */ { $$ = _string_list_create (parser); @@ -567,6 +610,8 @@ glcpp_parser_create (void) parser->just_printed_separator = 1; parser->need_newline = 0; + parser->skip_stack = NULL; + return parser; } @@ -581,6 +626,8 @@ glcpp_parser_destroy (glcpp_parser_t *parser) { if (parser->need_newline) printf ("\n"); + if (parser->skip_stack) + fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); hash_table_dtor (parser->defines); talloc_free (parser); @@ -829,3 +876,59 @@ glcpp_parser_lex (glcpp_parser_t *parser) break; } } + +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition) +{ + skip_type_t current = SKIP_NO_SKIP; + skip_node_t *node; + + if (parser->skip_stack) + current = parser->skip_stack->type; + + node = xtalloc (parser, skip_node_t); + + if (current == SKIP_NO_SKIP) { + if (condition) + node->type = SKIP_NO_SKIP; + else + node->type = SKIP_TO_ELSE; + } else { + node->type = SKIP_TO_ENDIF; + } + + node->next = parser->skip_stack; + parser->skip_stack = node; +} + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, + int condition) +{ + if (parser->skip_stack == NULL) { + fprintf (stderr, "Error: %s without #if\n", type); + exit (1); + } + + if (parser->skip_stack->type == SKIP_TO_ELSE) { + if (condition) + parser->skip_stack->type = SKIP_NO_SKIP; + } else { + parser->skip_stack->type = SKIP_TO_ENDIF; + } +} + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser) +{ + skip_node_t *node; + + if (parser->skip_stack == NULL) { + fprintf (stderr, "Error: #endif without #if\n"); + exit (1); + } + + node = parser->skip_stack; + parser->skip_stack = node->next; + talloc_free (node); +} diff --git a/glcpp.h b/glcpp.h index 1537109ada..33ece8f92b 100644 --- a/glcpp.h +++ b/glcpp.h @@ -95,12 +95,24 @@ typedef struct expansion_node { struct expansion_node *next; } expansion_node_t; +typedef 
enum skip_type { + SKIP_NO_SKIP, + SKIP_TO_ELSE, + SKIP_TO_ENDIF +} skip_type_t; + +typedef struct skip_node { + skip_type_t type; + struct skip_node *next; +} skip_node_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; expansion_node_t *expansions; int just_printed_separator; int need_newline; + skip_node_t *skip_stack; }; void diff --git a/tests/040-token-pasting.c b/tests/040-token-pasting.c new file mode 100644 index 0000000000..caab3ba736 --- /dev/null +++ b/tests/040-token-pasting.c @@ -0,0 +1,2 @@ +#define paste(a,b) a ## b +paste(one , token) diff --git a/tests/041-if-0.c b/tests/041-if-0.c new file mode 100644 index 0000000000..2cab677d3e --- /dev/null +++ b/tests/041-if-0.c @@ -0,0 +1,5 @@ +success_1 +#if 0 +failure +#endif +success_2 diff --git a/tests/042-if-1.c b/tests/042-if-1.c new file mode 100644 index 0000000000..874a25cf41 --- /dev/null +++ b/tests/042-if-1.c @@ -0,0 +1,5 @@ +success_1 +#if 1 +success_2 +#endif +success_3 diff --git a/tests/043-if-0-else.c b/tests/043-if-0-else.c new file mode 100644 index 0000000000..323351f9db --- /dev/null +++ b/tests/043-if-0-else.c @@ -0,0 +1,7 @@ +success_1 +#if 0 +failure +#else +success_2 +#endif +success_3 diff --git a/tests/044-if-1-else.c b/tests/044-if-1-else.c new file mode 100644 index 0000000000..28dfc25c6f --- /dev/null +++ b/tests/044-if-1-else.c @@ -0,0 +1,7 @@ +success_1 +#if 1 +success_2 +#else +failure +#endif +success_3 diff --git a/tests/045-if-0-elif.c b/tests/045-if-0-elif.c new file mode 100644 index 0000000000..e50f686d46 --- /dev/null +++ b/tests/045-if-0-elif.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#elif 0 +failure_2 +#elif 1 +success_3 +#elif 1 +failure_3 +#endif +success_4 diff --git a/tests/046-if-1-elsif.c b/tests/046-if-1-elsif.c new file mode 100644 index 0000000000..130515a01e --- /dev/null +++ b/tests/046-if-1-elsif.c @@ -0,0 +1,11 @@ +success_1 +#if 1 +success_2 +#elif 0 +failure_1 +#elif 1 +failure_2 +#elif 0 +failure_3 +#endif +success_3 diff 
--git a/tests/047-if-elif-else.c b/tests/047-if-elif-else.c new file mode 100644 index 0000000000..e8f0838a9e --- /dev/null +++ b/tests/047-if-elif-else.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#elif 0 +failure_2 +#elif 0 +failure_3 +#else +success_2 +#endif +success_3 diff --git a/tests/048-if-nested.c b/tests/048-if-nested.c new file mode 100644 index 0000000000..fc4679c3be --- /dev/null +++ b/tests/048-if-nested.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#if 1 +failure_2 +#else +failure_3 +#endif +failure_4 +#endif +success_2 diff --git a/tests/glcpp-test b/tests/glcpp-test index 25685eeabe..022a236712 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -5,5 +5,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done -- cgit v1.2.3 From bcbd587b0f5312d85307785ee2df6e5906af4f7b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 10:37:38 -0700 Subject: Implement all operators specified for GLSL #if expressions (with tests). The operator coverage here is quite complete. The one big thing missing is that we are not yet doing macro expansion in #if lines. This makes the whole support fairly useless, so we plan to fix that shortcoming right away. 
--- glcpp-lex.l | 45 +++++++++++++++++ glcpp-parse.y | 97 +++++++++++++++++++++++++++++++++--- tests/049-if-expression-precedence.c | 6 +++ tests/050-if-defined.c | 19 +++++++ tests/051-if-relational.c | 35 +++++++++++++ 5 files changed, 195 insertions(+), 7 deletions(-) create mode 100644 tests/049-if-expression-precedence.c create mode 100644 tests/050-if-defined.c create mode 100644 tests/051-if-relational.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 825ce3d370..84166fb76f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -66,6 +66,51 @@ TOKEN [^[:space:](),]+ return INTEGER; } +"defined" { + return DEFINED; +} + +"<<" { + return LEFT_SHIFT; +} + +">>" { + return RIGHT_SHIFT; +} + +"<=" { + return LESS_OR_EQUAL; +} + +">=" { + return GREATER_OR_EQUAL; +} + +"==" { + return EQUAL; +} + +"!=" { + return NOT_EQUAL; +} + +"&&" { + return AND; +} + +"||" { + return OR; +} + +[-+*/%<>&^|()] { + return yytext[0]; +} + +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + {HSPACE}+ \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 26432f2032..0d3afa7af6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -118,13 +118,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SEPARATOR SPACE TOKEN UNDEF +%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF %type expression INTEGER punctuator %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list %type TOKEN argument_word argument_word_or_comma %type argument argument_or_comma replacement_list pp_tokens +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' 
'-' +%left '*' '/' '%' +%right UNARY /* Hard to remove shift/reduce conflicts documented as follows: * @@ -142,11 +153,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %% - /* We do all printing at the input level. - * - * The value for "input" is simply TOKEN or SEPARATOR so we - * can decide whether it's necessary to print a space - * character between any two. */ + /* We do all printing at the input level. */ input: /* empty */ { parser->just_printed_separator = 1; @@ -350,11 +357,87 @@ directive: } ; -/* XXX: Need to fill out with all operators. */ expression: INTEGER { $$ = $1; } +| expression OR expression { + $$ = $1 || $3; + } +| expression AND expression { + $$ = $1 && $3; + } +| expression '|' expression { + $$ = $1 | $3; + } +| expression '^' expression { + $$ = $1 ^ $3; + } +| expression '&' expression { + $$ = $1 & $3; + } +| expression NOT_EQUAL expression { + $$ = $1 != $3; + } +| expression EQUAL expression { + $$ = $1 == $3; + } +| expression GREATER_OR_EQUAL expression { + $$ = $1 >= $3; + } +| expression LESS_OR_EQUAL expression { + $$ = $1 <= $3; + } +| expression '>' expression { + $$ = $1 > $3; + } +| expression '<' expression { + $$ = $1 < $3; + } +| expression RIGHT_SHIFT expression { + $$ = $1 >> $3; + } +| expression LEFT_SHIFT expression { + $$ = $1 << $3; + } +| expression '-' expression { + $$ = $1 - $3; + } +| expression '+' expression { + $$ = $1 + $3; + } +| expression '%' expression { + $$ = $1 % $3; + } +| expression '/' expression { + $$ = $1 / $3; + } +| expression '*' expression { + $$ = $1 * $3; + } +| '!' expression %prec UNARY { + $$ = ! 
$2; + } +| '~' expression %prec UNARY { + $$ = ~ $2; + } +| '-' expression %prec UNARY { + $$ = - $2; + } +| '+' expression %prec UNARY { + $$ = + $2; + } +| DEFINED IDENTIFIER %prec UNARY { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + if (macro) + $$ = 1; + else + $$ = 0; + } +| '(' expression ')' { + $$ = $2; + } ; parameter_list: diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c new file mode 100644 index 0000000000..cea935220f --- /dev/null +++ b/tests/049-if-expression-precedence.c @@ -0,0 +1,6 @@ +#if 1 + 2 * 3 + - (25 % 17 - + 1) +failure with operator precedence +#else +success +#endif + diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c new file mode 100644 index 0000000000..9838cc747d --- /dev/null +++ b/tests/050-if-defined.c @@ -0,0 +1,19 @@ +#if defined foo +failure_1 +#else +success_1 +#endif +#define foo +#if defined foo +success_2 +#else +failure_2 +#endif +#undef foo +#if defined foo +failure_3 +#else +success_3 +#endif + + diff --git a/tests/051-if-relational.c b/tests/051-if-relational.c new file mode 100644 index 0000000000..c3db488e0d --- /dev/null +++ b/tests/051-if-relational.c @@ -0,0 +1,35 @@ +#if 3 < 2 +failure_1 +#else +success_1 +#endif + +#if 3 >= 2 +success_2 +#else +failure_2 +#endif + +#if 2 + 3 <= 5 +success_3 +#else +failure_3 +#endif + +#if 3 - 2 == 1 +success_3 +#else +failure_3 +#endif + +#if 1 > 3 +failure_4 +#else +success_4 +#endif + +#if 1 != 5 +success_5 +#else +failure_5 +#endif -- cgit v1.2.3 From 89b933a24375a2ebed383290f24360a14edbac6b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:26:42 -0700 Subject: Add the '~' operator to the lexer. This was simply missing before, (and unnoticed since we had no test of the '~' operator). 
--- glcpp-lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 84166fb76f..fe95508a32 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -102,7 +102,7 @@ TOKEN [^[:space:](),]+ return OR; } -[-+*/%<>&^|()] { +[-+*/%<>&^|()~] { return yytext[0]; } -- cgit v1.2.3 From 35419095f8d92f7dc5de472da3a0271d343cbcba Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:27:23 -0700 Subject: Switch to intmax_t (rather than int) for #if expressions This is what the C99 specification demands. And the GLSL specification says that we should follow the "standard C++" rules for #if condition expressions rather than the GLSL rules, (which only support a 32-bit integer). --- glcpp-parse.y | 4 +++- glcpp.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0d3afa7af6..2c0fe9a6af 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -107,6 +107,7 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { + intmax_t imaxval; int ival; char *str; argument_list_t *argument_list; @@ -119,7 +120,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %lex-param {glcpp_parser_t *parser} %token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type expression INTEGER punctuator +%type punctuator +%type expression INTEGER %type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO %type argument_list %type macro parameter_list diff --git a/glcpp.h b/glcpp.h index 33ece8f92b..503731b85b 100644 --- a/glcpp.h +++ b/glcpp.h @@ -24,6 +24,8 @@ #ifndef GLCPP_H #define GLCPP_H +#include + #include #include "hash_table.h" -- cgit v1.2.3 From 03f6d5d2d4a6c42a197ee8eb4e26b87c87bbe43e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:29:02 -0700 Subject: Add support for octal and hexadecimal integer literals. In addition to the decimal literals which we already support. 
Note that we use strtoll here to get the large-width integers demanded by the specification. --- glcpp-lex.l | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index fe95508a32..ee1f6e3aee 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -45,10 +45,13 @@ NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* -INTEGER [0-9]+ IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* TOKEN [^[:space:](),]+ +DECIMAL_INTEGER [1-9][0-9]*[uU]? +OCTAL_INTEGER 0[0-7]*[uU]? +HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? + %% {HASH}if{HSPACE}* { @@ -61,8 +64,18 @@ TOKEN [^[:space:](),]+ return ELIF; } -{INTEGER} { - yylval.ival = atoi (yytext); +{DECIMAL_INTEGER} { + yylval.ival = strtoll (yytext, NULL, 10); + return INTEGER; +} + +{OCTAL_INTEGER} { + yylval.ival = strtoll (yytext + 1, NULL, 8); + return INTEGER; +} + +{HEXADECIMAL_INTEGER} { + yylval.ival = strtoll (yytext + 2, NULL, 16); return INTEGER; } -- cgit v1.2.3 From bb9315f8047770585391c56973ef26c30f74d603 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:30:06 -0700 Subject: Add test of bitwise operators and octal/hexadecimal literals. This new test covers several features from the last few commits. This test passes already. 
--- tests/052-if-bitwise.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/052-if-bitwise.c diff --git a/tests/052-if-bitwise.c b/tests/052-if-bitwise.c new file mode 100644 index 0000000000..2d8e45eb61 --- /dev/null +++ b/tests/052-if-bitwise.c @@ -0,0 +1,20 @@ +#if (0xaaaaaaaa | 0x55555555) != 4294967295 +failure_1 +#else +success_1 +#endif +#if (0x12345678 ^ 0xfdecba98) == 4023971040 +success_2 +#else +failure_2 +#endif +#if (~ 0xdeadbeef) != -3735928560 +failure_3 +#else +success_3 +#endif +#if (0667 & 0733) == 403 +success_4 +#else +failure_4 +#endif -- cgit v1.2.3 From 00f1ec421edf73516fdcfbbdb651f13eeefe8f08 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 24 May 2010 11:33:28 -0700 Subject: Add test for '/', '<<', and '>>' in #if expressions. These operators have been supported already, but were not covered in existing tests yet. So this test passes already. --- tests/053-if-divide-and-shift.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/053-if-divide-and-shift.c diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c new file mode 100644 index 0000000000..ddc1573ab2 --- /dev/null +++ b/tests/053-if-divide-and-shift.c @@ -0,0 +1,16 @@ +#if (15 / 2) != 7 +failure_1 +#else +success_1 +#endif +#if (1 << 12) == 4096 +success_2 +#else +failure_2 +#endif +#if (31762 >> 8) != 124 +failure_3 +#else +success_3 +#endif + -- cgit v1.2.3 From 3ff81670848abb29b92e78f45080ad36cc85001c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 13:09:03 -0700 Subject: Starting over with the C99 grammar for the preprocessor. This is a fresh start with a much simpler approach for the flex/bison portions of the preprocessor. This isn't functional yet, (produces no output), but can at least read all of our test cases without any parse errors. The grammar here is based on the grammar provided for the preprocessor in the C99 specification. 
--- glcpp-lex.l | 197 ++++----------------------- glcpp-parse.y | 401 +++++++++---------------------------------------------- tests/glcpp-test | 5 +- 3 files changed, 98 insertions(+), 505 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index ee1f6e3aee..f1dd11ea9b 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,14 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFINE_OBJ_OR_FUNC -%x ST_DEFINE_PARAMETER -%x ST_DEFINE_VALUE -%x ST_IF -%x ST_UNDEF -%x ST_UNDEF_END - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN [^[:space:](),]+ +PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? @@ -54,208 +47,74 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% -{HASH}if{HSPACE}* { - BEGIN ST_IF; - return IF; -} - -{HASH}elif{HSPACE}* { - BEGIN ST_IF; - return ELIF; +{HASH}define{HSPACE}+/{IDENTIFIER}"(" { + return HASH_DEFINE_FUNC; } -{DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; +{HASH}define { + return HASH_DEFINE_OBJ; } -{OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; +{HASH}undef { + return HASH_UNDEF; } -{HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; +{HASH} { + return HASH; } -"defined" { - return DEFINED; +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -[-+*/%<>&^|()~] { - return yytext[0]; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}+ - -\n { - BEGIN 
INITIAL; - return NEWLINE; -} - -{HASH}endif{HSPACE}* { - return ENDIF; +"##" { + return PASTE; } -{HASH}else{HSPACE}* { - return ELSE; -} - -{HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; - return UNDEF; -} - -{IDENTIFIER} { - BEGIN ST_UNDEF_END; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -{HSPACE}* - -\n { - BEGIN INITIAL; -} - - /* We use the ST_DEFINE and ST_DEFVAL states so that we can - * pass a space token, (yes, a token for whitespace!), since - * the preprocessor specification requires distinguishing - * "#define foo()" from "#define foo ()". - */ -{HASH}define{HSPACE}* { - BEGIN ST_DEFINE; - return DEFINE; -} - -{IDENTIFIER} { - BEGIN ST_DEFINE_OBJ_OR_FUNC; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{HSPACE}+ { - BEGIN ST_DEFINE_VALUE; - return SPACE; -} - -"(" { - BEGIN ST_DEFINE_PARAMETER; - return '('; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"," { - return ','; -} - -")" { - BEGIN ST_DEFINE_VALUE; - return ')'; -} - -{HSPACE}+ - -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -[(),] { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; +{PUNCTUATION} { + return yytext[0]; } -{HSPACE}+ - -\n { - BEGIN INITIAL; +\n { return NEWLINE; } -{IDENTIFIER} { - int parameter_index; +{OTHER} { yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_classify_token (yyextra, yylval.str, - ¶meter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - - } -} - -[(),] { - return yytext[0]; -} - -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = 
xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -\n { - yyextra->need_newline = 1; + return OTHER; } {HSPACE}+ diff --git a/glcpp-parse.y b/glcpp-parse.y index 2c0fe9a6af..ebb28ed196 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -119,366 +119,97 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF -%type punctuator -%type expression INTEGER -%type content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO -%type argument_list -%type macro parameter_list -%type TOKEN argument_word argument_word_or_comma -%type argument argument_or_comma replacement_list pp_tokens -%left OR -%left AND -%left '|' -%left '^' -%left '&' -%left EQUAL NOT_EQUAL -%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL -%left LEFT_SHIFT RIGHT_SHIFT -%left '+' '-' -%left '*' '/' '%' -%right UNARY - -/* Hard to remove shift/reduce conflicts documented as follows: - * - * 1. '(' after FUNC_MACRO name which is correctly resolved to shift - * to form macro invocation rather than reducing directly to - * content. - * - * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to - * shift to form macro invocation rather than reducing directly to - * argument. - * - * 3. Similarly again now that we added argument_or_comma as well. - */ -%expect 3 +%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE + + /* Stale stuff just to allow code to compile. */ +%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO %% - /* We do all printing at the input level. 
*/ input: - /* empty */ { - parser->just_printed_separator = 1; - } -| input content { - int is_token; - int skipping = 0; - - if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP) - skipping = 1; - - if ($2 && strlen ($2) && ! skipping) { - int c = $2[0]; - int is_not_separator = ((c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_')); - - if (! parser->just_printed_separator && is_not_separator) - { - printf (" "); - } - printf ("%s", $2); - - if (is_not_separator) - parser->just_printed_separator = 0; - else - parser->just_printed_separator = 1; - } - - if ($2) - talloc_free ($2); - - if (parser->need_newline) { - printf ("\n"); - parser->just_printed_separator = 1; - parser->need_newline = 0; - } - } -; - -content: - IDENTIFIER { - $$ = $1; - } -| IDENTIFIER_FINALIZED { - $$ = $1; - } -| TOKEN { - $$ = $1.value; - } -| FUNC_MACRO { - $$ = $1; - } -| directive { - $$ = talloc_strdup (parser, "\n"); - } -| punctuator { - $$ = talloc_asprintf (parser, "%c", $1); - } -| macro { - $$ = NULL; - } + /* empty */ +| input line ; -punctuator: - '(' { $$ = '('; } -| ')' { $$ = ')'; } -| ',' { $$ = ','; } - ; - -macro: - FUNC_MACRO '(' argument_list ')' { - _expand_function_macro (parser, $1, $3); - } -| OBJ_MACRO { - _expand_object_macro (parser, $1); - talloc_free ($1); - } +line: + control_line +| text_line +| HASH non_directive ; -argument_list: - /* empty */ { - $$ = _argument_list_create (parser); - } -| argument { - $$ = _argument_list_create (parser); - _argument_list_append ($$, $1); - } -| argument_list ',' argument { - _argument_list_append ($1, $3); - $$ = $1; - } -; - -argument: - argument_word { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument argument_word { - _token_list_append ($1, $2.type, $2.value); - talloc_free ($2.value); - $$ = $1; - } -| argument '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - 
_token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } +control_line: + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE +| HASH NEWLINE ; -argument_word: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } +identifier_list: + IDENTIFIER +| identifier_list ',' IDENTIFIER ; - /* XXX: The body of argument_or_comma is the same as the body - * of argument, but with "argument" and "argument_word" - * changed to "argument_or_comma" and - * "argument_word_or_comma". It would be nice to have less - * redundancy here, but I'm not sure how. - * - * It would also be nice to have a less ugly grammar to have - * to implement, but such is the C preprocessor. 
- */ -argument_or_comma: - argument_word_or_comma { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| argument_or_comma argument_word_or_comma { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } -| argument_or_comma '(' argument_or_comma ')' { - _token_list_append ($1, '(', "("); - _token_list_append_list ($1, $3); - _token_list_append ($1, ')', ")"); - $$ = $1; - } +text_line: + NEWLINE +| pp_tokens NEWLINE ; -argument_word_or_comma: - IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; } -| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; } -| TOKEN { $$ = $1; } -| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; } -| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); } -| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); } +non_directive: + pp_tokens NEWLINE ; -directive: - DEFINE IDENTIFIER NEWLINE { - token_list_t *list = _token_list_create (parser); - _define_object_macro (parser, $2, list); - } -| DEFINE IDENTIFIER SPACE replacement_list NEWLINE { - _define_object_macro (parser, $2, $4); - } -| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE { - _define_function_macro (parser, $2, $4, $6); - } -| IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); - } -| IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro != NULL); - } -| IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - _glcpp_parser_skip_stack_push_if (parser, macro == NULL); - } -| ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); - } -| ELSE { - _glcpp_parser_skip_stack_change_if (parser, "else", 1); - } -| ENDIF { - _glcpp_parser_skip_stack_pop (parser); - } -| UNDEF IDENTIFIER { - string_list_t *macro = hash_table_find (parser->defines, $2); - if (macro) { - /* 
XXX: Need hash table to support a real way - * to remove an element rather than prefixing - * a new node with data of NULL like this. */ - hash_table_insert (parser->defines, NULL, $2); - talloc_free (macro); - } - talloc_free ($2); - } +replacement_list: + /* empty */ +| pp_tokens ; -expression: - INTEGER { - $$ = $1; - } -| expression OR expression { - $$ = $1 || $3; - } -| expression AND expression { - $$ = $1 && $3; - } -| expression '|' expression { - $$ = $1 | $3; - } -| expression '^' expression { - $$ = $1 ^ $3; - } -| expression '&' expression { - $$ = $1 & $3; - } -| expression NOT_EQUAL expression { - $$ = $1 != $3; - } -| expression EQUAL expression { - $$ = $1 == $3; - } -| expression GREATER_OR_EQUAL expression { - $$ = $1 >= $3; - } -| expression LESS_OR_EQUAL expression { - $$ = $1 <= $3; - } -| expression '>' expression { - $$ = $1 > $3; - } -| expression '<' expression { - $$ = $1 < $3; - } -| expression RIGHT_SHIFT expression { - $$ = $1 >> $3; - } -| expression LEFT_SHIFT expression { - $$ = $1 << $3; - } -| expression '-' expression { - $$ = $1 - $3; - } -| expression '+' expression { - $$ = $1 + $3; - } -| expression '%' expression { - $$ = $1 % $3; - } -| expression '/' expression { - $$ = $1 / $3; - } -| expression '*' expression { - $$ = $1 * $3; - } -| '!' expression %prec UNARY { - $$ = ! 
$2; - } -| '~' expression %prec UNARY { - $$ = ~ $2; - } -| '-' expression %prec UNARY { - $$ = - $2; - } -| '+' expression %prec UNARY { - $$ = + $2; - } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } -| '(' expression ')' { - $$ = $2; - } +pp_tokens: + preprocessing_token +| pp_tokens preprocessing_token ; -parameter_list: - /* empty */ { - $$ = _string_list_create (parser); - } -| IDENTIFIER { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - talloc_free ($1); - } -| parameter_list ',' IDENTIFIER { - _string_list_append_item ($1, $3); - talloc_free ($3); - $$ = $1; - } +preprocessing_token: + IDENTIFIER +| punctuator +| OTHER ; -replacement_list: - /* empty */ { - $$ = _token_list_create (parser); - } -| pp_tokens { - $$ = $1; - } +punctuator: + '[' +| ']' +| '(' +| ')' +| '{' +| '}' +| '.' +| '&' +| '*' +| '+' +| '-' +| '~' +| '!' +| '/' +| '%' +| LEFT_SHIFT +| RIGHT_SHIFT +| '<' +| '>' +| LESS_OR_EQUAL +| GREATER_OR_EQUAL +| EQUAL +| NOT_EQUAL +| '^' +| '|' +| AND +| OR +| ';' +| ',' +| PASTE ; -pp_tokens: - TOKEN { - $$ = _token_list_create (parser); - _token_list_append ($$, $1.type, $1.value); - } -| pp_tokens TOKEN { - _token_list_append ($1, $2.type, $2.value); - $$ = $1; - } -; - %% string_list_t * diff --git a/tests/glcpp-test b/tests/glcpp-test index 022a236712..868b03cce8 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,9 +1,12 @@ #!/bin/sh +set -e + +echo "Caution: These results are just verifying parse-ability, not correctness!" 
for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out +# diff -B -u $test.expected $test.out done -- cgit v1.2.3 From 9bb796f33ac67abdf6c0bf55a06b0d8448caa3d3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:40:47 -0700 Subject: Add xtalloc_reference. Yet another talloc wrapper that should come in handy. --- glcpp.h | 6 ++++++ xtalloc.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/glcpp.h b/glcpp.h index 503731b85b..6171ce8b4a 100644 --- a/glcpp.h +++ b/glcpp.h @@ -164,4 +164,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n); char * xtalloc_asprintf (const void *t, const char *fmt, ...); +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location); + +#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__) + #endif diff --git a/xtalloc.c b/xtalloc.c index e52d12ac6b..656ac2d6cb 100644 --- a/xtalloc.c +++ b/xtalloc.c @@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...) va_end(ap); return ret; } + +void * +_xtalloc_reference_loc (const void *context, + const void *ptr, const char *location) +{ + void *ret; + + ret = _talloc_reference_loc (context, ptr, location); + if (ret == NULL) { + fprintf (stderr, "Out of memory.\n"); + exit (1); + } + + return ret; +} -- cgit v1.2.3 From 80dc60b9c3529cf438948d50b9619e8af2fad880 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:42:00 -0700 Subject: Delete some trailing whitespace. This pernicious stuff managed to sneak in on us. 
--- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ebb28ed196..c53370a89a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -95,7 +95,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); static void _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, int condition); - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); @@ -243,7 +243,7 @@ _string_list_append_item (string_list_t *list, const char *str) node = xtalloc (list, string_node_t); node->str = xtalloc_strdup (node, str); - + node->next = NULL; if (list->head == NULL) { @@ -404,7 +404,7 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } - + void yyerror (void *scanner, const char *error) { @@ -733,7 +733,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type, parser->skip_stack->type = SKIP_TO_ENDIF; } } - + static void _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser) { -- cgit v1.2.3 From 808401fd79eea9fa2c965f9f235a753c0cb0d920 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 14:52:43 -0700 Subject: Store parsed tokens as token list and print all text lines. Still not doing any macro expansion just yet. But it should be fairly easy from here. --- glcpp-parse.y | 227 +++++++++++++++++++++++++++++++++++++++++-------------- glcpp.h | 27 +++++-- tests/glcpp-test | 5 +- 3 files changed, 195 insertions(+), 64 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index c53370a89a..991b8a0b85 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -77,15 +77,29 @@ _argument_list_length (argument_list_t *list); token_list_t * _argument_list_member_at (argument_list_t *list, int index); +/* Note: This function talloc_steal()s the str pointer. 
*/ +token_t * +_token_create_str (void *ctx, int type, char *str); + +token_t * +_token_create_ival (void *ctx, int type, int ival); + token_list_t * _token_list_create (void *ctx); +/* Note: This function add a talloc_reference() to token. + * + * You may want to talloc_unlink any current reference if you no + * longer need it. */ void -_token_list_append (token_list_t *list, int type, const char *value); +_token_list_append (token_list_t *list, token_t *token); void _token_list_append_list (token_list_t *list, token_list_t *tail); +void +_token_list_print (token_list_t *list); + static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -107,12 +121,9 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} %union { - intmax_t imaxval; int ival; char *str; - argument_list_t *argument_list; - string_list_t *string_list; - token_t token; + token_t *token; token_list_t *token_list; } @@ -121,6 +132,10 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE +%type punctuator +%type IDENTIFIER OTHER +%type preprocessing_token +%type pp_tokens replacement_list text_line /* Stale stuff just to allow code to compile. 
*/ %token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO @@ -134,7 +149,11 @@ input: line: control_line -| text_line +| text_line { + _token_list_print ($1); + printf ("\n"); + talloc_free ($1); + } | HASH non_directive ; @@ -152,7 +171,7 @@ identifier_list: ; text_line: - NEWLINE + NEWLINE { $$ = NULL; } | pp_tokens NEWLINE ; @@ -161,55 +180,68 @@ non_directive: ; replacement_list: - /* empty */ + /* empty */ { $$ = NULL; } | pp_tokens ; pp_tokens: - preprocessing_token -| pp_tokens preprocessing_token + preprocessing_token { + $$ = _token_list_create (parser); + _token_list_append ($$, $1); + talloc_unlink (parser, $1); + } +| pp_tokens preprocessing_token { + $$ = $1; + _token_list_append ($$, $2); + talloc_unlink (parser, $2); + } ; preprocessing_token: - IDENTIFIER -| punctuator -| OTHER + IDENTIFIER { + $$ = _token_create_str (parser, IDENTIFIER, $1); + } +| punctuator { + $$ = _token_create_ival (parser, $1, $1); + } +| OTHER { + $$ = _token_create_str (parser, OTHER, $1); + } ; punctuator: - '[' -| ']' -| '(' -| ')' -| '{' -| '}' -| '.' -| '&' -| '*' -| '+' -| '-' -| '~' -| '!' -| '/' -| '%' -| LEFT_SHIFT -| RIGHT_SHIFT -| '<' -| '>' -| LESS_OR_EQUAL -| GREATER_OR_EQUAL -| EQUAL -| NOT_EQUAL -| '^' -| '|' -| AND -| OR -| ';' -| ',' -| PASTE + '[' { $$ = '['; } +| ']' { $$ = ']'; } +| '(' { $$ = '('; } +| ')' { $$ = ')'; } +| '{' { $$ = '{'; } +| '}' { $$ = '}'; } +| '.' { $$ = '.'; } +| '&' { $$ = '&'; } +| '*' { $$ = '*'; } +| '+' { $$ = '+'; } +| '-' { $$ = '-'; } +| '~' { $$ = '~'; } +| '!' 
{ $$ = '!'; } +| '/' { $$ = '/'; } +| '%' { $$ = '%'; } +| LEFT_SHIFT { $$ = LEFT_SHIFT; } +| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } +| '<' { $$ = '<'; } +| '>' { $$ = '>'; } +| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } +| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } +| EQUAL { $$ = EQUAL; } +| NOT_EQUAL { $$ = NOT_EQUAL; } +| '^' { $$ = '^'; } +| '|' { $$ = '|'; } +| AND { $$ = AND; } +| OR { $$ = OR; } +| ';' { $$ = ';'; } +| ',' { $$ = ','; } +| PASTE { $$ = PASTE; } ; - %% string_list_t * @@ -361,6 +393,77 @@ _argument_list_member_at (argument_list_t *list, int index) return NULL; } +/* Note: This function talloc_steal()s the str pointer. */ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.str = talloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = xtalloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + token_list_t * _token_list_create (void *ctx) { @@ -374,13 +477,12 @@ _token_list_create (void *ctx) } void -_token_list_append (token_list_t *list, int type, const char *value) +_token_list_append (token_list_t *list, token_t 
*token) { token_node_t *node; node = xtalloc (list, token_node_t); - node->type = type; - node->value = xtalloc_strdup (list, value); + node->token = xtalloc_reference (list, token); node->next = NULL; @@ -405,6 +507,21 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } +void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _token_print (node->token); + if (node->next) + printf (" "); + } +} + void yyerror (void *scanner, const char *error) { @@ -598,7 +715,8 @@ _expand_function_macro (glcpp_parser_t *parser, expanded = _token_list_create (macro); for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, i->value, + if (_string_list_contains (macro->parameters, + i->token->value.str, ¶meter_index)) { token_list_t *argument; @@ -606,11 +724,10 @@ _expand_function_macro (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->type, - j->value); + _token_list_append (expanded, j->token); } } else { - _token_list_append (expanded, i->type, i->value); + _token_list_append (expanded, i->token); } } @@ -644,10 +761,10 @@ glcpp_parser_lex (glcpp_parser_t *parser) expansion->replacements = replacements->next; - token = replacements->value; + token = replacements->token->value.str; /* Implement token pasting. 
*/ - if (replacements->next && strcmp (replacements->next->value, "##") == 0) { + if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { token_node_t *next_node; next_node = replacements->next->next; @@ -658,7 +775,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) } token = xtalloc_asprintf (parser, "%s%s", - token, next_node->value); + token, next_node->token->value.str); expansion->replacements = next_node->next; } @@ -671,7 +788,7 @@ glcpp_parser_lex (glcpp_parser_t *parser) yylval.str = xtalloc_strdup (parser, token); /* Carefully refuse to expand any finalized identifier. */ - if (replacements->type == IDENTIFIER_FINALIZED) + if (replacements->token->type == IDENTIFIER_FINALIZED) return IDENTIFIER_FINALIZED; switch (glcpp_parser_classify_token (parser, yylval.str, diff --git a/glcpp.h b/glcpp.h index 6171ce8b4a..261254a17c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -44,21 +44,34 @@ typedef struct string_list { string_node_t *tail; } string_list_t; -typedef struct token { +typedef struct token token_t; +typedef struct token_list token_list_t; + +typedef union YYSTYPE +{ + int ival; + char *str; + token_t *token; + token_list_t *token_list; +} YYSTYPE; + +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 + +struct token { int type; - char *value; -} token_t; + YYSTYPE value; +}; typedef struct token_node { - int type; - const char *value; + token_t *token; struct token_node *next; } token_node_t; -typedef struct token_list { +struct token_list { token_node_t *head; token_node_t *tail; -} token_list_t; +}; typedef struct argument_node { token_list_t *argument; diff --git a/tests/glcpp-test b/tests/glcpp-test index 868b03cce8..34cca88330 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -7,6 +7,7 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected -# diff -B -u $test.expected $test.out +# grep -v '^#' < $test.gcc > $test.expected + 
grep -v '^[ ]*#' < $test > $test.expected + diff -w -u $test.expected $test.out done -- cgit v1.2.3 From 9fb8b7a495c9dc6f9a62cf82300fae5925af92fc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:04:32 -0700 Subject: Make the lexer pass whitespace through (as OTHER tokens) for text lines. With this change, we can recreate the original text-line input exactly. Previously we were inserting a space between every pair of tokens so our output had a lot more whitespace than our input. With this change, we can drop the "-w" option to diff and match the input exactly. --- glcpp-lex.l | 72 +++++++++++++++++++++++++++++++++++++++++++++----------- glcpp-parse.y | 2 -- tests/glcpp-test | 2 +- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f1dd11ea9b..7b5cdd57a0 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,21 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" + /* This lexer has two states: + * + * The CONTROL state is for control lines (directives) + * It lexes exactly as specified in the C99 specification. + * + * The INITIAL state is for input lines. In this state, we + * make the OTHER token much more broad in that it now + * includes tokens consisting entirely of whitespace. This + * allows us to pass text through verbatim. It avoids the + * "inadvertent token pasting" problem that would occur if we + * just printed tokens, while also avoiding excess whitespace + * insertion in the output.*/ + +%x CONTROL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
%% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { + BEGIN CONTROL; return HASH_DEFINE_FUNC; } {HASH}define { + BEGIN CONTROL; return HASH_DEFINE_OBJ; } {HASH}undef { + BEGIN CONTROL; return HASH_UNDEF; } {HASH} { + BEGIN CONTROL; return HASH; } -{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -"##" { +"##" { return PASTE; } -{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -\n { +{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ + +\n { + BEGIN INITIAL; return NEWLINE; } -{OTHER} { +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +{OTHER}+ { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } -{HSPACE}+ +\n { + return NEWLINE; +} + +. 
{ + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 991b8a0b85..957421b864 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -517,8 +517,6 @@ _token_list_print (token_list_t *list) for (node = list->head; node; node = node->next) { _token_print (node->token); - if (node->next) - printf (" "); } } diff --git a/tests/glcpp-test b/tests/glcpp-test index 34cca88330..8074e47119 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -9,5 +9,5 @@ for test in *.c; do gcc -E $test -o $test.gcc # grep -v '^#' < $test.gcc > $test.expected grep -v '^[ ]*#' < $test > $test.expected - diff -w -u $test.expected $test.out + diff -u $test.expected $test.out done -- cgit v1.2.3 From ae6517f4a83981ae363bbbfe439ec23e8deb04b1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:24:59 -0700 Subject: Implement expansion of object-like macros. For this we add an "active" string_list_t to the parser. This makes the current expansion_list_t in the parser obsolete, but we don't remove that yet. With this change we can now start passing some actual tests, so we turn on real testing in the test suite again. I expect to implement things more or less in the same order as before, so the test suite now halts on first error. With this change the first 8 tests in the suite pass, (object-like macros with chaining and recursion). 
--- glcpp-parse.y | 128 +++++++++++++++++++++++++++++++++++++++++++++++-------- glcpp.h | 1 + tests/glcpp-test | 5 +-- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 957421b864..b3ef177a6d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -59,6 +59,12 @@ _string_list_append_item (string_list_t *list, const char *str); void _string_list_append_list (string_list_t *list, string_list_t *tail); +void +_string_list_push (string_list_t *list, const char *str); + +void +_string_list_pop (string_list_t *list); + int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -98,7 +104,8 @@ void _token_list_append_list (token_list_t *list, token_list_t *tail); void -_token_list_print (token_list_t *list); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void glcpp_parser_pop_expansion (glcpp_parser_t *parser); @@ -144,21 +151,24 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line +| input line { + printf ("\n"); + } ; line: control_line | text_line { - _token_list_print ($1); - printf ("\n"); + _glcpp_parser_print_expanded_token_list (parser, $1); talloc_free ($1); } | HASH non_directive ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + _define_object_macro (parser, $2, $3); + } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE | HASH_UNDEF IDENTIFIER NEWLINE @@ -287,6 +297,42 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } +void +_string_list_push (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = xtalloc (list, string_node_t); + node->str = xtalloc_strdup (node, str); + node->next = list->head; + + if (list->tail == NULL) { + list->tail = node; + } + list->head = node; +} + +void 
+_string_list_pop (string_list_t *list) +{ + string_node_t *node; + + node = list->head; + + if (node == NULL) { + fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); + exit (1); + } + + list->head = node->next; + if (list->tail == node) { + assert (node->next == NULL); + list->tail = NULL; + } + + talloc_free (node); +} + int _string_list_contains (string_list_t *list, const char *member, int *index) { @@ -507,19 +553,6 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->tail = tail->tail; } -void -_token_list_print (token_list_t *list) -{ - token_node_t *node; - - if (list == NULL) - return; - - for (node = list->head; node; node = node->next) { - _token_print (node->token); - } -} - void yyerror (void *scanner, const char *error) { @@ -536,6 +569,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); + parser->active = _string_list_create (parser); parser->expansions = NULL; parser->just_printed_separator = 1; @@ -605,6 +639,64 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } +void +_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, + token_t *token) +{ + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + _token_print (token); + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just print directly. */ + if (macro == NULL) { + printf ("%s", identifier); + return; + } + + /* We're not (yet) supporting function-like macros. */ + if (macro->is_function) { + printf ("%s", identifier); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). 
*/ + if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); +} + +void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + _glcpp_parser_print_expanded_token (parser, node->token); + if (node->next) + printf (" "); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, diff --git a/glcpp.h b/glcpp.h index 261254a17c..bd599d7301 100644 --- a/glcpp.h +++ b/glcpp.h @@ -124,6 +124,7 @@ typedef struct skip_node { struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; + string_list_t *active; expansion_node_t *expansions; int just_printed_separator; int need_newline; diff --git a/tests/glcpp-test b/tests/glcpp-test index 8074e47119..6304155210 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,13 +1,10 @@ #!/bin/sh set -e -echo "Caution: These results are just verifying parse-ability, not correctness!" - for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.out gcc -E $test -o $test.gcc -# grep -v '^#' < $test.gcc > $test.expected - grep -v '^[ ]*#' < $test > $test.expected + grep -v '^#' < $test.gcc > $test.expected diff -u $test.expected $test.out done -- cgit v1.2.3 From e6fb7827c96451d4a09dfda31979a6b9cb27301e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:28:58 -0700 Subject: Implement #undef. Which is as simple as copying the former action back from the git history. Now all tests through test 11 pass. 
--- glcpp-parse.y | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b3ef177a6d..830a6232d8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -171,7 +171,17 @@ control_line: } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE | HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE -| HASH_UNDEF IDENTIFIER NEWLINE +| HASH_UNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + if (macro) { + /* XXX: Need hash table to support a real way + * to remove an element rather than prefixing + * a new node with data of NULL like this. */ + hash_table_insert (parser->defines, NULL, $2); + talloc_free (macro); + } + talloc_free ($2); + } | HASH NEWLINE ; -- cgit v1.2.3 From b1854fdfb6b567fa61d544d8080e2acb4cc78dc1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:28:26 -0700 Subject: Implement simplified substitution for function-like macro invocation. This supports function-like macro invocation but without any argument substitution. This now makes test 11 through 14 pass. --- glcpp-lex.l | 14 ++++- glcpp-parse.y | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++------- glcpp.h | 1 + 3 files changed, 168 insertions(+), 21 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 7b5cdd57a0..b1980742d3 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -144,6 +144,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return IDENTIFIER; } +"(" { + return '('; +} + +")" { + return ')'; +} + +"," { + return ','; +} + {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; @@ -151,7 +163,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; + return SPACE; } \n { diff --git a/glcpp-parse.y b/glcpp-parse.y index 830a6232d8..60b414e43a 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -127,20 +127,14 @@ glcpp_parser_lex (glcpp_parser_t *parser); %} -%union { - int ival; - char *str; - token_t *token; - token_list_t *token_list; -} - %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF +%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE %type <ival> punctuator -%type <str> IDENTIFIER OTHER +%type <str> IDENTIFIER OTHER SPACE +%type <string_list> identifier_list %type <token> preprocessing_token %type <token_list> pp_tokens replacement_list text_line @@ -169,8 +163,12 @@ control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } -| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE -| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE +| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, NULL, $5); + } +| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { + _define_function_macro (parser, $2, $4, $6); + } | HASH_UNDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); if (macro) { @@ -186,8 +184,16 @@ control_line: ; identifier_list: - IDENTIFIER -| identifier_list ',' IDENTIFIER + IDENTIFIER { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + talloc_steal ($$, $1); + } +| identifier_list ',' IDENTIFIER { + $$ = $1; + _string_list_append_item ($$, $3); + talloc_steal ($$, $3); + } ; text_line: @@ -227,6 +233,9 @@ preprocessing_token: | OTHER { $$ = _token_create_str (parser, OTHER, $1); } +| SPACE { + $$ = _token_create_str (parser, OTHER,
$1); + } ; punctuator: @@ -649,7 +658,14 @@ glcpp_parser_classify_token (glcpp_parser_t *parser, return TOKEN_CLASS_OBJ_MACRO; } -void +/* Print a non-macro token, or the expansion of an object-like macro. + * + * Returns 0 if this token is completely printed. + * + * Returns 1 in the case that 'token' is a function-like macro that + * needs further expansion. + */ +static int _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, token_t *token) { @@ -659,7 +675,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { _token_print (token); - return; + return 0; } /* Look up this identifier in the hash table. */ @@ -669,20 +685,135 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* Not a macro, so just print directly. */ if (macro == NULL) { printf ("%s", identifier); - return; + return 0; } - /* We're not (yet) supporting function-like macros. */ + /* For function-like macros return 1 for further processing. */ if (macro->is_function) { - printf ("%s", identifier); - return; + return 1; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { + printf ("%s", identifier); + return 0; + } + + _string_list_push (parser->active, identifier); + _glcpp_parser_print_expanded_token_list (parser, + macro->replacements); + _string_list_pop (parser->active); + + return 0; +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. + * + * Return values: + * + * FUNCTION_STATUS_SUCCESS: + * + * Successfully parsed a set of function arguments. 
+ * + * FUNCTION_NOT_A_FUNCTION: + * + * Macro name not followed by a '('. This is not an error, but + * simply that the macro name should be treated as a non-macro. + * + * FUNCTION_UNBLANCED_PARENTHESES + * + * Macro name is not followed by a balanced set of parentheses. + */ +static function_status_t +_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +{ + token_node_t *node = *node_ret, *last; + int paren_count; + int arg_count; + + last = node; + node = node->next; + + /* Ignore whitespace before first parenthesis. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != '(') + return FUNCTION_NOT_A_FUNCTION; + + paren_count = 0; + arg_count = 0; + do { + if (node->token->type == '(') + { + paren_count++; + } + else if (node->token->type == ')') + { + paren_count--; + } + else if (node->token->type == ',' && + paren_count == 1) + { + arg_count++; + } + + last = node; + node = node->next; + + } while (node && paren_count); + + if (node && paren_count) + return FUNCTION_UNBALANCED_PARENTHESES; + + *node_ret = last; + + return FUNCTION_STATUS_SUCCESS; +} + +/* Prints the expansion of *node (consuming further tokens from the + * list as necessary). Upon return *node will be the last consumed + * node, such that further processing can continue with node->next. 
*/ +static void +_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, + token_node_t **node_ret) +{ + macro_t *macro; + token_node_t *node; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + + node = *node_ret; + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + status = _find_arguments (node_ret, &arguments); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: printf ("%s", identifier); return; + case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); } _string_list_push (parser->active, identifier); @@ -696,12 +827,15 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node; + function_status_t function_status; if (list == NULL) return; for (node = list->head; node; node = node->next) { - _glcpp_parser_print_expanded_token (parser, node->token); + if (_glcpp_parser_print_expanded_token (parser, node->token)) + _glcpp_parser_print_expanded_function (parser, &node); + if (node->next) printf (" "); } diff --git a/glcpp.h b/glcpp.h index bd599d7301..043098b134 100644 --- a/glcpp.h +++ b/glcpp.h @@ -51,6 +51,7 @@ typedef union YYSTYPE { int ival; char *str; + string_list_t *string_list; token_t *token; token_list_t *token_list; } YYSTYPE; -- cgit v1.2.3 From f34a0009dd07dbca4de5491744bd3618eae9458e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:59:02 -0700 Subject: Pass through literal space values from replacement lists. This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to only emit SPACE tokens when on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. 
Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way. --- glcpp-lex.l | 85 +++++++++++++---------------------------------------------- glcpp-parse.y | 10 +++---- glcpp.h | 1 + 3 files changed, 25 insertions(+), 71 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index b1980742d3..f6d0c8b7d6 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" - /* This lexer has two states: - * - * The CONTROL state is for control lines (directives) - * It lexes exactly as specified in the C99 specification. - * - * The INITIAL state is for input lines. In this state, we - * make the OTHER token much more broad in that it now - * includes tokens consisting entirely of whitespace. This - * allows us to pass text through verbatim. It avoids the - * "inadvertent token pasting" problem that would occur if we - * just printed tokens, while also avoiding excess whitespace - * insertion in the output.*/ - -%x CONTROL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -63,116 +48,84 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
%% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; } {HASH}define { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_OBJ; } {HASH}undef { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_UNDEF; } {HASH} { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH; } -<CONTROL>{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -<CONTROL>"<<" { +"<<" { return LEFT_SHIFT; } -<CONTROL>">>" { +">>" { return RIGHT_SHIFT; } -<CONTROL>"<=" { +"<=" { return LESS_OR_EQUAL; } -<CONTROL>">=" { +">=" { return GREATER_OR_EQUAL; } -<CONTROL>"==" { +"==" { return EQUAL; } -<CONTROL>"!=" { +"!=" { return NOT_EQUAL; } -<CONTROL>"&&" { +"&&" { return AND; } -<CONTROL>"||" { +"||" { return OR; } -<CONTROL>"##" { +"##" { return PASTE; } -<CONTROL>{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -<CONTROL>{OTHER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - -<CONTROL>{HSPACE}+ - -<CONTROL>\n { - BEGIN INITIAL; - return NEWLINE; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"(" { - return '('; -} - -")" { - return ')'; -} - -"," { - return ','; -} - {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } {HSPACE}+ { - yylval.str = xtalloc_strdup (yyextra, yytext); - return SPACE; + if (yyextra->space_tokens) { + yylval.str = xtalloc_strdup (yyextra, yytext); + return SPACE; + } } \n { return NEWLINE; } -.
{ - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 60b414e43a..a1981995fd 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -160,7 +160,7 @@ line: ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { @@ -212,6 +212,7 @@ replacement_list: pp_tokens: preprocessing_token { + parser->space_tokens = 1; $$ = _token_list_create (parser); _token_list_append ($$, $1); talloc_unlink (parser, $1); @@ -234,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, OTHER, $1); + $$ = _token_create_str (parser, SPACE, $1); } ; @@ -494,6 +495,7 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: + case SPACE: printf ("%s", token->value.str); break; case LEFT_SHIFT: @@ -589,6 +591,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->space_tokens = 1; parser->expansions = NULL; parser->just_printed_separator = 1; @@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); - - if (node->next) - printf (" "); } } diff --git a/glcpp.h b/glcpp.h index 043098b134..f3760fa7a4 100644 --- a/glcpp.h +++ b/glcpp.h @@ -126,6 +126,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int space_tokens; expansion_node_t *expansions; int just_printed_separator; int need_newline; -- cgit v1.2.3 From f8ec4e0be86eee05f5a661a01864247fcd1a6b30 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 
May 2010 17:06:17 -0700 Subject: Add a test #0 to ensure that we don't do any inadvertent token pasting. This simply ensures that spaces in input line are preserved. --- tests/000-content-with-spaces.c | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/000-content-with-spaces.c diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c new file mode 100644 index 0000000000..a7fc918c90 --- /dev/null +++ b/tests/000-content-with-spaces.c @@ -0,0 +1 @@ +this is four tokens -- cgit v1.2.3 From e9397867ddce20a4263949f4b3a488fa99af3041 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:08:07 -0700 Subject: Collapse multiple spaces in input down to a single space. This is what gcc does, and it's actually less work to do this. Previously we were having to save the contents of space tokens as a string, but we don't need to do that now. We extend test #0 to exercise this feature here. --- glcpp-lex.l | 1 - glcpp-parse.y | 10 ++++++---- tests/000-content-with-spaces.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f6d0c8b7d6..516f42dee3 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -119,7 +119,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
{HSPACE}+ { if (yyextra->space_tokens) { - yylval.str = xtalloc_strdup (yyextra, yytext); return SPACE; } } diff --git a/glcpp-parse.y b/glcpp-parse.y index a1981995fd..0460f71f74 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,8 +132,8 @@ glcpp_parser_lex (glcpp_parser_t *parser); %token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE %token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type <ival> punctuator -%type <str> IDENTIFIER OTHER SPACE +%type <ival> punctuator SPACE +%type <str> IDENTIFIER OTHER %type <string_list> identifier_list %type <token> preprocessing_token %type <token_list> pp_tokens replacement_list text_line @@ -235,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, SPACE, $1); + $$ = _token_create_ival (parser, SPACE, SPACE); } ; @@ -495,9 +495,11 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: - case SPACE: printf ("%s", token->value.str); break; + case SPACE: + printf (" "); + break; case LEFT_SHIFT: printf ("<<"); break; diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c index a7fc918c90..696cb3a74f 100644 --- a/tests/000-content-with-spaces.c +++ b/tests/000-content-with-spaces.c @@ -1 +1 @@ -this is four tokens +this is four tokens -- cgit v1.2.3 From 9ce18cf9837bee379dfd0f52a3df005c1797e544 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:32:21 -0700 Subject: Implement substitution of function parameters in macro calls. This makes tests 16 - 19 pass.
--- glcpp-parse.y | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 0460f71f74..eb93bad85d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -405,9 +405,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument) { argument_node_t *node; - if (argument == NULL || argument->head == NULL) - return; - node = xtalloc (list, argument_node_t); node->argument = argument; @@ -741,8 +738,9 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_find_arguments (token_node_t **node_ret, argument_list_t **arguments) +_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) { + token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; int arg_count; @@ -757,6 +755,8 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + argument = NULL; + paren_count = 0; arg_count = 0; do { @@ -771,7 +771,14 @@ _find_arguments (token_node_t **node_ret, argument_list_t **arguments) else if (node->token->type == ',' && paren_count == 1) { - arg_count++; + argument = NULL; + } + else { + if (argument == NULL) { + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); + } + _token_list_append (argument, node->token); } last = node; @@ -799,6 +806,9 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, const char *identifier; argument_list_t *arguments; function_status_t status; + token_list_t *expanded; + token_node_t *i, *j; + int parameter_index; node = *node_ret; identifier = node->token->value.str; @@ -807,7 +817,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, assert (macro->is_function); - status = _find_arguments (node_ret, &arguments); + arguments = _argument_list_create (parser); + status = 
_arguments_parse (arguments, node_ret); switch (status) { case FUNCTION_STATUS_SUCCESS: @@ -821,10 +832,48 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, exit (1); } + if (macro->replacements == NULL) { + talloc_free (arguments); + return; + } + + + if (_argument_list_length (arguments) != + _string_list_length (macro->parameters)) + { + fprintf (stderr, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _argument_list_length (arguments), + _string_list_length (macro->parameters)); + return; + } + + expanded = _token_list_create (arguments); + + for (i = macro->replacements->head; i; i = i->next) { + if (i->token->type == IDENTIFIER && + _string_list_contains (macro->parameters, + i->token->value.str, + &parameter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + for (j = argument->head; j; j = j->next) + { + _token_list_append (expanded, j->token); + } + } else { + _token_list_append (expanded, i->token); + } + } + _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_print_expanded_token_list (parser, expanded); _string_list_pop (parser->active); + + talloc_free (arguments); } void -- cgit v1.2.3 From c7581c2e6e6897eddc55c537c92417b813a8b81e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:41:07 -0700 Subject: Ignore separating whitespace at the beginning of a macro argument. This causes test 16 to pass. Tests 17-20 are also passing now, (though they would probably have passed before this change and simply weren't being run yet).
--- glcpp-parse.y | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index eb93bad85d..ec966580fc 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -743,7 +743,6 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) token_list_t *argument; token_node_t *node = *node_ret, *last; int paren_count; - int arg_count; last = node; node = node->next; @@ -757,9 +756,7 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) argument = NULL; - paren_count = 0; - arg_count = 0; - do { + for (paren_count = 0; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -767,6 +764,11 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; + if (paren_count == 0) { + last = node; + node = node->next; + break; + } } else if (node->token->type == ',' && paren_count == 1) @@ -775,16 +777,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } else { if (argument == NULL) { + /* Don't treat initial whitespace as + * part of the arguement. */ + if (node->token->type == SPACE) + continue; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } - - last = node; - node = node->next; - - } while (node && paren_count); + } if (node && paren_count) return FUNCTION_UNBALANCED_PARENTHESES; -- cgit v1.2.3 From 652fa272ea4bdb9bfe6cd7f8413b3a3b03972987 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 17:45:22 -0700 Subject: Avoid swallowing initial left parenthesis from nested macro invocation. We weren't including this left parenthesis in the argument's token list so the nested function invocation was not being recognized. With this fix, tests 21 and 22 now pass.
--- glcpp-parse.y | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec966580fc..131102fab9 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -754,9 +754,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; + last = node; + node = node->next; + argument = NULL; - for (paren_count = 0; node; last = node, node = node->next) { + for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') { paren_count++; @@ -770,7 +773,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) break; } } - else if (node->token->type == ',' && + + if (node->token->type == ',' && paren_count == 1) { argument = NULL; -- cgit v1.2.3 From 5aa7ea08093f727761d424ad090f44b116c8f0bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 18:39:43 -0700 Subject: Remove a bunch of old code and give the static treatment to what's left. We're no longer using the expansion stack, so its functions can go along with most of the body of glcpp_parser_lex that was using it. 
--- glcpp-parse.y | 262 +++++----------------------------------------------------- glcpp.h | 7 -- 2 files changed, 21 insertions(+), 248 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 131102fab9..02286cd8e0 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -28,88 +28,77 @@ #include "glcpp.h" -void +static void yyerror (void *scanner, const char *error); -void +static void _define_object_macro (glcpp_parser_t *parser, const char *macro, token_list_t *replacements); -void +static void _define_function_macro (glcpp_parser_t *parser, const char *macro, string_list_t *parameters, token_list_t *replacements); -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier); - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments); - -string_list_t * +static string_list_t * _string_list_create (void *ctx); -void +static void _string_list_append_item (string_list_t *list, const char *str); -void +static void _string_list_append_list (string_list_t *list, string_list_t *tail); -void +static void _string_list_push (string_list_t *list, const char *str); -void +static void _string_list_pop (string_list_t *list); -int +static int _string_list_contains (string_list_t *list, const char *member, int *index); -int +static int _string_list_length (string_list_t *list); -argument_list_t * +static argument_list_t * _argument_list_create (void *ctx); -void +static void _argument_list_append (argument_list_t *list, token_list_t *argument); -int +static int _argument_list_length (argument_list_t *list); -token_list_t * +static token_list_t * _argument_list_member_at (argument_list_t *list, int index); /* Note: This function talloc_steal()s the str pointer. 
*/ -token_t * +static token_t * _token_create_str (void *ctx, int type, char *str); -token_t * +static token_t * _token_create_ival (void *ctx, int type, int ival); -token_list_t * +static token_list_t * _token_list_create (void *ctx); /* Note: This function add a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -void +static void _token_list_append (token_list_t *list, token_t *token); -void +static void _token_list_append_list (token_list_t *list, token_list_t *tail); -void +static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser); - static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -591,10 +580,6 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; - parser->expansions = NULL; - - parser->just_printed_separator = 1; - parser->need_newline = 0; parser->skip_stack = NULL; @@ -610,8 +595,6 @@ glcpp_parser_parse (glcpp_parser_t *parser) void glcpp_parser_destroy (glcpp_parser_t *parser) { - if (parser->need_newline) - printf ("\n"); if (parser->skip_stack) fprintf (stderr, "Error: Unterminated #if\n"); glcpp_lex_destroy (parser->scanner); @@ -619,47 +602,6 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -static int -glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member) -{ - expansion_node_t *node; - - for (node = parser->expansions; node; node = node->next) { - if (node->macro && - strcmp (node->macro->identifier, member) == 0) - { - return 1; - } - } - - return 0; -} - -token_class_t -glcpp_parser_classify_token (glcpp_parser_t *parser, - const char *identifier, - int *parameter_index) -{ - macro_t *macro; - - /* Is this token a defined macro? 
*/ - macro = hash_table_find (parser->defines, identifier); - - if (macro == NULL) - return TOKEN_CLASS_IDENTIFIER; - - /* Don't consider this a macro if we are already actively - * expanding this macro. */ - if (glcpp_parser_is_expanding (parser, identifier)) - return TOKEN_CLASS_IDENTIFIER_FINALIZED; - - /* Definitely a macro. Just need to check if it's function-like. */ - if (macro->is_function) - return TOKEN_CLASS_FUNC_MACRO; - else - return TOKEN_CLASS_OBJ_MACRO; -} - /* Print a non-macro token, or the expansion of an object-like macro. * * Returns 0 if this token is completely printed. @@ -933,172 +875,10 @@ _define_function_macro (glcpp_parser_t *parser, hash_table_insert (parser->defines, macro, identifier); } -static void -_glcpp_parser_push_expansion (glcpp_parser_t *parser, - macro_t *macro, - token_node_t *replacements) -{ - expansion_node_t *node; - - node = xtalloc (parser, expansion_node_t); - - node->macro = macro; - node->replacements = replacements; - - node->next = parser->expansions; - parser->expansions = node; -} - -static void -glcpp_parser_pop_expansion (glcpp_parser_t *parser) -{ - expansion_node_t *node; - - node = parser->expansions; - - if (node == NULL) { - fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n"); - exit (1); - } - - parser->expansions = node->next; - - talloc_free (node); -} - -void -_expand_object_macro (glcpp_parser_t *parser, const char *identifier) -{ - macro_t *macro; - - macro = hash_table_find (parser->defines, identifier); - assert (! macro->is_function); - assert (! 
glcpp_parser_is_expanding (parser, identifier)); - - _glcpp_parser_push_expansion (parser, macro, macro->replacements->head); -} - -void -_expand_function_macro (glcpp_parser_t *parser, - const char *identifier, - argument_list_t *arguments) -{ - macro_t *macro; - token_list_t *expanded; - token_node_t *i, *j; - int parameter_index; - - macro = hash_table_find (parser->defines, identifier); - assert (macro->is_function); - assert (! glcpp_parser_is_expanding (parser, identifier)); - - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) - { - fprintf (stderr, - "Error: macro %s invoked with %d arguments (expected %d)\n", - identifier, - _argument_list_length (arguments), - _string_list_length (macro->parameters)); - return; - } - - expanded = _token_list_create (macro); - - for (i = macro->replacements->head; i; i = i->next) { - if (_string_list_contains (macro->parameters, - i->token->value.str, - &parameter_index)) - { - token_list_t *argument; - argument = _argument_list_member_at (arguments, - parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (expanded, j->token); - } - } else { - _token_list_append (expanded, i->token); - } - } - - _glcpp_parser_push_expansion (parser, macro, expanded->head); -} - static int glcpp_parser_lex (glcpp_parser_t *parser) { - expansion_node_t *expansion; - token_node_t *replacements; - int parameter_index; - const char *token; - token_class_t class; - - /* Who says C can't do efficient tail recursion? */ - RECURSE: - - expansion = parser->expansions; - - if (expansion == NULL) - return glcpp_lex (parser->scanner); - - replacements = expansion->replacements; - - /* Pop expansion when replacements is exhausted. */ - if (replacements == NULL) { - glcpp_parser_pop_expansion (parser); - goto RECURSE; - } - - expansion->replacements = replacements->next; - - token = replacements->token->value.str; - - /* Implement token pasting.
*/ - if (replacements->next && strcmp (replacements->next->token->value.str, "##") == 0) { - token_node_t *next_node; - - next_node = replacements->next->next; - - if (next_node == NULL) { - fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n"); - exit (1); - } - - token = xtalloc_asprintf (parser, "%s%s", - token, next_node->token->value.str); - expansion->replacements = next_node->next; - } - - - if (strcmp (token, "(") == 0) - return '('; - else if (strcmp (token, ")") == 0) - return ')'; - - yylval.str = xtalloc_strdup (parser, token); - - /* Carefully refuse to expand any finalized identifier. */ - if (replacements->token->type == IDENTIFIER_FINALIZED) - return IDENTIFIER_FINALIZED; - - switch (glcpp_parser_classify_token (parser, yylval.str, - &parameter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - default: - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - } + return glcpp_lex (parser->scanner); } static void diff --git a/glcpp.h b/glcpp.h index f3760fa7a4..6bd6e66a7c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,16 +127,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; - expansion_node_t *expansions; - int just_printed_separator; - int need_newline; skip_node_t *skip_stack; }; -void -glcpp_parser_push_expansion_argument (glcpp_parser_t *parser, - int argument_index); - glcpp_parser_t * glcpp_parser_create (void); -- cgit v1.2.3 From 10ae438399f14367dd9e03032594c1e16c428999 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 20:35:01 -0700 Subject: Avoid getting extra trailing whitespace from macros. This trailing whitespace was coming from macro definitions and from macro arguments. We fix this with a little extra state in the token_list.
It now remembers the last non-space token added, so that these can be trimmed off just before printing the list. With this fix test 23 now passes. Tests 24 and 25 are also passing, but they probably would have before this fix---just that they weren't being run earlier. --- glcpp-parse.y | 30 ++++++++++++++++++++++++++++-- glcpp.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 02286cd8e0..60eaf215b8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -471,7 +471,7 @@ _token_create_ival (void *ctx, int type, int ival) } void -_token_print (token_t *token) +_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) { if (token->type < 256) { printf ("%c", token->type); @@ -527,6 +527,7 @@ _token_list_create (void *ctx) list = xtalloc (ctx, token_list_t); list->head = NULL; list->tail = NULL; + list->non_space_tail = NULL; return list; } @@ -548,6 +549,8 @@ _token_list_append (token_list_t *list, token_t *token) } list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; } void @@ -560,6 +563,25 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) } list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + talloc_free (tail); + tail = next; + } + } } void @@ -618,7 +640,7 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_print (token); + _glcpp_parser_print_token (parser, token); return 0; } @@ -719,6 +741,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { + if (argument) + _token_list_trim_trailing_space (argument); 
argument = NULL; } else { @@ -834,6 +858,8 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, if (list == NULL) return; + _token_list_trim_trailing_space (list); + for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); diff --git a/glcpp.h b/glcpp.h index 6bd6e66a7c..21db918cdc 100644 --- a/glcpp.h +++ b/glcpp.h @@ -72,6 +72,7 @@ typedef struct token_node { struct token_list { token_node_t *head; token_node_t *tail; + token_node_t *non_space_tail; }; typedef struct argument_node { -- cgit v1.2.3 From 039739b2da0ce8496f6e8d38127c0b3793607afa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 07:58:59 -0700 Subject: Defer test 26 until much later (to test 55). Supporting embedded newlines in a macro invocation is going to be tricky with our current approach to lexing and parsing. Since this isn't really an important feature for us, we can defer this until more important things are resolved. With this test out of the way, tests 27 through 31 are passing. 
--- tests/026-define-func-extra-newlines.c | 6 ------ tests/055-define-func-extra-newlines.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) delete mode 100644 tests/026-define-func-extra-newlines.c create mode 100644 tests/055-define-func-extra-newlines.c diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c deleted file mode 100644 index 0d83740530..0000000000 --- a/tests/026-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c new file mode 100644 index 0000000000..0d83740530 --- /dev/null +++ b/tests/055-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) -- cgit v1.2.3 From c0607d573e04846a23c3162901aabd7fc40ebc61 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:01:42 -0700 Subject: Check active expansions before expanding a function-like macro invocation. With this fix, test 32 no longer recurses infinitely, but now passes. --- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 60eaf215b8..a2bff6e0ad 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -654,11 +654,6 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { @@ -666,6 +661,11 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, return 0; } + /* For function-like macros return 1 for further processing. 
*/ + if (macro->is_function) { + return 1; + } + _string_list_push (parser->active, identifier); _glcpp_parser_print_expanded_token_list (parser, macro->replacements); -- cgit v1.2.3 From 0197e9b64f0e64a617537c5ad1465b4a8706fe1c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:05:19 -0700 Subject: Change macro expansion to append onto token lists rather than printing directly. This doesn't change any functionality here, but will allow us to make future changes that were not possible with direct printing. Specifically, we need to expand macros within macro arguments before performing argument substitution. And *that* expansion cannot result in immediate printing. --- glcpp-parse.y | 193 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 73 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index a2bff6e0ad..e25cfa9214 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -99,6 +99,11 @@ static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result); + static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -470,55 +475,6 @@ _token_create_ival (void *ctx, int type, int ival) return token; } -void -_glcpp_parser_print_token (glcpp_parser_t *parser, token_t *token) -{ - if (token->type < 256) { - printf ("%c", token->type); - return; - } - - switch (token->type) { - case IDENTIFIER: - case OTHER: - printf ("%s", token->value.str); - break; - case SPACE: - printf (" "); - break; - case LEFT_SHIFT: - printf ("<<"); - break; - case RIGHT_SHIFT: - printf (">>"); - break; - case LESS_OR_EQUAL: - printf ("<="); - break; - case GREATER_OR_EQUAL: - printf (">="); - break; - case EQUAL: - printf ("=="); - break; - case NOT_EQUAL: - printf ("!="); - break; - case AND: - printf ("&&"); - break; - case OR: - printf ("||"); - break; 
- case PASTE: - printf ("##"); - break; - default: - fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); - break; - } -} - token_list_t * _token_list_create (void *ctx) { @@ -584,6 +540,67 @@ _token_list_trim_trailing_space (token_list_t *list) } } +static void +_token_print (token_t *token) +{ + if (token->type < 256) { + printf ("%c", token->type); + return; + } + + switch (token->type) { + case IDENTIFIER: + case OTHER: + printf ("%s", token->value.str); + break; + case SPACE: + printf (" "); + break; + case LEFT_SHIFT: + printf ("<<"); + break; + case RIGHT_SHIFT: + printf (">>"); + break; + case LESS_OR_EQUAL: + printf ("<="); + break; + case GREATER_OR_EQUAL: + printf (">="); + break; + case EQUAL: + printf ("=="); + break; + case NOT_EQUAL: + printf ("!="); + break; + case AND: + printf ("&&"); + break; + case OR: + printf ("||"); + break; + case PASTE: + printf ("##"); + break; + default: + fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); + break; + } +} + +static void +_token_list_print (token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + _token_print (node->token); +} + void yyerror (void *scanner, const char *error) { @@ -624,23 +641,26 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } -/* Print a non-macro token, or the expansion of an object-like macro. +/* Appends onto 'expansion' a non-macro token or the expansion of an + * object-like macro. * - * Returns 0 if this token is completely printed. + * Returns 0 if this token is completely processed. * * Returns 1 in the case that 'token' is a function-like macro that * needs further expansion. 
*/ static int -_glcpp_parser_print_expanded_token (glcpp_parser_t *parser, - token_t *token) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; + token_list_t *expansion; /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _glcpp_parser_print_token (parser, token); + _token_list_append (result, token); return 0; } @@ -648,16 +668,16 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just print directly. */ + /* Not a macro, so just append. */ if (macro == NULL) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ if (_string_list_contains (parser->active, identifier, NULL)) { - printf ("%s", identifier); + _token_list_append (result, token); return 0; } @@ -667,8 +687,9 @@ _glcpp_parser_print_expanded_token (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, - macro->replacements); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); _string_list_pop (parser->active); return 0; @@ -770,15 +791,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. 
*/ static void -_glcpp_parser_print_expanded_function (glcpp_parser_t *parser, - token_node_t **node_ret) +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; - token_list_t *expanded; + token_list_t *substituted; token_node_t *i, *j; int parameter_index; @@ -796,7 +818,7 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - printf ("%s", identifier); + _token_list_append (result, node->token); return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", @@ -809,7 +831,6 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - if (_argument_list_length (arguments) != _string_list_length (macro->parameters)) { @@ -821,7 +842,8 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, return; } - expanded = _token_list_create (arguments); + /* Perform argument substitution on the replacement list. 
*/ + substituted = _token_list_create (arguments); for (i = macro->replacements->head; i; i = i->next) { if (i->token->type == IDENTIFIER && @@ -834,36 +856,61 @@ _glcpp_parser_print_expanded_function (glcpp_parser_t *parser, parameter_index); for (j = argument->head; j; j = j->next) { - _token_list_append (expanded, j->token); + _token_list_append (substituted, j->token); } } else { - _token_list_append (expanded, i->token); + _token_list_append (substituted, i->token); } } _string_list_push (parser->active, identifier); - _glcpp_parser_print_expanded_token_list (parser, expanded); + _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); talloc_free (arguments); } +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) + { + _glcpp_parser_expand_function_onto (parser, &node, + result); + } + } +} + void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { + token_list_t *expanded; token_node_t *node; function_status_t function_status; if (list == NULL) return; - _token_list_trim_trailing_space (list); + expanded = _token_list_create (parser); - for (node = list->head; node; node = node->next) { - if (_glcpp_parser_print_expanded_token (parser, node->token)) - _glcpp_parser_print_expanded_function (parser, &node); - } + _glcpp_parser_expand_token_list_onto (parser, list, expanded); + + _token_list_trim_trailing_space (expanded); + + _token_list_print (expanded); + + talloc_free (expanded); } void -- cgit v1.2.3 From d5cd40343f4a83d3270cb87ef38e85dcb9682e8c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:09:29 -0700 Subject: Expand macro arguments before performing argument substitution. 
As required by the C99 specification of the preprocessor. With this fix, tests 33 through 36 now pass. --- glcpp-parse.y | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index e25cfa9214..3b736f8e64 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -854,10 +854,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_list_t *argument; argument = _argument_list_member_at (arguments, parameter_index); - for (j = argument->head; j; j = j->next) - { - _token_list_append (substituted, j->token); - } + /* Before substituting, we expand the argument + * tokens. */ + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); } else { _token_list_append (substituted, i->token); } -- cgit v1.2.3 From b1ae61a2ee1bf2ba733dca417b0268b1106d83cf Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:10:38 -0700 Subject: Fix a typo in a comment. Always better to use proper grammar in our grammar. --- glcpp-parse.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 3b736f8e64..5b792a976e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -85,7 +85,7 @@ _token_create_ival (void *ctx, int type, int ival); static token_list_t * _token_list_create (void *ctx); -/* Note: This function add a talloc_reference() to token. +/* Note: This function adds a talloc_reference() to token. * * You may want to talloc_unlink any current reference if you no * longer need it. */ -- cgit v1.2.3 From c9dcc08d4512370b6fef6370afb8bcdb0ecd9292 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:11:08 -0700 Subject: README: Document some known limitations. None of these are fundamental---just a few things that haven't been implemented yet. --- README | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README b/README index ba833a49ff..f0f64c2644 100644 --- a/README +++ b/README @@ -12,3 +12,15 @@ preprocessors". 
To fill in these details, I've been using the C99 standard (for which I had a convenient copy) as available from: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + +Known limitations +----------------- +Macro invocations cannot include embedded newlines. + +The __LINE__, __FILE__, and __VERSION__ macros are not yet supported. + +The argument of the 'defined' operator cannot yet include enclosing +parentheses. + +The #error, #pragma, #extension, #version, and #line macros are not +yet supported. -- cgit v1.2.3 From ec4ada01c01338ae1deab634cf62f24344bdbd3a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:15:49 -0700 Subject: Prevent unexpanded macros from being expanded again in the future. With this fix, tests 37 - 39 now pass. --- glcpp-parse.y | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b792a976e..ec10433063 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -676,8 +676,17 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) { - _token_list_append (result, token); + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); return 0; } -- cgit v1.2.3 From 63909fc19654ddb3ef339bcceed9cbf6e6a057bc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:16:56 -0700 Subject: Remove some stale token types. All the code referencing these was removed some time ago. 
--- glcpp-parse.y | 3 --- 1 file changed, 3 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index ec10433063..04e78b1826 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,9 +132,6 @@ glcpp_parser_lex (glcpp_parser_t *parser); %type preprocessing_token %type pp_tokens replacement_list text_line - /* Stale stuff just to allow code to compile. */ -%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO - %% input: -- cgit v1.2.3 From ce540f2571a449a3620bd3672bfb93b39cef71e1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 08:25:44 -0700 Subject: Rename identifier from 'i' to 'node'. Now that we no longer have nested for loops with 'i' and 'j' we can use the 'node' that we already have. --- glcpp-parse.y | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 04e78b1826..5f59b5b006 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -807,7 +807,6 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument_list_t *arguments; function_status_t status; token_list_t *substituted; - token_node_t *i, *j; int parameter_index; node = *node_ret; @@ -851,10 +850,11 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, /* Perform argument substitution on the replacement list. 
*/ substituted = _token_list_create (arguments); - for (i = macro->replacements->head; i; i = i->next) { - if (i->token->type == IDENTIFIER && + for (node = macro->replacements->head; node; node = node->next) + { + if (node->token->type == IDENTIFIER && _string_list_contains (macro->parameters, - i->token->value.str, + node->token->value.str, &parameter_index)) { token_list_t *argument; @@ -866,7 +866,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, argument, substituted); } else { - _token_list_append (substituted, i->token); + _token_list_append (substituted, node->token); } } -- cgit v1.2.3 From ad0dee6bb0f197b9addb45f38e8843d6a504723c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:04:50 -0700 Subject: Implement token pasting. Which makes test 40 now pass. --- glcpp-parse.y | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5f59b5b006..330d3ab3bc 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -586,6 +586,86 @@ _token_print (token_t *token) } } +/* Change 'token' into a new token formed by pasting 'other'. */ +static void +_token_paste (token_t *token, token_t *other) +{ + /* A very few single-character punctuators can be combined + * with another to form a multi-character punctuator. 
*/ + switch (token->type) { + case '<': + if (other->type == '<') { + token->type = LEFT_SHIFT; + token->value.ival = LEFT_SHIFT; + return; + } else if (other->type == '=') { + token->type = LESS_OR_EQUAL; + token->value.ival = LESS_OR_EQUAL; + return; + } + break; + case '>': + if (other->type == '>') { + token->type = RIGHT_SHIFT; + token->value.ival = RIGHT_SHIFT; + return; + } else if (other->type == '=') { + token->type = GREATER_OR_EQUAL; + token->value.ival = GREATER_OR_EQUAL; + return; + } + break; + case '=': + if (other->type == '=') { + token->type = EQUAL; + token->value.ival = EQUAL; + return; + } + break; + case '!': + if (other->type == '=') { + token->type = NOT_EQUAL; + token->value.ival = NOT_EQUAL; + return; + } + break; + case '&': + if (other->type == '&') { + token->type = AND; + token->value.ival = AND; + return; + } + break; + case '|': + if (other->type == '|') { + token->type = OR; + token->value.ival = OR; + return; + } + break; + } + + /* Two string-valued tokens can usually just be mashed + * together. + * + * XXX: Since our 'OTHER' case is currently so loose, this may + * allow some things through that should be treated as + * errors. */ + if ((token->type == IDENTIFIER || token->type == OTHER) && + (other->type == IDENTIFIER || other->type == OTHER)) + { + token->value.str = talloc_strdup_append (token->value.str, + other->value.str); + return; + } + + printf ("Error: Pasting \""); + _token_print (token); + printf ("\" and \""); + _token_print (other); + printf ("\" does not give a valid preprocessing token.\n"); +} + static void _token_list_print (token_list_t *list) { @@ -870,6 +950,43 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } } + /* After argument substitution, and before further expansion + * below, implement token pasting. */ + + node = substituted->head; + while (node) + { + token_node_t *next_non_space; + + /* Look ahead for a PASTE token, skipping space. 
*/ + next_non_space = node->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) + break; + + if (next_non_space->token->type != PASTE) { + node = next_non_space; + continue; + } + + /* Now find the next non-space token after the PASTE. */ + next_non_space = next_non_space->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) { + fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); + exit (1); + } + + _token_paste (node->token, next_non_space->token); + node->next = next_non_space->next; + + node = node->next; + } + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list_onto (parser, substituted, result); _string_list_pop (parser->active); -- cgit v1.2.3 From 8fed1cddae8b024972d0c08f120bfd0292cb9cca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:12 -0700 Subject: stash --- glcpp-lex.l | 40 +++++++++++++++++ glcpp-parse.y | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 176 insertions(+), 5 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 516f42dee3..97f01d0636 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -62,11 +62,47 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
return HASH_UNDEF; } +{HASH}if { + yyextra->space_tokens = 0; + return HASH_IF; +} + +{HASH}elif { + yyextra->space_tokens = 0; + return HASH_ELIF; +} + +{HASH}else { + yyextra->space_tokens = 0; + return HASH_ELSE; +} + +{HASH}endif { + yyextra->space_tokens = 0; + return HASH_ENDIF; +} + {HASH} { yyextra->space_tokens = 0; return HASH; } +{DECIMAL_INTEGER} { + yylval.ival = strtoll (yytext, NULL, 10); + return INTEGER; +} + +{OCTAL_INTEGER} { + yylval.ival = strtoll (yytext + 1, NULL, 8); + return INTEGER; +} + +{HEXADECIMAL_INTEGER} { + yylval.ival = strtoll (yytext + 2, NULL, 16); + return INTEGER; +} + + {IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; @@ -108,6 +144,10 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return PASTE; } +"defined" { + return DEFINED; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/glcpp-parse.y b/glcpp-parse.y index 330d3ab3bc..58e1e655fd 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -25,6 +25,7 @@ #include #include #include +#include #include "glcpp.h" @@ -124,27 +125,46 @@ glcpp_parser_lex (glcpp_parser_t *parser); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_UNDEF IDENTIFIER NEWLINE OTHER SPACE -%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE -%type punctuator SPACE +%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token PASTE +%type expression INTEGER punctuator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' '-' +%left '*' '/' '%' +%right UNARY %% input: /* empty */ | input line { - printf ("\n"); + if 
(parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + printf ("\n"); + } } ; line: control_line | text_line { - _glcpp_parser_print_expanded_token_list (parser, $1); + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + _glcpp_parser_print_expanded_token_list (parser, $1); + } talloc_free ($1); } | HASH non_directive @@ -171,9 +191,114 @@ control_line: } talloc_free ($2); } +| HASH_IF expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| HASH_IFDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro != NULL); + } +| HASH_IFNDEF IDENTIFIER NEWLINE { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + _glcpp_parser_skip_stack_push_if (parser, macro == NULL); + } +| HASH_ELIF expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); + } +| HASH_ELSE NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "else", 1); + } +| HASH_ENDIF NEWLINE { + _glcpp_parser_skip_stack_pop (parser); + } | HASH NEWLINE ; +expression: + INTEGER { + $$ = $1; + } +| expression OR expression { + $$ = $1 || $3; + } +| expression AND expression { + $$ = $1 && $3; + } +| expression '|' expression { + $$ = $1 | $3; + } +| expression '^' expression { + $$ = $1 ^ $3; + } +| expression '&' expression { + $$ = $1 & $3; + } +| expression NOT_EQUAL expression { + $$ = $1 != $3; + } +| expression EQUAL expression { + $$ = $1 == $3; + } +| expression GREATER_OR_EQUAL expression { + $$ = $1 >= $3; + } +| expression LESS_OR_EQUAL expression { + $$ = $1 <= $3; + } +| expression '>' expression { + $$ = $1 > $3; + } +| expression '<' expression { + $$ = $1 < $3; + } +| expression RIGHT_SHIFT expression { + $$ = $1 >> $3; + } +| expression LEFT_SHIFT expression { + $$ = $1 << $3; + } +| expression '-' expression { + $$ = $1 - $3; + } +| expression '+' expression { + $$ 
= $1 + $3; + } +| expression '%' expression { + $$ = $1 % $3; + } +| expression '/' expression { + $$ = $1 / $3; + } +| expression '*' expression { + $$ = $1 * $3; + } +| '!' expression %prec UNARY { + $$ = ! $2; + } +| '~' expression %prec UNARY { + $$ = ~ $2; + } +| '-' expression %prec UNARY { + $$ = - $2; + } +| '+' expression %prec UNARY { + $$ = + $2; + } +| DEFINED IDENTIFIER %prec UNARY { + string_list_t *macro = hash_table_find (parser->defines, $2); + talloc_free ($2); + if (macro) + $$ = 1; + else + $$ = 0; + } +| '(' expression ')' { + $$ = $2; + } +; + identifier_list: IDENTIFIER { $$ = _string_list_create (parser); @@ -219,6 +344,9 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } +| INTEGER { + $$ = _token_create_ival (parser, INTEGER, $1); + } | punctuator { $$ = _token_create_ival (parser, $1, $1); } @@ -546,6 +674,9 @@ _token_print (token_t *token) } switch (token->type) { + case INTEGER: + printf ("%" PRIxMAX, token->value.ival); + break; case IDENTIFIER: case OTHER: printf ("%s", token->value.str); -- cgit v1.2.3 From f6914fd37b2b66d7be1ba0c31450d89d1785ccce Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:32:57 -0700 Subject: Implement #if and friends. With this change, tests 41 through 49 all pass. (The defined operator appears to be somehow broken so that test 50 doesn't pass yet.) 
--- glcpp.h | 2 +- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/glcpp-test | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/glcpp.h b/glcpp.h index 21db918cdc..36ab0e7ca5 100644 --- a/glcpp.h +++ b/glcpp.h @@ -49,7 +49,7 @@ typedef struct token_list token_list_t; typedef union YYSTYPE { - int ival; + intmax_t ival; char *str; string_list_t *string_list; token_t *token; diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220f..833ea03882 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d..34f0f95140 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/glcpp-test b/tests/glcpp-test index 6304155210..bf88d4462e 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -6,5 +6,5 @@ for test in *.c; do ../glcpp < $test > $test.out gcc -E $test -o $test.gcc grep -v '^#' < $test.gcc > $test.expected - diff -u $test.expected $test.out + diff -B -u $test.expected $test.out done -- cgit v1.2.3 From 16c1e980e2e3c8852ce9bea85afe094c24e420fa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 09:35:34 -0700 Subject: Fix lexing of "defined" as an operator, not an identifier. Simply need to move the rule for IDENTIFIER to be after "defined" and everything is happy. With this change, tests 50 through 53 all pass now. --- glcpp-lex.l | 11 +++++------ tests/053-if-divide-and-shift.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 97f01d0636..d6b7726d36 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -102,12 +102,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
return INTEGER; } - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - "<<" { return LEFT_SHIFT; } @@ -148,6 +142,11 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? return DEFINED; } +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + {PUNCTUATION} { return yytext[0]; } diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c index ddc1573ab2..d24c54a88d 100644 --- a/tests/053-if-divide-and-shift.c +++ b/tests/053-if-divide-and-shift.c @@ -13,4 +13,3 @@ failure_3 #else success_3 #endif - -- cgit v1.2.3 From 8e82fcb070d5fae0ec2c763cee4cea225b459664 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 11:15:21 -0700 Subject: Implement (and test) support for macro expansion within conditional expressions. To do this we have split the existing "HASH_IF expression" into two productions: First is HASH_IF pp_tokens which simply constructs a list of tokens. Then, with that resulting token list, we first evaluate all DEFINED operator tokens, then expand all macros, and finally start lexing from the resulting token list. This brings us to the second production, IF_EXPANDED expression This final production works just like our previous "HASH_IF expression", evaluating a constant integer expression. The new test (54) added for this case now passes. 
--- glcpp-parse.y | 155 ++++++++++++++++++++++++++++++++++++++------- glcpp.h | 2 + tests/054-if-with-macros.c | 34 ++++++++++ 3 files changed, 169 insertions(+), 22 deletions(-) create mode 100644 tests/054-if-with-macros.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 58e1e655fd..cce8a70156 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -96,6 +96,10 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list); + static void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list); @@ -120,14 +124,17 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser); static int glcpp_parser_lex (glcpp_parser_t *parser); +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + %} %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER INTEGER NEWLINE OTHER SPACE +%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE -%type expression INTEGER punctuator SPACE +%type expression INTEGER operator SPACE %type IDENTIFIER OTHER %type identifier_list %type preprocessing_token @@ -148,28 +155,39 @@ glcpp_parser_lex (glcpp_parser_t *parser); input: /* empty */ -| input line { +| input line +; + +line: + control_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { printf ("\n"); } } -; - -line: - control_line | text_line { if (parser->skip_stack == NULL || parser->skip_stack->type == SKIP_NO_SKIP) { _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); } talloc_free ($1); } +| expanded_line | HASH non_directive 
; +expanded_line: + IF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_push_if (parser, $2); + } +| ELIF_EXPANDED expression NEWLINE { + _glcpp_parser_skip_stack_change_if (parser, "elif", $2); + } +; + control_line: HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); @@ -191,8 +209,17 @@ control_line: } talloc_free ($2); } -| HASH_IF expression NEWLINE { - _glcpp_parser_skip_stack_push_if (parser, $2); +| HASH_IF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { string_list_t *macro = hash_table_find (parser->defines, $2); @@ -204,8 +231,17 @@ control_line: talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } -| HASH_ELIF expression NEWLINE { - _glcpp_parser_skip_stack_change_if (parser, "#elif", $2); +| HASH_ELIF pp_tokens NEWLINE { + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED); + _token_list_append (expanded, token); + talloc_unlink (parser, token); + _glcpp_parser_evaluate_defined (parser, $2); + _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { _glcpp_parser_skip_stack_change_if (parser, "else", 1); @@ -286,14 +322,6 @@ expression: | '+' expression %prec UNARY { $$ = + $2; } -| DEFINED IDENTIFIER %prec UNARY { - string_list_t *macro = hash_table_find (parser->defines, $2); - talloc_free ($2); - if (macro) - $$ = 1; - else - $$ = 0; - } | '(' expression ')' { $$ = $2; } @@ -347,7 +375,7 @@ preprocessing_token: | INTEGER { 
$$ = _token_create_ival (parser, INTEGER, $1); } -| punctuator { +| operator { $$ = _token_create_ival (parser, $1, $1); } | OTHER { @@ -358,7 +386,7 @@ preprocessing_token: } ; -punctuator: +operator: '[' { $$ = '['; } | ']' { $$ = ']'; } | '(' { $$ = '('; } @@ -389,6 +417,7 @@ punctuator: | ';' { $$ = ';'; } | ',' { $$ = ','; } | PASTE { $$ = PASTE; } +| DEFINED { $$ = DEFINED; } ; %% @@ -830,6 +859,9 @@ glcpp_parser_create (void) parser->skip_stack = NULL; + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + return parser; } @@ -849,6 +881,39 @@ glcpp_parser_destroy (glcpp_parser_t *parser) talloc_free (parser); } +/* Replace any occurences of DEFINED tokens in 'list' with either a + * '0' or '1' INTEGER token depending on whether the next token in the + * list is defined or not. */ +static void +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node, *next; + string_list_t *macro; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) { + if (node->token->type != DEFINED) + continue; + next = node->next; + while (next && next->token->type == SPACE) + next = next->next; + if (next == NULL || next->token->type != IDENTIFIER) { + fprintf (stderr, "Error: operator \"defined\" requires an identifier\n"); + exit (1); + } + macro = hash_table_find (parser->defines, + next->token->value.str); + + node->token->type = INTEGER; + node->token->value.ival = (macro != NULL); + node->next = next->next; + } +} + + /* Appends onto 'expansion' a non-macro token or the expansion of an * object-like macro. 
* @@ -1206,7 +1271,53 @@ _define_function_macro (glcpp_parser_t *parser, static int glcpp_parser_lex (glcpp_parser_t *parser) { - return glcpp_lex (parser->scanner); + token_node_t *node; + int ret; + + if (parser->lex_from_list == NULL) + return glcpp_lex (parser->scanner); + + node = parser->lex_from_node; + + if (node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + return NEWLINE; + } + + yylval = node->token->value; + ret = node->token->type; + + parser->lex_from_node = node->next; + + return ret; +} + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + assert (parser->lex_from_list == NULL); + + /* Copy list, eliminating any space tokens. */ + parser->lex_from_list = _token_list_create (parser); + + for (node = list->head; node; node = node->next) { + if (node->token->type == SPACE) + continue; + _token_list_append (parser->lex_from_list, node->token); + } + + talloc_free (list); + + parser->lex_from_node = parser->lex_from_list->head; + + /* It's possible the list consisted of nothing but whitespace. 
*/ + if (parser->lex_from_node == NULL) { + talloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + } } static void diff --git a/glcpp.h b/glcpp.h index 36ab0e7ca5..e5be1a6cd6 100644 --- a/glcpp.h +++ b/glcpp.h @@ -129,6 +129,8 @@ struct glcpp_parser { string_list_t *active; int space_tokens; skip_node_t *skip_stack; + token_list_t *lex_from_list; + token_node_t *lex_from_node; }; glcpp_parser_t * diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 0000000000..3da79a0d96 --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif -- cgit v1.2.3 From 0324cad796b7a68634a729719f08fcbb5bbd04cc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:53:05 -0700 Subject: All macro lookups should be of type macro_t, not string_list_t. This is what I get for using a non-type-safe hash-table implementation. 
--- glcpp-parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index cce8a70156..a809ebf3af 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -199,7 +199,7 @@ control_line: _define_function_macro (parser, $2, $4, $6); } | HASH_UNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); if (macro) { /* XXX: Need hash table to support a real way * to remove an element rather than prefixing @@ -222,12 +222,12 @@ control_line: glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro != NULL); } | HASH_IFNDEF IDENTIFIER NEWLINE { - string_list_t *macro = hash_table_find (parser->defines, $2); + macro_t *macro = hash_table_find (parser->defines, $2); talloc_free ($2); _glcpp_parser_skip_stack_push_if (parser, macro == NULL); } @@ -889,7 +889,7 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list) { token_node_t *node, *next; - string_list_t *macro; + macro_t *macro; if (list == NULL) return; -- cgit v1.2.3 From 95951ea7bb8728cf54ae4136cb59d0af9e8a06bd Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 15:57:10 -0700 Subject: Treat newlines as space when invoking a function-like macro invocation. This adds three new pieces of state to the parser, (is_control_line, newline_as_space, and paren_count), and a large amount of messy code. I'd definitely like to see a cleaner solution for this. With this fix, the "define-func-extra-newlines" now passes so we put it back to test #26 where it was originally (lately it has been known as test #55). Also, we tweak test 25 slightly. 
Previously this test was ending a file with a function-like macro name that was not actually a macro (not followed by a left parenthesis). As is, this fix was making that test fail because the text_line production expects to see a terminating NEWLINE, but that NEWLINE is now getting turned into a SPACE here. This seems unlikely to be a problem in the wild, (function macros being used in a non-macro sense seems rare enough---but more than likely they won't happen at the end of a file). Still, we document this shortcoming in the README. --- README | 4 +++ glcpp-parse.y | 61 ++++++++++++++++++++++++++++++++-- glcpp.h | 3 ++ tests/025-func-macro-as-non-macro.c | 2 +- tests/026-define-func-extra-newlines.c | 6 ++++ tests/055-define-func-extra-newlines.c | 6 ---- 6 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 tests/026-define-func-extra-newlines.c delete mode 100644 tests/055-define-func-extra-newlines.c diff --git a/README b/README index f0f64c2644..ab42a3ffe1 100644 --- a/README +++ b/README @@ -24,3 +24,7 @@ parentheses. The #error, #pragma, #extension, #version, and #line macros are not yet supported. + +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is). 
\ No newline at end of file diff --git a/glcpp-parse.y b/glcpp-parse.y index a809ebf3af..1346b65aff 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -856,6 +856,9 @@ glcpp_parser_create (void) hash_table_string_compare); parser->active = _string_list_create (parser); parser->space_tokens = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; parser->skip_stack = NULL; @@ -1274,8 +1277,62 @@ glcpp_parser_lex (glcpp_parser_t *parser) token_node_t *node; int ret; - if (parser->lex_from_list == NULL) - return glcpp_lex (parser->scanner); + if (parser->lex_from_list == NULL) { + ret = glcpp_lex (parser->scanner); + + /* XXX: This ugly block of code exists for the sole + * purpose of converting a NEWLINE token into a SPACE + * token, but only in the case where we have seen a + * function-like macro name, but have not yet seen its + * closing parenthesis. + * + * There's perhaps a more compact way to do this with + * mid-rule actions in the grammar. + * + * I'm definitely not pleased with the complexity of + * this code here. 
+ */ + if (parser->newline_as_space) + { + if (ret == '(') { + parser->paren_count++; + } else if (ret == ')') { + parser->paren_count--; + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } else if (ret == NEWLINE) { + ret = SPACE; + } else if (ret != SPACE) { + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } + } + else if (parser->in_control_line) + { + if (ret == NEWLINE) + parser->in_control_line = 0; + } + else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC || + ret == HASH_UNDEF || ret == HASH_IF || + ret == HASH_IFDEF || ret == HASH_IFNDEF || + ret == HASH_ELIF || ret == HASH_ELSE || + ret == HASH_ENDIF || ret == HASH) + { + parser->in_control_line = 1; + } + else if (ret == IDENTIFIER) + { + macro_t *macro; + macro = hash_table_find (parser->defines, + yylval.str); + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } + } + + return ret; + } node = parser->lex_from_node; diff --git a/glcpp.h b/glcpp.h index e5be1a6cd6..5c8c304a9c 100644 --- a/glcpp.h +++ b/glcpp.h @@ -128,6 +128,9 @@ struct glcpp_parser { struct hash_table *defines; string_list_t *active; int space_tokens; + int newline_as_space; + int in_control_line; + int paren_count; skip_node_t *skip_stack; token_list_t *lex_from_list; token_node_t *lex_from_node; diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9d..b433671d1b 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c new file mode 100644 index 0000000000..0d83740530 --- /dev/null +++ b/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c deleted file mode 100644 index 0d83740530..0000000000 --- 
a/tests/055-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) -- cgit v1.2.3 From a8ea26d7c94526518670e54f44336f433d0ac77c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 16:18:05 -0700 Subject: Add two tests developed on the take-2 branch. The define-chain-obj-to-func-parens-in-text test passes here while the if-with-macros test fails. --- tests/054-if-with-macros.c | 34 ++++++++++++++++++++++ .../055-define-chain-obj-to-func-parens-in-text.c | 3 ++ 2 files changed, 37 insertions(+) create mode 100644 tests/054-if-with-macros.c create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c diff --git a/tests/054-if-with-macros.c b/tests/054-if-with-macros.c new file mode 100644 index 0000000000..3da79a0d96 --- /dev/null +++ b/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c b/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 0000000000..00f2c2346d --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() -- cgit v1.2.3 From 7db2402a8009772a3f10d19cfc7f30be9ee79295 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 26 May 2010 17:01:57 -0700 Subject: Add support (and test) for an object-to-function chain with the parens in the content. That is, the following case: #define foo(x) (x) #define bar bar(baz) which now works with this (ugly) commit. I definitely want to come up with something cleaner than this. 
--- glcpp-parse.y | 65 ++++++++++++++++------ .../055-define-chain-obj-to-func-parens-in-text.c | 3 + 2 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 1346b65aff..abdcd1ed5d 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -926,9 +926,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. */ static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1075,10 +1075,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static function_status_t +_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1103,7 +1103,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return; + return FUNCTION_NOT_A_FUNCTION; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1112,7 +1112,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return; + return FUNCTION_STATUS_SUCCESS; } if (_argument_list_length (arguments) != @@ -1123,7 +1123,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length 
(macro->parameters)); - return; + exit (1); } /* Perform argument substitution on the replacement list. */ @@ -1191,6 +1191,8 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); + + return FUNCTION_STATUS_SUCCESS; } static void @@ -1199,19 +1201,50 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; + token_list_t *intermediate, *list_orig = list; + int i, need_rescan = 0; if (list == NULL) return; - for (node = list->head; node; node = node->next) - { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) + intermediate = _token_list_create (parser); + + /* XXX: The two-pass expansion here is really ugly. The + * problem this is solving is that we can expand a macro into + * a function-like macro name, and then we need to recognize + * that as a function-like macro, but perhaps the parentheses + * and arguments aren't on the token list yet, (since they are + * in the actual content so they are part of what we are + * expanding. + * + * This ugly hack works, but is messy, fragile, and hard to + * maintain. I think a cleaner solution would separate the + * notions of expanding and appending and avoid this problem + * altogether. 
+ */ + + for (i = 0; i < 2; i++) { + if (i == 1) { + list = intermediate; + intermediate = _token_list_create (parser); + } + for (node = list->head; node; node = node->next) { - _glcpp_parser_expand_function_onto (parser, &node, - result); + if (_expand_token_onto (parser, node->token, + intermediate)) + { + if (_expand_function_onto (parser, &node, + intermediate)) + { + need_rescan = 1; + } + } } + if (list != list_orig) + talloc_free (list); } + + _token_list_append_list (result, intermediate); } void diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c b/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 0000000000..00f2c2346d --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() -- cgit v1.2.3 From dd7490093d84ce74a99922c3544b51c3f5d43345 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 10:12:33 -0700 Subject: Avoid treating an expanded comma as an argument separator. That is, a function-like invocation foo(x) is valid as a single-argument invocation even if 'x' is a macro that expands into a value with a comma. Add a new COMMA_FINAL token type to handle this, and add a test for this case, (which passes). 
--- glcpp-parse.y | 18 ++++++++++++++++-- tests/056-macro-argument-with-comma.c | 4 ++++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/056-macro-argument-with-comma.c diff --git a/glcpp-parse.y b/glcpp-parse.y index abdcd1ed5d..b2684d06d9 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -740,6 +740,9 @@ _token_print (token_t *token) case PASTE: printf ("##"); break; + case COMMA_FINAL: + printf (","); + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -936,7 +939,18 @@ _expand_token_onto (glcpp_parser_t *parser, /* We only expand identifiers */ if (token->type != IDENTIFIER) { - _token_list_append (result, token); + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. 
*/ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } return 0; } diff --git a/tests/056-macro-argument-with-comma.c b/tests/056-macro-argument-with-comma.c new file mode 100644 index 0000000000..58701d1f25 --- /dev/null +++ b/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) -- cgit v1.2.3 From 602a34769a0850a98366c4011ce8b8c7d08c9276 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 10:14:38 -0700 Subject: Add test 56 for a comma within the expansion of an argument. This case was tricky on the take-2 branch. It happens to be passing already here. --- tests/056-macro-argument-with-comma.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/056-macro-argument-with-comma.c diff --git a/tests/056-macro-argument-with-comma.c b/tests/056-macro-argument-with-comma.c new file mode 100644 index 0000000000..58701d1f25 --- /dev/null +++ b/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) -- cgit v1.2.3 From a65cf7b1d29e98ef3bf31051df8a06cb394d131f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 11:55:36 -0700 Subject: Make two list-processing functions do nothing with an empty list. This just makes these functions easier to understand all around. In the case of _token_list_append_list this is an actual bug fix, (where appending an empty list onto a non-empty list would previously scramble the tail pointer of the original list). 
--- glcpp-parse.y | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index b2684d06d9..ba79a611f6 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -666,6 +666,9 @@ _token_list_append (token_list_t *list, token_t *token) void _token_list_append_list (token_list_t *list, token_list_t *tail) { + if (tail == NULL || tail->head == NULL) + return; + if (list->head == NULL) { list->head = tail->head; } else { @@ -1218,7 +1221,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *intermediate, *list_orig = list; int i, need_rescan = 0; - if (list == NULL) + if (list == NULL || list->head == NULL) return; intermediate = _token_list_create (parser); -- cgit v1.2.3 From a19297b26e971e5a9dbe00b4254931505da4b5a9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 13:29:19 -0700 Subject: Provide support for empty arguments in macro invocations. For this we always add a new argument to the argument list as soon as possible, without waiting until we see some argument token. This does mean we need to take some extra care when comparing the number of arguments with the number of expected arguments. In addition to matching numbers, we also support one (empty) argument when zero arguments are expected. Add a test case here for this, which does pass. 
--- glcpp-parse.y | 20 +++++++++++--------- tests/057-empty-arguments.c | 6 ++++++ 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 tests/057-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index ba79a611f6..3e0a96528b 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1044,7 +1044,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) last = node; node = node->next; - argument = NULL; + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); for (paren_count = 1; node; last = node, node = node->next) { if (node->token->type == '(') @@ -1064,18 +1065,16 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node->token->type == ',' && paren_count == 1) { - if (argument) - _token_list_trim_trailing_space (argument); - argument = NULL; + _token_list_trim_trailing_space (argument); + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); } else { - if (argument == NULL) { + if (argument->head == NULL) { /* Don't treat initial whitespace as * part of the arguement. */ if (node->token->type == SPACE) continue; - argument = _token_list_create (arguments); - _argument_list_append (arguments, argument); } _token_list_append (argument, node->token); } @@ -1132,8 +1131,11 @@ _expand_function_onto (glcpp_parser_t *parser, return FUNCTION_STATUS_SUCCESS; } - if (_argument_list_length (arguments) != - _string_list_length (macro->parameters)) + if (! 
((_argument_list_length (arguments) == + _string_list_length (macro->parameters)) || + (_string_list_length (macro->parameters) == 0 && + _argument_list_length (arguments) == 1 && + arguments->head->argument->head == NULL))) { fprintf (stderr, "Error: macro %s invoked with %d arguments (expected %d)\n", diff --git a/tests/057-empty-arguments.c b/tests/057-empty-arguments.c new file mode 100644 index 0000000000..6140232865 --- /dev/null +++ b/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) -- cgit v1.2.3 From fb48fcdf9b5a5b002469ed247809fb0294d6c7a8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 13:44:13 -0700 Subject: Add test for macro invocations with empty arguments. This case was recently solved on the take-2 branch. --- tests/057-empty-arguments.c | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/057-empty-arguments.c diff --git a/tests/057-empty-arguments.c b/tests/057-empty-arguments.c new file mode 100644 index 0000000000..6140232865 --- /dev/null +++ b/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) -- cgit v1.2.3 From 85b50e840d969c4d9ebcfcc3df1df7a95e07e34e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:01:18 -0700 Subject: Add placeholder tokens to support pasting with empty arguments. Along with a passing test to verify that this works. 
--- glcpp-parse.y | 36 +++++++++++++++++++++++++------ tests/058-token-pasting-empty-arguments.c | 5 +++++ 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 tests/058-token-pasting-empty-arguments.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 3e0a96528b..d587a4bf33 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,7 +132,7 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE %type IDENTIFIER OTHER @@ -746,6 +746,9 @@ _token_print (token_t *token) case COMMA_FINAL: printf (","); break; + case PLACEHOLDER: + /* Nothing to print. */ + break; default: fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type); break; @@ -756,6 +759,17 @@ _token_print (token_t *token) static void _token_paste (token_t *token, token_t *other) { + /* Pasting a placeholder onto anything makes no change. */ + if (other->type == PLACEHOLDER) + return; + + /* When 'token' is a placeholder, just return contents of 'other'. */ + if (token->type == PLACEHOLDER) { + token->type = other->type; + token->value = other->value; + return; + } + /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { @@ -1159,10 +1173,20 @@ _expand_function_onto (glcpp_parser_t *parser, argument = _argument_list_member_at (arguments, parameter_index); /* Before substituting, we expand the argument - * tokens. 
*/ - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + * tokens, or append a placeholder token for + * an empty argument. */ + if (argument->head) { + _glcpp_parser_expand_token_list_onto (parser, + argument, + substituted); + } else { + token_t *new_token; + + new_token = _token_create_ival (substituted, + PLACEHOLDER, + PLACEHOLDER); + _token_list_append (substituted, new_token); + } } else { _token_list_append (substituted, node->token); } @@ -1196,7 +1220,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - exit (1); + return FUNCTION_STATUS_SUCCESS; } _token_paste (node->token, next_non_space->token); diff --git a/tests/058-token-pasting-empty-arguments.c b/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 0000000000..8ac260c76b --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) -- cgit v1.2.3 From 050e3ded1ea05cfe336dd0cd20212d17d7960c9e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:36:29 -0700 Subject: Implement token pasting of integers. To do this correctly, we change the lexer to lex integers as string values, (new token type of INTEGER_STRING), and only convert to integer values when evaluating an expression value. Add a new test case for this, (which does pass now). 
--- Makefile | 2 +- glcpp-lex.l | 12 ++++++------ glcpp-parse.y | 32 ++++++++++++++++++++++---------- tests/059-token-pasting-integer.c | 4 ++++ 4 files changed, 33 insertions(+), 17 deletions(-) create mode 100644 tests/059-token-pasting-integer.c diff --git a/Makefile b/Makefile index 88116128f8..0c06aa880f 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CFLAGS = -g override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o - gcc -o $@ -ltalloc $^ + gcc -o $@ -ltalloc -lm $^ %.c %.h: %.y bison --debug --defines=$*.h --output=$*.c $^ diff --git a/glcpp-lex.l b/glcpp-lex.l index d6b7726d36..70d47d2497 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -88,18 +88,18 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } {DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } {HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } "<<" { diff --git a/glcpp-parse.y b/glcpp-parse.y index d587a4bf33..5b2d0d3927 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -132,10 +132,10 @@ glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); %parse-param {glcpp_parser_t *parser} %lex-param {glcpp_parser_t *parser} -%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER NEWLINE OTHER PLACEHOLDER SPACE +%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING NEWLINE 
OTHER PLACEHOLDER SPACE %token PASTE %type expression INTEGER operator SPACE -%type IDENTIFIER OTHER +%type IDENTIFIER INTEGER_STRING OTHER %type identifier_list %type preprocessing_token %type pp_tokens replacement_list text_line @@ -253,7 +253,16 @@ control_line: ; expression: - INTEGER { + INTEGER_STRING { + if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) { + $$ = strtoll ($1 + 2, NULL, 16); + } else if ($1[0] == '0') { + $$ = strtoll ($1, NULL, 8); + } else { + $$ = strtoll ($1, NULL, 10); + } + } +| INTEGER { $$ = $1; } | expression OR expression { @@ -372,8 +381,8 @@ preprocessing_token: IDENTIFIER { $$ = _token_create_str (parser, IDENTIFIER, $1); } -| INTEGER { - $$ = _token_create_ival (parser, INTEGER, $1); +| INTEGER_STRING { + $$ = _token_create_str (parser, INTEGER_STRING, $1); } | operator { $$ = _token_create_ival (parser, $1, $1); @@ -710,6 +719,7 @@ _token_print (token_t *token) printf ("%" PRIxMAX, token->value.ival); break; case IDENTIFIER: + case INTEGER_STRING: case OTHER: printf ("%s", token->value.str); break; @@ -828,11 +838,13 @@ _token_paste (token_t *token, token_t *other) /* Two string-valued tokens can usually just be mashed * together. * - * XXX: Since our 'OTHER' case is currently so loose, this may - * allow some things thruogh that should be treated as - * errors. */ - if ((token->type == IDENTIFIER || token->type == OTHER) && - (other->type == IDENTIFIER || other->type == OTHER)) + * XXX: This isn't actually legitimate. Several things here + * should result in a diagnostic since the result cannot be a + * valid, single pre-processing token. For example, pasting + * "123" and "abc" is not legal, but we don't catch that + * here. 
*/ + if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && + (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { token->value.str = talloc_strdup_append (token->value.str, other->value.str); diff --git a/tests/059-token-pasting-integer.c b/tests/059-token-pasting-integer.c new file mode 100644 index 0000000000..37b895a423 --- /dev/null +++ b/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) -- cgit v1.2.3 From 886e05a35a319cdace9afed93d0cc8df2c7f33e0 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:45:20 -0700 Subject: Add test for token-pasting of integers. This test was tricky to make pass in the take-2 branch. It ends up passing already here with no additional effort, (since we are lexing integers as string-valued token except when in the ST_IF state in the lexer anyway). --- tests/059-token-pasting-integer.c | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/059-token-pasting-integer.c diff --git a/tests/059-token-pasting-integer.c b/tests/059-token-pasting-integer.c new file mode 100644 index 0000000000..37b895a423 --- /dev/null +++ b/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) -- cgit v1.2.3 From baa17c87485b5e776ec142844f5df38a3df9dccc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 27 May 2010 14:53:51 -0700 Subject: Remove blank lines from output files before comparing. Recently I'm seeing cases where "gcc -E" mysteriously omits blank lines, (even though it prints the blank lines in other very similar cases). Rather than trying to decipher and imitate this, just get rid of the blank lines. This approach with sed to kill the lines before the diff is better than "diff -B" since when there is an actual difference, the presence of blank lines won't make the diff harder to read. 
--- .gitignore | 1 + tests/glcpp-test | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index d67bd38c93..b88f0cc75c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ glcpp-parse.h *~ tests/*.expected tests/*.gcc +tests/*.glcpp tests/*.out diff --git a/tests/glcpp-test b/tests/glcpp-test index bf88d4462e..92c994979a 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -3,8 +3,9 @@ set -e for test in *.c; do echo "Testing $test" - ../glcpp < $test > $test.out + ../glcpp < $test > $test.glcpp + grep -v '^$' < $test.glcpp > $test.out || true gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc > $test.expected - diff -B -u $test.expected $test.out + grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true + diff -u $test.expected $test.out done -- cgit v1.2.3 From 95ec433d59be234cf2695ae091cee4ace3314d21 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:00:43 -0700 Subject: Revert "Add support for an object-to-function chain with the parens in the content." This reverts commit 7db2402a8009772a3f10d19cfc7f30be9ee79295 It doesn't revert the new test case from that commit, just the extremely ugly second-pass implementation. --- glcpp-parse.y | 65 +++++++++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 5b2d0d3927..f4cb72a133 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -958,9 +958,9 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, * needs further expansion. 
*/ static int -_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_t *token, + token_list_t *result) { const char *identifier; macro_t *macro; @@ -1117,10 +1117,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) /* Prints the expansion of *node (consuming further tokens from the * list as necessary). Upon return *node will be the last consumed * node, such that further processing can continue with node->next. */ -static function_status_t -_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +static void +_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, + token_node_t **node_ret, + token_list_t *result) { macro_t *macro; token_node_t *node; @@ -1145,7 +1145,7 @@ _expand_function_onto (glcpp_parser_t *parser, break; case FUNCTION_NOT_A_FUNCTION: _token_list_append (result, node->token); - return FUNCTION_NOT_A_FUNCTION; + return; case FUNCTION_UNBALANCED_PARENTHESES: fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", identifier); @@ -1154,7 +1154,7 @@ _expand_function_onto (glcpp_parser_t *parser, if (macro->replacements == NULL) { talloc_free (arguments); - return FUNCTION_STATUS_SUCCESS; + return; } if (! ((_argument_list_length (arguments) == @@ -1168,7 +1168,7 @@ _expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - exit (1); + return; } /* Perform argument substitution on the replacement list. 
*/ @@ -1246,8 +1246,6 @@ _expand_function_onto (glcpp_parser_t *parser, _string_list_pop (parser->active); talloc_free (arguments); - - return FUNCTION_STATUS_SUCCESS; } static void @@ -1256,50 +1254,19 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *result) { token_node_t *node; - token_list_t *intermediate, *list_orig = list; - int i, need_rescan = 0; if (list == NULL || list->head == NULL) return; - intermediate = _token_list_create (parser); - - /* XXX: The two-pass expansion here is really ugly. The - * problem this is solving is that we can expand a macro into - * a function-like macro name, and then we need to recognize - * that as a function-like macro, but perhaps the parentheses - * and arguments aren't on the token list yet, (since they are - * in the actual content so they are part of what we are - * expanding. - * - * This ugly hack works, but is messy, fragile, and hard to - * maintain. I think a cleaner solution would separate the - * notions of expanding and appending and avoid this problem - * altogether. - */ - - for (i = 0; i < 2; i++) { - if (i == 1) { - list = intermediate; - intermediate = _token_list_create (parser); - } - for (node = list->head; node; node = node->next) + for (node = list->head; node; node = node->next) + { + if (_glcpp_parser_expand_token_onto (parser, node->token, + result)) { - if (_expand_token_onto (parser, node->token, - intermediate)) - { - if (_expand_function_onto (parser, &node, - intermediate)) - { - need_rescan = 1; - } - } + _glcpp_parser_expand_function_onto (parser, &node, + result); } - if (list != list_orig) - talloc_free (list); } - - _token_list_append_list (result, intermediate); } void -- cgit v1.2.3 From 9b519f9c7997e0ec02c66d39edc12912aebb9eca Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:04:13 -0700 Subject: Stop interrupting the test suite at the first failure. 
This behavior was useful when starting the implementation over ("take-2") where the whole test suite was failing. This made it easy to focus on one test at a time and get each working. More recently, we got the whole suite working, so we don't need this feature anymore. And in the previous commit, we regressed a couple of tests, so it's nice to be able to see all the failures with a single run of the suite. --- tests/glcpp-test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/glcpp-test b/tests/glcpp-test index 92c994979a..ba398af0d5 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -1,5 +1,4 @@ #!/bin/sh -set -e for test in *.c; do echo "Testing $test" -- cgit v1.2.3 From 3c93d397050bbeccb7809e53a425c860df947c45 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 08:17:46 -0700 Subject: Simplify calling conventions of functions under expand_token_list_onto. We previously had a confusing thing where _expand_token_onto would return a non-zero value to indicate that the caller should then call _expand_function_onto. It's much cleaner for _expand_token_onto to just do what's needed and call the necessary function. --- glcpp-parse.y | 159 +++++++++++++++++++++++++++------------------------------- 1 file changed, 74 insertions(+), 85 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index f4cb72a133..9f97b2a282 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -948,81 +948,6 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, } } - -/* Appends onto 'expansion' a non-macro token or the expansion of an - * object-like macro. - * - * Returns 0 if this token is completely processed. - * - * Returns 1 in the case that 'token' is a function-like macro that - * needs further expansion. 
- */ -static int -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_t *token, - token_list_t *result) -{ - const char *identifier; - macro_t *macro; - token_list_t *expansion; - - /* We only expand identifiers */ - if (token->type != IDENTIFIER) { - /* We change any COMMA into a COMMA_FINAL to prevent - * it being mistaken for an argument separator - * later. */ - if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); - } - return 0; - } - - /* Look up this identifier in the hash table. */ - identifier = token->value.str; - macro = hash_table_find (parser->defines, identifier); - - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return 0; - } - - /* Finally, don't expand this macro if we're already actively - * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { - /* We change the token type here from IDENTIFIER to - * OTHER to prevent any future expansion of this - * unexpanded token. */ - char *str; - token_t *new_token; - - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return 0; - } - - /* For function-like macros return 1 for further processing. */ - if (macro->is_function) { - return 1; - } - - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); - _string_list_pop (parser->active); - - return 0; -} - typedef enum function_status { FUNCTION_STATUS_SUCCESS, @@ -1114,9 +1039,10 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) return FUNCTION_STATUS_SUCCESS; } -/* Prints the expansion of *node (consuming further tokens from the - * list as necessary). 
Upon return *node will be the last consumed - * node, such that further processing can continue with node->next. */ +/* Appends expansion of *node (consuming further tokens from the list + * as necessary) onto result. Upon return *node will be the last + * consumed node, such that further processing can continue with + * node->next. */ static void _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, token_node_t **node_ret, @@ -1232,7 +1158,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return FUNCTION_STATUS_SUCCESS; + return; } _token_paste (node->token, next_non_space->token); @@ -1248,6 +1174,74 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, talloc_free (arguments); } + +/* Appends the expansion of the token in *node onto result. + * Upon return *node will be the last consumed node, such that further + * processing can continue with node->next. */ +static void +_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, + token_node_t **node, + token_list_t *result) +{ + token_t *token = (*node)->token; + const char *identifier; + macro_t *macro; + token_list_t *expansion; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + /* We change any COMMA into a COMMA_FINAL to prevent + * it being mistaken for an argument separator + * later. */ + if (token->type == ',') { + token_t *new_token; + + new_token = _token_create_ival (result, COMMA_FINAL, + COMMA_FINAL); + _token_list_append (result, new_token); + } else { + _token_list_append (result, token); + } + return; + } + + /* Look up this identifier in the hash table. */ + identifier = token->value.str; + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so just append. 
*/ + if (macro == NULL) { + _token_list_append (result, token); + return; + } + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_string_list_contains (parser->active, identifier, NULL)) + { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_t *new_token; + + str = xtalloc_strdup (result, token->value.str); + new_token = _token_create_str (result, OTHER, str); + _token_list_append (result, new_token); + return; + } + + if (macro->is_function) { + _glcpp_parser_expand_function_onto (parser, node, result); + } else { + _string_list_push (parser->active, identifier); + _glcpp_parser_expand_token_list_onto (parser, + macro->replacements, + result); + _string_list_pop (parser->active); + } +} + static void _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, token_list_t *list, @@ -1260,12 +1254,7 @@ _glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { - if (_glcpp_parser_expand_token_onto (parser, node->token, - result)) - { - _glcpp_parser_expand_function_onto (parser, &node, - result); - } + _glcpp_parser_expand_token_onto (parser, &node, result); } } -- cgit v1.2.3 From 681afbc855c86df8c3521ccdfadb7f16b9729baa Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:06:02 -0700 Subject: Perform macro by replacing tokens in original list. We take the results of macro expansion and splice them into the original token list over which we are iterating. This makes it easy for function-like macro invocations to find their arguments since they are simply subsequent tokens on the list. This fixes the recently-introduced regressions (tests 55 and 56) and also passes new tests 60 and 61 introduced to stress this feature, (with macro-argument parentheses split between a macro value and the textual input). 
--- glcpp-parse.y | 278 +++++++++++++-------- .../060-left-paren-in-macro-right-paren-in-text.c | 3 + tests/061-define-chain-obj-to-func-multi.c | 5 + 3 files changed, 187 insertions(+), 99 deletions(-) create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c create mode 100644 tests/061-define-chain-obj-to-func-multi.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 9f97b2a282..c89d7bf159 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -101,13 +101,12 @@ _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list); static void -_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, - token_list_t *list); +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list); static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result); +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); static void _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition); @@ -218,7 +217,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_IFDEF IDENTIFIER NEWLINE { @@ -240,7 +240,8 @@ control_line: _token_list_append (expanded, token); talloc_unlink (parser, token); _glcpp_parser_evaluate_defined (parser, $2); - _glcpp_parser_expand_token_list_onto (parser, $2, expanded); + _glcpp_parser_expand_token_list (parser, $2); + _token_list_append_list (expanded, $2); glcpp_parser_lex_from (parser, expanded); } | HASH_ELSE NEWLINE { @@ -688,6 +689,22 @@ _token_list_append_list (token_list_t *list, token_list_t *tail) list->non_space_tail = tail->non_space_tail; } +token_list_t * +_token_list_copy (void *ctx, token_list_t *other) +{ + token_list_t 
*copy; + token_node_t *node; + + if (other == NULL) + return NULL; + + copy = _token_list_create (ctx); + for (node = other->head; node; node = node->next) + _token_list_append (copy, node->token); + + return copy; +} + void _token_list_trim_trailing_space (token_list_t *list) { @@ -956,9 +973,12 @@ typedef enum function_status } function_status_t; /* Find a set of function-like macro arguments by looking for a - * balanced set of parentheses. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. + * balanced set of parentheses. + * + * When called, 'node' should be the opening-parenthesis token, (or + * perhaps preceeding SPACE tokens). Upon successful return *last will + * be the last consumed node, (corresponding to the closing right + * parenthesis). * * Return values: * @@ -976,13 +996,13 @@ typedef enum function_status * Macro name is not followed by a balanced set of parentheses. */ static function_status_t -_arguments_parse (argument_list_t *arguments, token_node_t **node_ret) +_arguments_parse (argument_list_t *arguments, + token_node_t *node, + token_node_t **last) { token_list_t *argument; - token_node_t *node = *node_ret, *last; int paren_count; - last = node; node = node->next; /* Ignore whitespace before first parenthesis. 
*/ @@ -992,13 +1012,12 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) if (node == NULL || node->token->type != '(') return FUNCTION_NOT_A_FUNCTION; - last = node; node = node->next; argument = _token_list_create (arguments); _argument_list_append (arguments, argument); - for (paren_count = 1; node; last = node, node = node->next) { + for (paren_count = 1; node; node = node->next) { if (node->token->type == '(') { paren_count++; @@ -1006,11 +1025,8 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) else if (node->token->type == ')') { paren_count--; - if (paren_count == 0) { - last = node; - node = node->next; + if (paren_count == 0) break; - } } if (node->token->type == ',' && @@ -1031,32 +1047,44 @@ _arguments_parse (argument_list_t *arguments, token_node_t **node_ret) } } - if (node && paren_count) + if (paren_count) return FUNCTION_UNBALANCED_PARENTHESES; - *node_ret = last; + *last = node; return FUNCTION_STATUS_SUCCESS; } -/* Appends expansion of *node (consuming further tokens from the list - * as necessary) onto result. Upon return *node will be the last - * consumed node, such that further processing can continue with - * node->next. */ -static void -_glcpp_parser_expand_function_onto (glcpp_parser_t *parser, - token_node_t **node_ret, - token_list_t *result) +/* This is a helper function that's essentially part of the + * implementation of _glcpp_parser_expand_node. It shouldn't be called + * except for by that function. + * + * Returns NULL if node is a simple token with no expansion, (that is, + * although 'node' corresponds to an identifier defined as a + * function-like macro, it is not followed with a parenthesized + * argument list). + * + * Compute the complete expansion of node (which is a function-like + * macro) and subsequent nodes which are arguments. 
+ * + * Returns the token list that results from the expansion and sets + * *last to the last node in the list that was consumed by the + * expansion. Specificallty, *last will be set as follows: as the + * token of the closing right parenthesis. + */ +static token_list_t * +_glcpp_parser_expand_function (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) + { macro_t *macro; - token_node_t *node; const char *identifier; argument_list_t *arguments; function_status_t status; token_list_t *substituted; int parameter_index; - node = *node_ret; identifier = node->token->value.str; macro = hash_table_find (parser->defines, identifier); @@ -1064,23 +1092,20 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, assert (macro->is_function); arguments = _argument_list_create (parser); - status = _arguments_parse (arguments, node_ret); + status = _arguments_parse (arguments, node, last); switch (status) { case FUNCTION_STATUS_SUCCESS: break; case FUNCTION_NOT_A_FUNCTION: - _token_list_append (result, node->token); - return; + return NULL; case FUNCTION_UNBALANCED_PARENTHESES: - fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", - identifier); - exit (1); + return NULL; } if (macro->replacements == NULL) { talloc_free (arguments); - return; + return _token_list_create (parser); } if (! ((_argument_list_length (arguments) == @@ -1094,7 +1119,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, identifier, _argument_list_length (arguments), _string_list_length (macro->parameters)); - return; + return NULL; } /* Perform argument substitution on the replacement list. */ @@ -1114,9 +1139,9 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, * tokens, or append a placeholder token for * an empty argument. 
*/ if (argument->head) { - _glcpp_parser_expand_token_list_onto (parser, - argument, - substituted); + _glcpp_parser_expand_token_list (parser, + argument); + _token_list_append_list (substituted, argument); } else { token_t *new_token; @@ -1158,7 +1183,7 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, if (next_non_space == NULL) { fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n"); - return; + return NULL; } _token_paste (node->token, next_non_space->token); @@ -1168,22 +1193,33 @@ _glcpp_parser_expand_function_onto (glcpp_parser_t *parser, } _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, substituted, result); + _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); - talloc_free (arguments); + return substituted; } - -/* Appends the expansion of the token in *node onto result. - * Upon return *node will be the last consumed node, such that further - * processing can continue with node->next. */ -static void -_glcpp_parser_expand_token_onto (glcpp_parser_t *parser, - token_node_t **node, - token_list_t *result) +/* Compute the complete expansion of node, (and subsequent nodes after + * 'node' in the case that 'node' is a function-like macro and + * subsequent nodes are arguments). + * + * Returns NULL if node is a simple token with no expansion. + * + * Otherwise, returns the token list that results from the expansion + * and sets *last to the last node in the list that was consumed by + * the expansion. Specificallty, *last will be set as follows: + * + * As 'node' in the case of object-like macro expansion. + * + * As the token of the closing right parenthesis in the case of + * function-like macro expansion. 
+ */ +static token_list_t * +_glcpp_parser_expand_node (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) { - token_t *token = (*node)->token; + token_t *token = node->token; const char *identifier; macro_t *macro; token_list_t *expansion; @@ -1194,90 +1230,134 @@ _glcpp_parser_expand_token_onto (glcpp_parser_t *parser, * it being mistaken for an argument separator * later. */ if (token->type == ',') { - token_t *new_token; - - new_token = _token_create_ival (result, COMMA_FINAL, - COMMA_FINAL); - _token_list_append (result, new_token); - } else { - _token_list_append (result, token); + token->type = COMMA_FINAL; + token->value.ival = COMMA_FINAL; } - return; + + return NULL; } /* Look up this identifier in the hash table. */ identifier = token->value.str; macro = hash_table_find (parser->defines, identifier); - /* Not a macro, so just append. */ - if (macro == NULL) { - _token_list_append (result, token); - return; - } + /* Not a macro, so no expansion needed. */ + if (macro == NULL) + return NULL; /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) - { + if (_string_list_contains (parser->active, identifier, NULL)) { /* We change the token type here from IDENTIFIER to * OTHER to prevent any future expansion of this * unexpanded token. */ char *str; - token_t *new_token; + token_list_t *expansion; + token_t *final; - str = xtalloc_strdup (result, token->value.str); - new_token = _token_create_str (result, OTHER, str); - _token_list_append (result, new_token); - return; + str = xtalloc_strdup (parser, token->value.str); + final = _token_create_str (parser, OTHER, str); + expansion = _token_list_create (parser); + _token_list_append (expansion, final); + *last = node; + return expansion; } - if (macro->is_function) { - _glcpp_parser_expand_function_onto (parser, node, result); - } else { + if (! 
macro->is_function) + { + *last = node; + + if (macro->replacements == NULL) + return _token_list_create (parser); + + expansion = _token_list_copy (parser, macro->replacements); + _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list_onto (parser, - macro->replacements, - result); + _glcpp_parser_expand_token_list (parser, expansion); _string_list_pop (parser->active); + + return expansion; } + + return _glcpp_parser_expand_function (parser, node, last); } +/* Walk over the token list replacing nodes with their expansion. + * Whenever nodes are expanded the walking will walk over the new + * nodes, continuing to expand as necessary. The results are placed in + * 'list' itself; + */ static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result) +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list) { - token_node_t *node; + token_node_t *node_prev; + token_node_t *node, *last; + token_list_t *expansion; - if (list == NULL || list->head == NULL) + if (list == NULL) return; - for (node = list->head; node; node = node->next) - { - _glcpp_parser_expand_token_onto (parser, &node, result); + _token_list_trim_trailing_space (list); + + node_prev = NULL; + node = list->head; + + while (node) { + /* Find the expansion for node, which will replace all + * nodes from node to last, inclusive. */ + expansion = _glcpp_parser_expand_node (parser, node, &last); + if (expansion) { + /* Splice expansion into list, supporting a + * simple deletion if the expansion is + * empty. 
*/ + if (expansion->head) { + if (node_prev) + node_prev->next = expansion->head; + else + list->head = expansion->head; + expansion->tail->next = last->next; + if (last == list->tail) + list->tail = expansion->tail; + } else { + if (node_prev) + node_prev->next = last->next; + else + list->head = last->next; + if (last == list->tail) + list->tail == NULL; + } + } else { + node_prev = node; + } + node = node_prev ? node_prev->next : list->head; } + + list->non_space_tail = list->tail; +} + +static void +_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, + token_list_t *list, + token_list_t *result) +{ + _glcpp_parser_expand_token_list (parser, list); + + _token_list_append_list (result, list); } void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) { - token_list_t *expanded; - token_node_t *node; - function_status_t function_status; - if (list == NULL) return; - expanded = _token_list_create (parser); - - _glcpp_parser_expand_token_list_onto (parser, list, expanded); - - _token_list_trim_trailing_space (expanded); + _glcpp_parser_expand_token_list (parser, list); - _token_list_print (expanded); + _token_list_trim_trailing_space (list); - talloc_free (expanded); + _token_list_print (list); } void diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c b/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 0000000000..ed80ea879c --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define double(a) a*2 +#define foo double( +foo 5) diff --git a/tests/061-define-chain-obj-to-func-multi.c b/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 0000000000..6dbfd1f62d --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) -- cgit v1.2.3 From c7144dc2e0175a8f4922f261d75437b984039a8c Mon Sep 17 00:00:00 2001 From: Carl Worth 
Date: Fri, 28 May 2010 15:12:36 -0700 Subject: Remove some blank lines from the end of some test cases. To match what we have done on the take-2 branch to these test cases. --- tests/049-if-expression-precedence.c | 1 - tests/050-if-defined.c | 2 -- tests/053-if-divide-and-shift.c | 1 - 3 files changed, 4 deletions(-) diff --git a/tests/049-if-expression-precedence.c b/tests/049-if-expression-precedence.c index cea935220f..833ea03882 100644 --- a/tests/049-if-expression-precedence.c +++ b/tests/049-if-expression-precedence.c @@ -3,4 +3,3 @@ failure with operator precedence #else success #endif - diff --git a/tests/050-if-defined.c b/tests/050-if-defined.c index 9838cc747d..34f0f95140 100644 --- a/tests/050-if-defined.c +++ b/tests/050-if-defined.c @@ -15,5 +15,3 @@ failure_3 #else success_3 #endif - - diff --git a/tests/053-if-divide-and-shift.c b/tests/053-if-divide-and-shift.c index ddc1573ab2..d24c54a88d 100644 --- a/tests/053-if-divide-and-shift.c +++ b/tests/053-if-divide-and-shift.c @@ -13,4 +13,3 @@ failure_3 #else success_3 #endif - -- cgit v1.2.3 From 792bdcbeee770b14dc833261e7ef3c1d400e5e3f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:13:11 -0700 Subject: Tweak test 25 slightly, (so the non-macro doesn't end the file). This isn't a problem here, but on the take-2 branch, it was trickier at one point to make a non-macro work when it was the last token of the file. So we use the simpler test case here and defer the other case until later. 
--- tests/025-func-macro-as-non-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/025-func-macro-as-non-macro.c b/tests/025-func-macro-as-non-macro.c index 3dbe026d9d..b433671d1b 100644 --- a/tests/025-func-macro-as-non-macro.c +++ b/tests/025-func-macro-as-non-macro.c @@ -1,2 +1,2 @@ #define foo(bar) bar -foo +foo bar -- cgit v1.2.3 From b1249f69fd687441632c2d2e63618627ae9be442 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:15:00 -0700 Subject: Add two (passing) tests from the take-2 branch. These two tests were tricky to make work on take-2, but happen to already be working here. --- tests/000-content-with-spaces.c | 1 + tests/061-define-chain-obj-to-func-multi.c | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/000-content-with-spaces.c create mode 100644 tests/061-define-chain-obj-to-func-multi.c diff --git a/tests/000-content-with-spaces.c b/tests/000-content-with-spaces.c new file mode 100644 index 0000000000..696cb3a74f --- /dev/null +++ b/tests/000-content-with-spaces.c @@ -0,0 +1 @@ +this is four tokens diff --git a/tests/061-define-chain-obj-to-func-multi.c b/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 0000000000..6dbfd1f62d --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) -- cgit v1.2.3 From 614a9aece0888e7c8221ad2e8a231762442db794 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 28 May 2010 15:15:59 -0700 Subject: Add two more (failing) tests from the take-2 branch. These tests were recently fixed on the take-2 branch, but will require additional work before they will pass here. 
--- tests/058-token-pasting-empty-arguments.c | 5 +++++ tests/060-left-paren-in-macro-right-paren-in-text.c | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 tests/058-token-pasting-empty-arguments.c create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c diff --git a/tests/058-token-pasting-empty-arguments.c b/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 0000000000..8ac260c76b --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c b/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 0000000000..ed80ea879c --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define double(a) a*2 +#define foo double( +foo 5) -- cgit v1.2.3 From 631016946ca8134244c4e58bef6863d204b1119b Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:07:24 -0700 Subject: Fix pass-through of '=' and add a test for it. Previously '=' was not included in our PUNCTUATION regular expression, but it *was* excluded from our OTHER regular expression, so we were getting the default (and harmful) lex action of just printing it. The test we add here is named "punctuator" with the idea that we can extend it as needed for other punctuator testing. --- glcpp-lex.l | 2 +- glcpp-parse.y | 1 + tests/071-punctuator.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/071-punctuator.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 70d47d2497..52269c6b30 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -38,7 +38,7 @@ NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -PUNCTUATION [][(){}.&*~!/%<>^|;,+-] +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? 
diff --git a/glcpp-parse.y b/glcpp-parse.y index c89d7bf159..01ca08ec74 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -426,6 +426,7 @@ operator: | OR { $$ = OR; } | ';' { $$ = ';'; } | ',' { $$ = ','; } +| '=' { $$ = '='; } | PASTE { $$ = PASTE; } | DEFINED { $$ = DEFINED; } ; diff --git a/tests/071-punctuator.c b/tests/071-punctuator.c new file mode 100644 index 0000000000..959d682598 --- /dev/null +++ b/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b -- cgit v1.2.3 From b06096e86eda1257769156523b5738044c6a2b10 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 05:54:19 -0700 Subject: Add test and fix bugs with multiple token-pasting on the same line. The list replacement when token pasting was broken, (failing to properly update the list's tail pointer). Also, memory management when pasting was broken, (modifying the original token's string which would cause problems with multiple calls to a macro which pasted a literal string). We didn't catch this with previous tests because they only pasted argument values. --- glcpp-parse.y | 92 +++++++++++++++++-------------------- tests/072-token-pasting-same-line.c | 2 + 2 files changed, 43 insertions(+), 51 deletions(-) create mode 100644 tests/072-token-pasting-same-line.c diff --git a/glcpp-parse.y b/glcpp-parse.y index 01ca08ec74..f4c834e038 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -783,73 +783,53 @@ _token_print (token_t *token) } } -/* Change 'token' into a new token formed by pasting 'other'. */ -static void +/* Return a new token (talloc()ed off of 'token') formed by pasting + * 'token' and 'other'. Note that this function may return 'token' or + * 'other' directly rather than allocating anything new. + * + * Caution: Only very cursory error-checking is performed to see if + * the final result is a valid single token. */ +static token_t * _token_paste (token_t *token, token_t *other) { /* Pasting a placeholder onto anything makes no change. 
*/ if (other->type == PLACEHOLDER) - return; + return token; - /* When 'token' is a placeholder, just return contents of 'other'. */ - if (token->type == PLACEHOLDER) { - token->type = other->type; - token->value = other->value; - return; - } + /* When 'token' is a placeholder, just return 'other'. */ + if (token->type == PLACEHOLDER) + return other; /* A very few single-character punctuators can be combined * with another to form a multi-character punctuator. */ switch (token->type) { case '<': - if (other->type == '<') { - token->type = LEFT_SHIFT; - token->value.ival = LEFT_SHIFT; - return; - } else if (other->type == '=') { - token->type = LESS_OR_EQUAL; - token->value.ival = LESS_OR_EQUAL; - return; - } + if (other->type == '<') + return _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL); break; case '>': - if (other->type == '>') { - token->type = RIGHT_SHIFT; - token->value.ival = RIGHT_SHIFT; - return; - } else if (other->type == '=') { - token->type = GREATER_OR_EQUAL; - token->value.ival = GREATER_OR_EQUAL; - return; - } + if (other->type == '>') + return _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT); + else if (other->type == '=') + return _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL); break; case '=': - if (other->type == '=') { - token->type = EQUAL; - token->value.ival = EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, EQUAL, EQUAL); break; case '!': - if (other->type == '=') { - token->type = NOT_EQUAL; - token->value.ival = NOT_EQUAL; - return; - } + if (other->type == '=') + return _token_create_ival (token, NOT_EQUAL, NOT_EQUAL); break; case '&': - if (other->type == '&') { - token->type = AND; - token->value.ival = AND; - return; - } + if (other->type == '&') + return _token_create_ival (token, AND, AND); break; case '|': - if (other->type == '|') { - token->type = OR; - token->value.ival = 
OR; - return; - } + if (other->type == '|') + return _token_create_ival (token, OR, OR); break; } @@ -864,9 +844,11 @@ _token_paste (token_t *token, token_t *other) if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) && (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING)) { - token->value.str = talloc_strdup_append (token->value.str, - other->value.str); - return; + char *str; + + str = xtalloc_asprintf (token, "%s%s", + token->value.str, other->value.str); + return _token_create_str (token, token->type, str); } printf ("Error: Pasting \""); @@ -874,6 +856,8 @@ _token_paste (token_t *token, token_t *other) printf ("\" and \""); _token_print (other); printf ("\" does not give a valid preprocessing token.\n"); + + return token; } static void @@ -1159,6 +1143,8 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, /* After argument substitution, and before further expansion * below, implement token pasting. */ + _token_list_trim_trailing_space (substituted); + node = substituted->head; while (node) { @@ -1187,12 +1173,16 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, return NULL; } - _token_paste (node->token, next_non_space->token); + node->token = _token_paste (node->token, next_non_space->token); node->next = next_non_space->next; + if (next_non_space == substituted->tail) + substituted->tail = node; node = node->next; } + substituted->non_space_tail = substituted->tail; + _string_list_push (parser->active, identifier); _glcpp_parser_expand_token_list (parser, substituted); _string_list_pop (parser->active); diff --git a/tests/072-token-pasting-same-line.c b/tests/072-token-pasting-same-line.c new file mode 100644 index 0000000000..e421e9d5e2 --- /dev/null +++ b/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) -- cgit v1.2.3 From 75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: 
Sat, 29 May 2010 05:57:22 -0700 Subject: Add killer test case from the C99 specification. Happily, this passes now, (since many of the previously added test cases were extracted from this one). --- tests/099-c99-example.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/099-c99-example.c diff --git a/tests/099-c99-example.c b/tests/099-c99-example.c new file mode 100644 index 0000000000..d1976b1f26 --- /dev/null +++ b/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; -- cgit v1.2.3 From ae3fb09cd20fc189d68f0c2a63cc74dd584d7ee1 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Sat, 29 May 2010 06:01:32 -0700 Subject: Add three more tests cases recently added to the take-2 branch. The 071-punctuator test is failing only trivially (whitespace change only). And the 072-token-pasting-same-line.c test passes just fine here, (more evidence perhaps that the approach in take-2 is more trouble than it's worth?). The 099-c99-example test case is the inspiration for much of the rest of the test suite. It amazingly passes on the take-2 branch, but doesn't pass here yet. 
--- tests/071-punctuator.c | 1 + tests/072-token-pasting-same-line.c | 2 ++ tests/099-c99-example.c | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 tests/071-punctuator.c create mode 100644 tests/072-token-pasting-same-line.c create mode 100644 tests/099-c99-example.c diff --git a/tests/071-punctuator.c b/tests/071-punctuator.c new file mode 100644 index 0000000000..959d682598 --- /dev/null +++ b/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b diff --git a/tests/072-token-pasting-same-line.c b/tests/072-token-pasting-same-line.c new file mode 100644 index 0000000000..e421e9d5e2 --- /dev/null +++ b/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) diff --git a/tests/099-c99-example.c b/tests/099-c99-example.c new file mode 100644 index 0000000000..d1976b1f26 --- /dev/null +++ b/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; -- cgit v1.2.3 From a771a40e2257657cbdae0eb97a7bb8733db76b91 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 1 Jun 2010 11:20:18 -0700 Subject: Fix #if-skipping to *really* skip the skipped group. Previously we were avoiding printing within a skipped group, but we were still evaluating directives such as #define and #undef and still emitting diagnostics for things such as macro calls with the wrong number of arguments. Add a test for this and fix it with a high-priority rule in the lexer that consumes the skipped content. 
--- glcpp-lex.l | 48 ++++++++++++++++++++++++++++++------------ glcpp-parse.y | 15 ++++--------- glcpp.h | 1 + tests/062-if-0-skips-garbage.c | 5 +++++ 4 files changed, 44 insertions(+), 25 deletions(-) create mode 100644 tests/062-if-0-skips-garbage.c diff --git a/glcpp-lex.l b/glcpp-lex.l index 52269c6b30..a51d9e185f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -47,39 +47,58 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% -{HASH}define{HSPACE}+/{IDENTIFIER}"(" { +{HASH}if/.*\n { + yyextra->lexing_if = 1; yyextra->space_tokens = 0; - return HASH_DEFINE_FUNC; + return HASH_IF; } -{HASH}define { +{HASH}elif/.*\n { + yyextra->lexing_if = 1; yyextra->space_tokens = 0; - return HASH_DEFINE_OBJ; + return HASH_ELIF; } -{HASH}undef { +{HASH}else/.*\n { yyextra->space_tokens = 0; - return HASH_UNDEF; + return HASH_ELSE; } -{HASH}if { +{HASH}endif/.*\n { yyextra->space_tokens = 0; - return HASH_IF; + return HASH_ENDIF; } -{HASH}elif { + /* When skipping (due to an #if 0 or similar) consume anything + * up to a newline. We do this less priroty than any + * #if-related directive (#if, #elif, #else, #endif), but with + * more priority than any other directive or token to avoid + * any side-effects from skipped content. + * + * We use the lexing_if flag to avoid skipping any part of an + * if conditional expression. */ +[^\n]+/\n { + if (yyextra->lexing_if || + yyextra->skip_stack == NULL || + yyextra->skip_stack->type == SKIP_NO_SKIP) + { + REJECT; + } +} + +{HASH}define{HSPACE}+/{IDENTIFIER}"(" { yyextra->space_tokens = 0; - return HASH_ELIF; + return HASH_DEFINE_FUNC; } -{HASH}else { +{HASH}define { yyextra->space_tokens = 0; - return HASH_ELSE; + return HASH_DEFINE_OBJ; } -{HASH}endif { +{HASH}undef { yyextra->space_tokens = 0; - return HASH_ENDIF; + return HASH_UNDEF; } {HASH} { @@ -163,6 +182,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
} \n { + yyextra->lexing_if = 0; return NEWLINE; } diff --git a/glcpp-parse.y b/glcpp-parse.y index f4c834e038..dd8e133f55 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -159,19 +159,11 @@ input: line: control_line { - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - printf ("\n"); - } + printf ("\n"); } | text_line { - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - _glcpp_parser_print_expanded_token_list (parser, $1); - printf ("\n"); - } + _glcpp_parser_print_expanded_token_list (parser, $1); + printf ("\n"); talloc_free ($1); } | expanded_line @@ -889,6 +881,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->lexing_if = 0; parser->space_tokens = 1; parser->newline_as_space = 0; parser->in_control_line = 0; diff --git a/glcpp.h b/glcpp.h index 5c8c304a9c..41fc2043d1 100644 --- a/glcpp.h +++ b/glcpp.h @@ -127,6 +127,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int lexing_if; int space_tokens; int newline_as_space; int in_control_line; diff --git a/tests/062-if-0-skips-garbage.c b/tests/062-if-0-skips-garbage.c new file mode 100644 index 0000000000..d9e439bb89 --- /dev/null +++ b/tests/062-if-0-skips-garbage.c @@ -0,0 +1,5 @@ +#define foo(a,b) +#if 0 +foo(bar) +foo( +#endif -- cgit v1.2.3 From 2571415d1a7eec72db33cd521ca48fe755c43f9c Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 1 Jun 2010 12:18:43 -0700 Subject: Implement comment handling in the lexer (with test). We support both single-line (//) and multi-line (/* ... */) comments and add a test for this, (trying to stress the rules just a bit by embedding one comment delimiter into a comment delimited with the other style, etc.). 
To keep the test suite passing we do now discard any output lines from glcpp that consist only of spacing, (in addition to blank lines as previously). We also discard any initial whitespace from gcc output. In neither case should the absence or presence of this whitespace affect correctness. --- glcpp-lex.l | 11 +++++++++++ tests/063-comments.c | 15 +++++++++++++++ tests/glcpp-test | 4 ++-- 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/063-comments.c diff --git a/glcpp-lex.l b/glcpp-lex.l index a51d9e185f..0954ab7e83 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -47,6 +47,17 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% + /* Single-line comments */ +"//"[^\n]+\n { + return NEWLINE; +} + + /* Multi-line comments */ +[/][*]([^*]*[*]+[^/])*[^*]*[*]*[/] { + if (yyextra->space_tokens) + return SPACE; +} + {HASH}if/.*\n { yyextra->lexing_if = 1; yyextra->space_tokens = 0; diff --git a/tests/063-comments.c b/tests/063-comments.c new file mode 100644 index 0000000000..4cda52236e --- /dev/null +++ b/tests/063-comments.c @@ -0,0 +1,15 @@ +/* this is a comment */ +// so is this +// */ +f = g/**//h; +/*//*/l(); +m = n//**/o ++ p; +/* this +comment spans +multiple lines and +contains *** stars +and slashes / *** / +and other stuff. 
+****/ +more code here diff --git a/tests/glcpp-test b/tests/glcpp-test index ba398af0d5..24110333a5 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -3,8 +3,8 @@ for test in *.c; do echo "Testing $test" ../glcpp < $test > $test.glcpp - grep -v '^$' < $test.glcpp > $test.out || true + grep -v '^ *$' < $test.glcpp > $test.out || true gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true + grep -v '^#' < $test.gcc | grep -v '^$' | sed -r -e 's/^ +/ /' > $test.expected || true diff -u $test.expected $test.out done -- cgit v1.2.3 From 4c22f4dba7a87de4736e01010e361b073a7501c8 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 10:48:47 -0700 Subject: Fix multi-line comment regular expression to handle (non) nested comments. Ken reminded me of a couple cases that I should be testing. These are the non-nestedness of things that look like nested comments as well as potentially tricky things like "/*/" and "/*/*/". The (non) nested comment case was not working in the case of the comment terminator with multiple '*' characters. We fix this by not considering a '*' as the "non-slash" to terminate a sequence of '*' characters within the comment. We also fix the final match of the terminator to use '+' rather than '*' to require the presence of a final '*' character in the comment terminator. --- glcpp-lex.l | 2 +- tests/063-comments.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 0954ab7e83..7bc5fab76d 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } /* Multi-line comments */ -[/][*]([^*]*[*]+[^/])*[^*]*[*]*[/] { +[/][*]([^*]*[*]+[^*/])*[^*]*[*]+[/] { if (yyextra->space_tokens) return SPACE; } diff --git a/tests/063-comments.c b/tests/063-comments.c index 4cda52236e..e641d2f0f9 100644 --- a/tests/063-comments.c +++ b/tests/063-comments.c @@ -13,3 +13,8 @@ and slashes / *** / and other stuff. 
****/ more code here +/* Test that /* nested + comments */ +are not treated like comments. +/*/ this is a comment */ +/*/*/ -- cgit v1.2.3 From e4b2731a25c071407d90c6c593a226574e9c36f9 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 10:59:08 -0700 Subject: Make the multi-line comment regular expression a bit easier to read. Use quoted strings for literal portions rather than a sequence of single-character character classes. --- glcpp-lex.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 7bc5fab76d..2aec46a2ed 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -53,7 +53,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? } /* Multi-line comments */ -[/][*]([^*]*[*]+[^*/])*[^*]*[*]+[/] { +"/*"([^*]*[*]+[^*/])*[^*]*[*]+"/" { if (yyextra->space_tokens) return SPACE; } -- cgit v1.2.3 From 111e25bd84fb923bbab5b0ca76bbbb5d9a537a26 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 12:54:15 -0700 Subject: Factor out common sub-expression from multi-line-comment regular expression. In two places we look for an (optional) sequence of characters other than "*" followed by a sequence of one or more "*". Using a name for this (NON_STARS_THEN_STARS) seems to make it a bit easier to understand. --- glcpp-lex.l | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index 2aec46a2ed..0d9a75415a 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -45,6 +45,8 @@ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? +NON_STARS_THEN_STARS [^*]*[*]+ + %% /* Single-line comments */ @@ -53,7 +55,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
} /* Multi-line comments */ -"/*"([^*]*[*]+[^*/])*[^*]*[*]+"/" { +"/*"({NON_STARS_THEN_STARS}[^*/])*{NON_STARS_THEN_STARS}"/" { if (yyextra->space_tokens) return SPACE; } -- cgit v1.2.3 From c7c95fe51f0ff83d4d3e07a926f96336248f9509 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 14:43:03 -0700 Subject: Remove dead code: _glcpp_parser_expand_token_list_onto This function simply isn't being called anymore. --- glcpp-parse.y | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index dd8e133f55..a4e6559282 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1320,16 +1320,6 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, list->non_space_tail = list->tail; } -static void -_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser, - token_list_t *list, - token_list_t *result) -{ - _glcpp_parser_expand_token_list (parser, list); - - _token_list_append_list (result, list); -} - void _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, token_list_t *list) -- cgit v1.2.3 From 22b3aced03c1a243ba03fbcba5aa51f97e4f0abb Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:32:03 -0700 Subject: Eliminate some recursion from children of _expand_token_list Previously, both _expand_node and _expand_function would always make mutually recursive calls into _expand_token_list. This was unnecessary since these functions can simply return unexpanded results, after which the outer iteration will next attempt expansion of the results. The only trick in doing this is to arrange so that the active list is popped at the appropriate time. To do this, we add a new token_node_t marker to the active stack. When pushing onto the active list, we set marker to last->next, and when the marker is seen by the token list iteration, we pop from the active stack. 
--- glcpp-parse.y | 159 +++++++++++++++++++++++++++++++++++++--------------------- glcpp.h | 8 ++- 2 files changed, 110 insertions(+), 57 deletions(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index a4e6559282..1c7c84dac7 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -52,12 +52,6 @@ _string_list_append_item (string_list_t *list, const char *str); static void _string_list_append_list (string_list_t *list, string_list_t *tail); -static void -_string_list_push (string_list_t *list, const char *str); - -static void -_string_list_pop (string_list_t *list); - static int _string_list_contains (string_list_t *list, const char *member, int *index); @@ -96,6 +90,20 @@ _token_list_append (token_list_t *list, token_t *token); static void _token_list_append_list (token_list_t *list, token_list_t *tail); +static int +_token_list_length (token_list_t *list); + +static active_list_t * +_active_list_push (active_list_t *list, + const char *identifier, + token_node_t *marker); + +static active_list_t * +_active_list_pop (active_list_t *list); + +int +_active_list_contains (active_list_t *list, const char *identifier); + static void _glcpp_parser_evaluate_defined (glcpp_parser_t *parser, token_list_t *list); @@ -468,42 +476,6 @@ _string_list_append_item (string_list_t *list, const char *str) list->tail = node; } -void -_string_list_push (string_list_t *list, const char *str) -{ - string_node_t *node; - - node = xtalloc (list, string_node_t); - node->str = xtalloc_strdup (node, str); - node->next = list->head; - - if (list->tail == NULL) { - list->tail = node; - } - list->head = node; -} - -void -_string_list_pop (string_list_t *list) -{ - string_node_t *node; - - node = list->head; - - if (node == NULL) { - fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n"); - exit (1); - } - - list->head = node->next; - if (list->tail == node) { - assert (node->next == NULL); - list->tail = NULL; - } - - talloc_free (node); -} - int _string_list_contains 
(string_list_t *list, const char *member, int *index) { @@ -716,6 +688,21 @@ _token_list_trim_trailing_space (token_list_t *list) } } +static int +_token_list_length (token_list_t *list) +{ + int length = 0; + token_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + static void _token_print (token_t *token) { @@ -880,7 +867,7 @@ glcpp_parser_create (void) glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); - parser->active = _string_list_create (parser); + parser->active = NULL; parser->lexing_if = 0; parser->space_tokens = 1; parser->newline_as_space = 0; @@ -1176,10 +1163,6 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, substituted->non_space_tail = substituted->tail; - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list (parser, substituted); - _string_list_pop (parser->active); - return substituted; } @@ -1206,7 +1189,6 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, token_t *token = node->token; const char *identifier; macro_t *macro; - token_list_t *expansion; /* We only expand identifiers */ if (token->type != IDENTIFIER) { @@ -1231,7 +1213,7 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, /* Finally, don't expand this macro if we're already actively * expanding it, (to avoid infinite recursion). */ - if (_string_list_contains (parser->active, identifier, NULL)) { + if (_active_list_contains (parser->active, identifier)) { /* We change the token type here from IDENTIFIER to * OTHER to prevent any future expansion of this * unexpanded token. 
*/ @@ -1254,18 +1236,63 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser, if (macro->replacements == NULL) return _token_list_create (parser); - expansion = _token_list_copy (parser, macro->replacements); - - _string_list_push (parser->active, identifier); - _glcpp_parser_expand_token_list (parser, expansion); - _string_list_pop (parser->active); - - return expansion; + return _token_list_copy (parser, macro->replacements); } return _glcpp_parser_expand_function (parser, node, last); } +/* Push a new identifier onto the active list, returning the new list. + * + * Here, 'marker' is the token node that appears in the list after the + * expansion of 'identifier'. That is, when the list iterator begins + * examinging 'marker', then it is time to pop this node from the + * active stack. + */ +active_list_t * +_active_list_push (active_list_t *list, + const char *identifier, + token_node_t *marker) +{ + active_list_t *node; + + node = xtalloc (list, active_list_t); + node->identifier = xtalloc_strdup (node, identifier); + node->marker = marker; + node->next = list; + + return node; +} + +active_list_t * +_active_list_pop (active_list_t *list) +{ + active_list_t *node = list; + + if (node == NULL) + return NULL; + + node = list->next; + talloc_free (list); + + return node; +} + +int +_active_list_contains (active_list_t *list, const char *identifier) +{ + active_list_t *node; + + if (list == NULL) + return 0; + + for (node = list; node; node = node->next) + if (strcmp (node->identifier, identifier) == 0) + return 1; + + return 0; +} + /* Walk over the token list replacing nodes with their expansion. * Whenever nodes are expanded the walking will walk over the new * nodes, continuing to expand as necessary. 
The results are placed in @@ -1288,10 +1315,27 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, node = list->head; while (node) { + + while (parser->active && parser->active->marker == node) + parser->active = _active_list_pop (parser->active); + /* Find the expansion for node, which will replace all * nodes from node to last, inclusive. */ expansion = _glcpp_parser_expand_node (parser, node, &last); if (expansion) { + token_node_t *n; + + for (n = node; n != last->next; n = n->next) + while (parser->active && + parser->active->marker == n) + { + parser->active = _active_list_pop (parser->active); + } + + parser->active = _active_list_push (parser->active, + node->token->value.str, + last->next); + /* Splice expansion into list, supporting a * simple deletion if the expansion is * empty. */ @@ -1317,6 +1361,9 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser, node = node_prev ? node_prev->next : list->head; } + while (parser->active) + parser->active = _active_list_pop (parser->active); + list->non_space_tail = list->tail; } diff --git a/glcpp.h b/glcpp.h index 41fc2043d1..4459daa4f3 100644 --- a/glcpp.h +++ b/glcpp.h @@ -123,10 +123,16 @@ typedef struct skip_node { struct skip_node *next; } skip_node_t; +typedef struct active_list { + const char *identifier; + token_node_t *marker; + struct active_list *next; +} active_list_t; + struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; - string_list_t *active; + active_list_t *active; int lexing_if; int space_tokens; int newline_as_space; -- cgit v1.2.3 From 14c98a56442a076a831aee85e9b3e54d934ec360 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:49:54 -0700 Subject: Restore error message for a macro with unbalanced parentheses. We had to remove this earlier because our recursive function calls caused the same nodes to be examined for expansion more than once. 
And in the test suite, one node would be examined before it had its closing parenthesis and then again later after the parenthesis was added. So we removed this error message to allow the test case to pass. Now that we've removed the unnecessary recursive function call we can catch this error case and report it as desired. --- glcpp-parse.y | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/glcpp-parse.y b/glcpp-parse.y index 1c7c84dac7..b07714eebd 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -956,7 +956,7 @@ typedef enum function_status * Macro name not followed by a '('. This is not an error, but * simply that the macro name should be treated as a non-macro. * - * FUNCTION_UNBLANCED_PARENTHESES + * FUNCTION_UNBALANCED_PARENTHESES * * Macro name is not followed by a balanced set of parentheses. */ @@ -1065,6 +1065,9 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser, case FUNCTION_NOT_A_FUNCTION: return NULL; case FUNCTION_UNBALANCED_PARENTHESES: + fprintf (stderr, "Error: Macro %s call has unbalanced parentheses\n", + identifier); + exit (1); return NULL; } -- cgit v1.2.3 From 5ae88af9886b4b7bf486cbc0d10a9bab6456165f Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Wed, 2 Jun 2010 15:59:45 -0700 Subject: test suite: Add expected output for every test. Rather than using the (munged) output of "gcc -E" we now capture precisely the output we expect from every test case. This allows us to stay immune from strange output from gcc (unpredictable whitespace output---particularly with different gcc versions). This will also allow us to write tests that capture expected error messages from the preprocessor as well. 
--- .gitignore | 3 -- Makefile | 2 +- tests/000-content-with-spaces.c.expected | 1 + tests/001-define.c.expected | 2 ++ tests/002-define-chain.c.expected | 3 ++ tests/003-define-chain-reverse.c.expected | 3 ++ tests/004-define-recursive.c.expected | 6 ++++ tests/005-define-composite-chain.c.expected | 3 ++ .../006-define-composite-chain-reverse.c.expected | 3 ++ tests/007-define-composite-recursive.c.expected | 6 ++++ tests/008-define-empty.c.expected | 2 ++ tests/009-undef.c.expected | 4 +++ tests/010-undef-re-define.c.expected | 6 ++++ tests/011-define-func-empty.c.expected | 2 ++ tests/012-define-func-no-args.c.expected | 2 ++ tests/013-define-func-1-arg-unused.c.expected | 2 ++ tests/014-define-func-2-arg-unused.c.expected | 2 ++ tests/015-define-object-with-parens.c.expected | 4 +++ tests/016-define-func-1-arg.c.expected | 2 ++ tests/017-define-func-2-args.c.expected | 2 ++ .../018-define-func-macro-as-parameter.c.expected | 3 ++ tests/019-define-func-1-arg-multi.c.expected | 2 ++ tests/020-define-func-2-arg-multi.c.expected | 2 ++ tests/021-define-func-compose.c.expected | 3 ++ tests/022-define-func-arg-with-parens.c.expected | 2 ++ tests/023-define-extra-whitespace.c.expected | 8 +++++ .../024-define-chain-to-self-recursion.c.expected | 3 ++ tests/025-func-macro-as-non-macro.c.expected | 2 ++ tests/026-define-func-extra-newlines.c.expected | 3 ++ tests/027-define-chain-obj-to-func.c.expected | 3 ++ tests/028-define-chain-obj-to-non-func.c.expected | 3 ++ ...9-define-chain-obj-to-func-with-args.c.expected | 3 ++ ...030-define-chain-obj-to-func-compose.c.expected | 4 +++ ...31-define-chain-func-to-func-compose.c.expected | 4 +++ tests/032-define-func-self-recurse.c.expected | 2 ++ tests/033-define-func-self-compose.c.expected | 2 ++ ...34-define-func-self-compose-non-func.c.expected | 2 ++ ...ompose-non-func-multi-token-argument.c.expected | 2 ++ ...-func-non-macro-multi-token-argument.c.expected | 3 ++ tests/037-finalize-unexpanded-macro.c.expected | 3 ++ 
tests/038-func-arg-with-commas.c.expected | 2 ++ tests/039-func-arg-obj-macro-with-comma.c.expected | 3 ++ tests/040-token-pasting.c.expected | 2 ++ tests/041-if-0.c.expected | 5 ++++ tests/042-if-1.c.expected | 5 ++++ tests/043-if-0-else.c.expected | 7 +++++ tests/044-if-1-else.c.expected | 7 +++++ tests/045-if-0-elif.c.expected | 11 +++++++ tests/046-if-1-elsif.c.expected | 11 +++++++ tests/047-if-elif-else.c.expected | 11 +++++++ tests/048-if-nested.c.expected | 11 +++++++ tests/049-if-expression-precedence.c.expected | 5 ++++ tests/050-if-defined.c.expected | 17 +++++++++++ tests/051-if-relational.c.expected | 35 ++++++++++++++++++++++ tests/052-if-bitwise.c.expected | 20 +++++++++++++ tests/053-if-divide-and-shift.c.expected | 15 ++++++++++ tests/054-if-with-macros.c.expected | 34 +++++++++++++++++++++ ...ine-chain-obj-to-func-parens-in-text.c.expected | 3 ++ tests/056-macro-argument-with-comma.c.expected | 4 +++ tests/057-empty-arguments.c.expected | 6 ++++ tests/058-token-pasting-empty-arguments.c.expected | 5 ++++ tests/059-token-pasting-integer.c.expected | 4 +++ ...t-paren-in-macro-right-paren-in-text.c.expected | 3 ++ .../061-define-chain-obj-to-func-multi.c.expected | 5 ++++ tests/062-if-0-skips-garbage.c.expected | 5 ++++ tests/063-comments.c.expected | 13 ++++++++ tests/071-punctuator.c.expected | 1 + tests/072-token-pasting-same-line.c.expected | 2 ++ tests/099-c99-example.c.expected | 16 ++++++++++ tests/glcpp-test | 5 +--- 70 files changed, 384 insertions(+), 8 deletions(-) create mode 100644 tests/000-content-with-spaces.c.expected create mode 100644 tests/001-define.c.expected create mode 100644 tests/002-define-chain.c.expected create mode 100644 tests/003-define-chain-reverse.c.expected create mode 100644 tests/004-define-recursive.c.expected create mode 100644 tests/005-define-composite-chain.c.expected create mode 100644 tests/006-define-composite-chain-reverse.c.expected create mode 100644 tests/007-define-composite-recursive.c.expected 
create mode 100644 tests/008-define-empty.c.expected create mode 100644 tests/009-undef.c.expected create mode 100644 tests/010-undef-re-define.c.expected create mode 100644 tests/011-define-func-empty.c.expected create mode 100644 tests/012-define-func-no-args.c.expected create mode 100644 tests/013-define-func-1-arg-unused.c.expected create mode 100644 tests/014-define-func-2-arg-unused.c.expected create mode 100644 tests/015-define-object-with-parens.c.expected create mode 100644 tests/016-define-func-1-arg.c.expected create mode 100644 tests/017-define-func-2-args.c.expected create mode 100644 tests/018-define-func-macro-as-parameter.c.expected create mode 100644 tests/019-define-func-1-arg-multi.c.expected create mode 100644 tests/020-define-func-2-arg-multi.c.expected create mode 100644 tests/021-define-func-compose.c.expected create mode 100644 tests/022-define-func-arg-with-parens.c.expected create mode 100644 tests/023-define-extra-whitespace.c.expected create mode 100644 tests/024-define-chain-to-self-recursion.c.expected create mode 100644 tests/025-func-macro-as-non-macro.c.expected create mode 100644 tests/026-define-func-extra-newlines.c.expected create mode 100644 tests/027-define-chain-obj-to-func.c.expected create mode 100644 tests/028-define-chain-obj-to-non-func.c.expected create mode 100644 tests/029-define-chain-obj-to-func-with-args.c.expected create mode 100644 tests/030-define-chain-obj-to-func-compose.c.expected create mode 100644 tests/031-define-chain-func-to-func-compose.c.expected create mode 100644 tests/032-define-func-self-recurse.c.expected create mode 100644 tests/033-define-func-self-compose.c.expected create mode 100644 tests/034-define-func-self-compose-non-func.c.expected create mode 100644 tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected create mode 100644 tests/036-define-func-non-macro-multi-token-argument.c.expected create mode 100644 tests/037-finalize-unexpanded-macro.c.expected create mode 
100644 tests/038-func-arg-with-commas.c.expected create mode 100644 tests/039-func-arg-obj-macro-with-comma.c.expected create mode 100644 tests/040-token-pasting.c.expected create mode 100644 tests/041-if-0.c.expected create mode 100644 tests/042-if-1.c.expected create mode 100644 tests/043-if-0-else.c.expected create mode 100644 tests/044-if-1-else.c.expected create mode 100644 tests/045-if-0-elif.c.expected create mode 100644 tests/046-if-1-elsif.c.expected create mode 100644 tests/047-if-elif-else.c.expected create mode 100644 tests/048-if-nested.c.expected create mode 100644 tests/049-if-expression-precedence.c.expected create mode 100644 tests/050-if-defined.c.expected create mode 100644 tests/051-if-relational.c.expected create mode 100644 tests/052-if-bitwise.c.expected create mode 100644 tests/053-if-divide-and-shift.c.expected create mode 100644 tests/054-if-with-macros.c.expected create mode 100644 tests/055-define-chain-obj-to-func-parens-in-text.c.expected create mode 100644 tests/056-macro-argument-with-comma.c.expected create mode 100644 tests/057-empty-arguments.c.expected create mode 100644 tests/058-token-pasting-empty-arguments.c.expected create mode 100644 tests/059-token-pasting-integer.c.expected create mode 100644 tests/060-left-paren-in-macro-right-paren-in-text.c.expected create mode 100644 tests/061-define-chain-obj-to-func-multi.c.expected create mode 100644 tests/062-if-0-skips-garbage.c.expected create mode 100644 tests/063-comments.c.expected create mode 100644 tests/071-punctuator.c.expected create mode 100644 tests/072-token-pasting-same-line.c.expected create mode 100644 tests/099-c99-example.c.expected diff --git a/.gitignore b/.gitignore index b88f0cc75c..077db8d8e1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,4 @@ glcpp-parse.c glcpp-parse.h *.o *~ -tests/*.expected -tests/*.gcc -tests/*.glcpp tests/*.out diff --git a/Makefile b/Makefile index 0c06aa880f..3fb44ac3b2 100644 --- a/Makefile +++ b/Makefile @@ -22,4 +22,4 @@ 
test: glcpp clean: rm -f glcpp glcpp-lex.c glcpp-parse.c *.o *~ - rm -f tests/*.out tests/*.gcc tests/*.expected tests/*~ + rm -f tests/*.out tests/*~ diff --git a/tests/000-content-with-spaces.c.expected b/tests/000-content-with-spaces.c.expected new file mode 100644 index 0000000000..a7fc918c90 --- /dev/null +++ b/tests/000-content-with-spaces.c.expected @@ -0,0 +1 @@ +this is four tokens diff --git a/tests/001-define.c.expected b/tests/001-define.c.expected new file mode 100644 index 0000000000..a464d9da74 --- /dev/null +++ b/tests/001-define.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/002-define-chain.c.expected b/tests/002-define-chain.c.expected new file mode 100644 index 0000000000..c6c9ee38a9 --- /dev/null +++ b/tests/002-define-chain.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/003-define-chain-reverse.c.expected b/tests/003-define-chain-reverse.c.expected new file mode 100644 index 0000000000..c6c9ee38a9 --- /dev/null +++ b/tests/003-define-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/004-define-recursive.c.expected b/tests/004-define-recursive.c.expected new file mode 100644 index 0000000000..2d07687f8c --- /dev/null +++ b/tests/004-define-recursive.c.expected @@ -0,0 +1,6 @@ + + + +foo +bar +baz diff --git a/tests/005-define-composite-chain.c.expected b/tests/005-define-composite-chain.c.expected new file mode 100644 index 0000000000..892975c268 --- /dev/null +++ b/tests/005-define-composite-chain.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/tests/006-define-composite-chain-reverse.c.expected b/tests/006-define-composite-chain-reverse.c.expected new file mode 100644 index 0000000000..892975c268 --- /dev/null +++ b/tests/006-define-composite-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/tests/007-define-composite-recursive.c.expected b/tests/007-define-composite-recursive.c.expected new file mode 100644 index 0000000000..0b0b477d9d --- /dev/null +++ 
b/tests/007-define-composite-recursive.c.expected @@ -0,0 +1,6 @@ + + + +a b c foo +b c a bar +c a b baz diff --git a/tests/008-define-empty.c.expected b/tests/008-define-empty.c.expected new file mode 100644 index 0000000000..139597f9cb --- /dev/null +++ b/tests/008-define-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/tests/009-undef.c.expected b/tests/009-undef.c.expected new file mode 100644 index 0000000000..9c0b35a451 --- /dev/null +++ b/tests/009-undef.c.expected @@ -0,0 +1,4 @@ + +1 + +foo diff --git a/tests/010-undef-re-define.c.expected b/tests/010-undef-re-define.c.expected new file mode 100644 index 0000000000..5970f49028 --- /dev/null +++ b/tests/010-undef-re-define.c.expected @@ -0,0 +1,6 @@ + +1 + +foo + +2 diff --git a/tests/011-define-func-empty.c.expected b/tests/011-define-func-empty.c.expected new file mode 100644 index 0000000000..139597f9cb --- /dev/null +++ b/tests/011-define-func-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/tests/012-define-func-no-args.c.expected b/tests/012-define-func-no-args.c.expected new file mode 100644 index 0000000000..9f075f2600 --- /dev/null +++ b/tests/012-define-func-no-args.c.expected @@ -0,0 +1,2 @@ + +bar diff --git a/tests/013-define-func-1-arg-unused.c.expected b/tests/013-define-func-1-arg-unused.c.expected new file mode 100644 index 0000000000..a464d9da74 --- /dev/null +++ b/tests/013-define-func-1-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/014-define-func-2-arg-unused.c.expected b/tests/014-define-func-2-arg-unused.c.expected new file mode 100644 index 0000000000..a464d9da74 --- /dev/null +++ b/tests/014-define-func-2-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/tests/015-define-object-with-parens.c.expected b/tests/015-define-object-with-parens.c.expected new file mode 100644 index 0000000000..a70321a4c5 --- /dev/null +++ b/tests/015-define-object-with-parens.c.expected @@ -0,0 +1,4 @@ + +()1() + +()2() diff --git a/tests/016-define-func-1-arg.c.expected 
b/tests/016-define-func-1-arg.c.expected new file mode 100644 index 0000000000..6bfe04f738 --- /dev/null +++ b/tests/016-define-func-1-arg.c.expected @@ -0,0 +1,2 @@ + +((bar)+1) diff --git a/tests/017-define-func-2-args.c.expected b/tests/017-define-func-2-args.c.expected new file mode 100644 index 0000000000..f7a2b8c26c --- /dev/null +++ b/tests/017-define-func-2-args.c.expected @@ -0,0 +1,2 @@ + +((bar)*(baz)) diff --git a/tests/018-define-func-macro-as-parameter.c.expected b/tests/018-define-func-macro-as-parameter.c.expected new file mode 100644 index 0000000000..c6c9ee38a9 --- /dev/null +++ b/tests/018-define-func-macro-as-parameter.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/tests/019-define-func-1-arg-multi.c.expected b/tests/019-define-func-1-arg-multi.c.expected new file mode 100644 index 0000000000..1e89b8cfd0 --- /dev/null +++ b/tests/019-define-func-1-arg-multi.c.expected @@ -0,0 +1,2 @@ + +(this is more than one word) diff --git a/tests/020-define-func-2-arg-multi.c.expected b/tests/020-define-func-2-arg-multi.c.expected new file mode 100644 index 0000000000..19f59f5ecb --- /dev/null +++ b/tests/020-define-func-2-arg-multi.c.expected @@ -0,0 +1,2 @@ + +one fish,two fish,red fish,blue fish diff --git a/tests/021-define-func-compose.c.expected b/tests/021-define-func-compose.c.expected new file mode 100644 index 0000000000..87f51f0bac --- /dev/null +++ b/tests/021-define-func-compose.c.expected @@ -0,0 +1,3 @@ + + +(2*((1+(3)))) diff --git a/tests/022-define-func-arg-with-parens.c.expected b/tests/022-define-func-arg-with-parens.c.expected new file mode 100644 index 0000000000..1dfc6698bb --- /dev/null +++ b/tests/022-define-func-arg-with-parens.c.expected @@ -0,0 +1,2 @@ + +(argument(including parens)for the win) diff --git a/tests/023-define-extra-whitespace.c.expected b/tests/023-define-extra-whitespace.c.expected new file mode 100644 index 0000000000..9c58275d0f --- /dev/null +++ b/tests/023-define-extra-whitespace.c.expected @@ -0,0 +1,8 @@ + + 
+ + +1 +2 +3 4 +5 6 7 diff --git a/tests/024-define-chain-to-self-recursion.c.expected b/tests/024-define-chain-to-self-recursion.c.expected new file mode 100644 index 0000000000..15600af546 --- /dev/null +++ b/tests/024-define-chain-to-self-recursion.c.expected @@ -0,0 +1,3 @@ + + +foo diff --git a/tests/025-func-macro-as-non-macro.c.expected b/tests/025-func-macro-as-non-macro.c.expected new file mode 100644 index 0000000000..4a59f0520e --- /dev/null +++ b/tests/025-func-macro-as-non-macro.c.expected @@ -0,0 +1,2 @@ + +foo bar diff --git a/tests/026-define-func-extra-newlines.c.expected b/tests/026-define-func-extra-newlines.c.expected new file mode 100644 index 0000000000..5e3c70f2cc --- /dev/null +++ b/tests/026-define-func-extra-newlines.c.expected @@ -0,0 +1,3 @@ + + +bar diff --git a/tests/027-define-chain-obj-to-func.c.expected b/tests/027-define-chain-obj-to-func.c.expected new file mode 100644 index 0000000000..94c15f9505 --- /dev/null +++ b/tests/027-define-chain-obj-to-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/028-define-chain-obj-to-non-func.c.expected b/tests/028-define-chain-obj-to-non-func.c.expected new file mode 100644 index 0000000000..94c15f9505 --- /dev/null +++ b/tests/028-define-chain-obj-to-non-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/029-define-chain-obj-to-func-with-args.c.expected b/tests/029-define-chain-obj-to-func-with-args.c.expected new file mode 100644 index 0000000000..94c15f9505 --- /dev/null +++ b/tests/029-define-chain-obj-to-func-with-args.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/030-define-chain-obj-to-func-compose.c.expected b/tests/030-define-chain-obj-to-func-compose.c.expected new file mode 100644 index 0000000000..bed826e783 --- /dev/null +++ b/tests/030-define-chain-obj-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/031-define-chain-func-to-func-compose.c.expected b/tests/031-define-chain-func-to-func-compose.c.expected new file 
mode 100644 index 0000000000..bed826e783 --- /dev/null +++ b/tests/031-define-chain-func-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/032-define-func-self-recurse.c.expected b/tests/032-define-func-self-recurse.c.expected new file mode 100644 index 0000000000..983f941740 --- /dev/null +++ b/tests/032-define-func-self-recurse.c.expected @@ -0,0 +1,2 @@ + +foo(2*(3)) diff --git a/tests/033-define-func-self-compose.c.expected b/tests/033-define-func-self-compose.c.expected new file mode 100644 index 0000000000..0818362364 --- /dev/null +++ b/tests/033-define-func-self-compose.c.expected @@ -0,0 +1,2 @@ + +foo(2*(foo(2*(3)))) diff --git a/tests/034-define-func-self-compose-non-func.c.expected b/tests/034-define-func-self-compose-non-func.c.expected new file mode 100644 index 0000000000..3f808fe665 --- /dev/null +++ b/tests/034-define-func-self-compose-non-func.c.expected @@ -0,0 +1,2 @@ + +foo diff --git a/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected b/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected new file mode 100644 index 0000000000..09dfdd64e9 --- /dev/null +++ b/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected @@ -0,0 +1,2 @@ + +1+foo diff --git a/tests/036-define-func-non-macro-multi-token-argument.c.expected b/tests/036-define-func-non-macro-multi-token-argument.c.expected new file mode 100644 index 0000000000..580ed9599c --- /dev/null +++ b/tests/036-define-func-non-macro-multi-token-argument.c.expected @@ -0,0 +1,3 @@ + + +more success diff --git a/tests/037-finalize-unexpanded-macro.c.expected b/tests/037-finalize-unexpanded-macro.c.expected new file mode 100644 index 0000000000..e804d7e4f9 --- /dev/null +++ b/tests/037-finalize-unexpanded-macro.c.expected @@ -0,0 +1,3 @@ + + +expand(just once) diff --git a/tests/038-func-arg-with-commas.c.expected b/tests/038-func-arg-with-commas.c.expected new file mode 100644 index 0000000000..6544adb3a2 
--- /dev/null +++ b/tests/038-func-arg-with-commas.c.expected @@ -0,0 +1,2 @@ + +success diff --git a/tests/039-func-arg-obj-macro-with-comma.c.expected b/tests/039-func-arg-obj-macro-with-comma.c.expected new file mode 100644 index 0000000000..8a15397a03 --- /dev/null +++ b/tests/039-func-arg-obj-macro-with-comma.c.expected @@ -0,0 +1,3 @@ + + +(two,words) diff --git a/tests/040-token-pasting.c.expected b/tests/040-token-pasting.c.expected new file mode 100644 index 0000000000..48e836ec3f --- /dev/null +++ b/tests/040-token-pasting.c.expected @@ -0,0 +1,2 @@ + +onetoken diff --git a/tests/041-if-0.c.expected b/tests/041-if-0.c.expected new file mode 100644 index 0000000000..8b506b32d5 --- /dev/null +++ b/tests/041-if-0.c.expected @@ -0,0 +1,5 @@ +success_1 + + + +success_2 diff --git a/tests/042-if-1.c.expected b/tests/042-if-1.c.expected new file mode 100644 index 0000000000..a6ae9465a9 --- /dev/null +++ b/tests/042-if-1.c.expected @@ -0,0 +1,5 @@ +success_1 + +success_2 + +success_3 diff --git a/tests/043-if-0-else.c.expected b/tests/043-if-0-else.c.expected new file mode 100644 index 0000000000..3d7e6be96c --- /dev/null +++ b/tests/043-if-0-else.c.expected @@ -0,0 +1,7 @@ +success_1 + + + +success_2 + +success_3 diff --git a/tests/044-if-1-else.c.expected b/tests/044-if-1-else.c.expected new file mode 100644 index 0000000000..4a31e1cfa9 --- /dev/null +++ b/tests/044-if-1-else.c.expected @@ -0,0 +1,7 @@ +success_1 + +success_2 + + + +success_3 diff --git a/tests/045-if-0-elif.c.expected b/tests/045-if-0-elif.c.expected new file mode 100644 index 0000000000..a9bb1588e4 --- /dev/null +++ b/tests/045-if-0-elif.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + +success_3 + + + +success_4 diff --git a/tests/046-if-1-elsif.c.expected b/tests/046-if-1-elsif.c.expected new file mode 100644 index 0000000000..a4995713ca --- /dev/null +++ b/tests/046-if-1-elsif.c.expected @@ -0,0 +1,11 @@ +success_1 + +success_2 + + + + + + + +success_3 diff --git 
a/tests/047-if-elif-else.c.expected b/tests/047-if-elif-else.c.expected new file mode 100644 index 0000000000..54d3086119 --- /dev/null +++ b/tests/047-if-elif-else.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + +success_2 + +success_3 diff --git a/tests/048-if-nested.c.expected b/tests/048-if-nested.c.expected new file mode 100644 index 0000000000..8beb9c32c3 --- /dev/null +++ b/tests/048-if-nested.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + + + +success_2 diff --git a/tests/049-if-expression-precedence.c.expected b/tests/049-if-expression-precedence.c.expected new file mode 100644 index 0000000000..729bdd15f8 --- /dev/null +++ b/tests/049-if-expression-precedence.c.expected @@ -0,0 +1,5 @@ + + + +success + diff --git a/tests/050-if-defined.c.expected b/tests/050-if-defined.c.expected new file mode 100644 index 0000000000..737eb8d940 --- /dev/null +++ b/tests/050-if-defined.c.expected @@ -0,0 +1,17 @@ + + + +success_1 + + + +success_2 + + + + + + + +success_3 + diff --git a/tests/051-if-relational.c.expected b/tests/051-if-relational.c.expected new file mode 100644 index 0000000000..652fefdd43 --- /dev/null +++ b/tests/051-if-relational.c.expected @@ -0,0 +1,35 @@ + + + +success_1 + + + +success_2 + + + + + +success_3 + + + + + +success_3 + + + + + + + +success_4 + + + +success_5 + + + diff --git a/tests/052-if-bitwise.c.expected b/tests/052-if-bitwise.c.expected new file mode 100644 index 0000000000..44e52b206e --- /dev/null +++ b/tests/052-if-bitwise.c.expected @@ -0,0 +1,20 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + + +success_4 + + + diff --git a/tests/053-if-divide-and-shift.c.expected b/tests/053-if-divide-and-shift.c.expected new file mode 100644 index 0000000000..7e78e0454e --- /dev/null +++ b/tests/053-if-divide-and-shift.c.expected @@ -0,0 +1,15 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + diff --git a/tests/054-if-with-macros.c.expected b/tests/054-if-with-macros.c.expected new file mode 100644 index 
0000000000..70f737c90a --- /dev/null +++ b/tests/054-if-with-macros.c.expected @@ -0,0 +1,34 @@ + + + + + + + +success_1 + + +success_2 + + + + +success_3 + + + + +success_4 + + + + + + +success_5 + + +success_6 + + + diff --git a/tests/055-define-chain-obj-to-func-parens-in-text.c.expected b/tests/055-define-chain-obj-to-func-parens-in-text.c.expected new file mode 100644 index 0000000000..94c15f9505 --- /dev/null +++ b/tests/055-define-chain-obj-to-func-parens-in-text.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/tests/056-macro-argument-with-comma.c.expected b/tests/056-macro-argument-with-comma.c.expected new file mode 100644 index 0000000000..bed826e783 --- /dev/null +++ b/tests/056-macro-argument-with-comma.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/tests/057-empty-arguments.c.expected b/tests/057-empty-arguments.c.expected new file mode 100644 index 0000000000..7d97e15e29 --- /dev/null +++ b/tests/057-empty-arguments.c.expected @@ -0,0 +1,6 @@ + +success + +success + +success diff --git a/tests/058-token-pasting-empty-arguments.c.expected b/tests/058-token-pasting-empty-arguments.c.expected new file mode 100644 index 0000000000..e0967a1b95 --- /dev/null +++ b/tests/058-token-pasting-empty-arguments.c.expected @@ -0,0 +1,5 @@ + +ab +a +b + diff --git a/tests/059-token-pasting-integer.c.expected b/tests/059-token-pasting-integer.c.expected new file mode 100644 index 0000000000..f1288aa7cb --- /dev/null +++ b/tests/059-token-pasting-integer.c.expected @@ -0,0 +1,4 @@ + +12 +1000 +identifier2 diff --git a/tests/060-left-paren-in-macro-right-paren-in-text.c.expected b/tests/060-left-paren-in-macro-right-paren-in-text.c.expected new file mode 100644 index 0000000000..3e5501aa6e --- /dev/null +++ b/tests/060-left-paren-in-macro-right-paren-in-text.c.expected @@ -0,0 +1,3 @@ + + +5*2 diff --git a/tests/061-define-chain-obj-to-func-multi.c.expected b/tests/061-define-chain-obj-to-func-multi.c.expected new file mode 100644 index 
0000000000..15eb64b97f --- /dev/null +++ b/tests/061-define-chain-obj-to-func-multi.c.expected @@ -0,0 +1,5 @@ + + + + +success diff --git a/tests/062-if-0-skips-garbage.c.expected b/tests/062-if-0-skips-garbage.c.expected new file mode 100644 index 0000000000..3f2ff2d6cc --- /dev/null +++ b/tests/062-if-0-skips-garbage.c.expected @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/063-comments.c.expected b/tests/063-comments.c.expected new file mode 100644 index 0000000000..4998d76cc2 --- /dev/null +++ b/tests/063-comments.c.expected @@ -0,0 +1,13 @@ + + + +f = g /h; + l(); +m = n ++ p; + +more code here + +are not treated like comments. + + diff --git a/tests/071-punctuator.c.expected b/tests/071-punctuator.c.expected new file mode 100644 index 0000000000..959d682598 --- /dev/null +++ b/tests/071-punctuator.c.expected @@ -0,0 +1 @@ +a = b diff --git a/tests/072-token-pasting-same-line.c.expected b/tests/072-token-pasting-same-line.c.expected new file mode 100644 index 0000000000..7b80af7e46 --- /dev/null +++ b/tests/072-token-pasting-same-line.c.expected @@ -0,0 +1,2 @@ + +success_1 success_2 success_3 diff --git a/tests/099-c99-example.c.expected b/tests/099-c99-example.c.expected new file mode 100644 index 0000000000..352bbff48f --- /dev/null +++ b/tests/099-c99-example.c.expected @@ -0,0 +1,16 @@ + + + + + + + + + + + + + +f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); +f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); +int i[] = { 1, 23, 4, 5, }; diff --git a/tests/glcpp-test b/tests/glcpp-test index 24110333a5..396f6e175e 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -2,9 +2,6 @@ for test in *.c; do echo "Testing $test" - ../glcpp < $test > $test.glcpp - grep -v '^ *$' < $test.glcpp > $test.out || true - gcc -E $test -o $test.gcc - grep -v '^#' < $test.gcc | grep -v '^$' | sed -r -e 's/^ +/ /' > $test.expected || true + ../glcpp < $test > $test.out diff -u $test.expected $test.out done -- cgit v1.2.3 From 
2ab0b13dd9b281b9c68b3d3e2fb01d19564d115e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 4 Jun 2010 14:53:58 -0700 Subject: Disallow defining macros whose names start with "__" or "GL_". The GLSL specification reserves these for future use. --- glcpp-parse.y | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/glcpp-parse.y b/glcpp-parse.y index dd8e133f55..5072c48ee8 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -1344,6 +1344,22 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, _token_list_print (list); } +void +_check_for_reserved_macro_name (const char *identifier) +{ + /* According to the GLSL specification, macro names starting with "__" + * or "GL_" are reserved for future use. So, don't allow them. + */ + if (strncmp(identifier, "__", 2) == 0) { + fprintf (stderr, "Error: Macro names starting with \"__\" are reserved.\n"); + exit(1); + } + if (strncmp(identifier, "GL_", 3) == 0) { + fprintf (stderr, "Error: Macro names starting with \"GL_\" are reserved.\n"); + exit(1); + } +} + void _define_object_macro (glcpp_parser_t *parser, const char *identifier, @@ -1351,6 +1367,8 @@ _define_object_macro (glcpp_parser_t *parser, { macro_t *macro; + _check_for_reserved_macro_name(identifier); + macro = xtalloc (parser, macro_t); macro->is_function = 0; @@ -1369,6 +1387,8 @@ _define_function_macro (glcpp_parser_t *parser, { macro_t *macro; + _check_for_reserved_macro_name(identifier); + macro = xtalloc (parser, macro_t); macro->is_function = 1; -- cgit v1.2.3