summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile2
-rw-r--r--README16
-rw-r--r--glcpp-lex.l222
-rw-r--r--glcpp-parse.y1410
-rw-r--r--glcpp.h49
-rwxr-xr-xtests/glcpp-test7
-rw-r--r--xtalloc.c15
8 files changed, 1106 insertions, 616 deletions
diff --git a/.gitignore b/.gitignore
index d67bd38c93..b88f0cc75c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ glcpp-parse.h
*~
tests/*.expected
tests/*.gcc
+tests/*.glcpp
tests/*.out
diff --git a/Makefile b/Makefile
index 88116128f8..0c06aa880f 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ CFLAGS = -g
override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused
glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o
- gcc -o $@ -ltalloc $^
+ gcc -o $@ -ltalloc -lm $^
%.c %.h: %.y
bison --debug --defines=$*.h --output=$*.c $^
diff --git a/README b/README
index ba833a49ff..ab42a3ffe1 100644
--- a/README
+++ b/README
@@ -12,3 +12,19 @@ preprocessors". To fill in these details, I've been using the C99
standard (for which I had a convenient copy) as available from:
http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
+
+Known limitations
+-----------------
+Macro invocations cannot include embedded newlines.
+
+The __LINE__, __FILE__, and __VERSION__ macros are not yet supported.
+
+The argument of the 'defined' operator cannot yet include enclosing
+parentheses.
+
+The #error, #pragma, #extension, #version, and #line directives are
+not yet supported.
+
+A file that ends with a function-like macro name as the last
+non-whitespace token will result in a parse error (where it should
+instead be passed through as is).
\ No newline at end of file
diff --git a/glcpp-lex.l b/glcpp-lex.l
index ee1f6e3aee..52269c6b30 100644
--- a/glcpp-lex.l
+++ b/glcpp-lex.l
@@ -32,21 +32,14 @@
%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
-%x ST_DEFINE
-%x ST_DEFINE_OBJ_OR_FUNC
-%x ST_DEFINE_PARAMETER
-%x ST_DEFINE_VALUE
-%x ST_IF
-%x ST_UNDEF
-%x ST_UNDEF_END
-
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
HSPACE [ \t]
HASH ^{HSPACE}*#{HSPACE}*
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
-TOKEN [^[:space:](),]+
+PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
+OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
DECIMAL_INTEGER [1-9][0-9]*[uU]?
OCTAL_INTEGER 0[0-7]*[uU]?
@@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
%%
-{HASH}if{HSPACE}* {
- BEGIN ST_IF;
- return IF;
+{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
+ yyextra->space_tokens = 0;
+ return HASH_DEFINE_FUNC;
}
-{HASH}elif{HSPACE}* {
- BEGIN ST_IF;
- return ELIF;
+{HASH}define {
+ yyextra->space_tokens = 0;
+ return HASH_DEFINE_OBJ;
}
-<ST_IF>{DECIMAL_INTEGER} {
- yylval.ival = strtoll (yytext, NULL, 10);
- return INTEGER;
+{HASH}undef {
+ yyextra->space_tokens = 0;
+ return HASH_UNDEF;
}
-<ST_IF>{OCTAL_INTEGER} {
- yylval.ival = strtoll (yytext + 1, NULL, 8);
- return INTEGER;
+{HASH}if {
+ yyextra->space_tokens = 0;
+ return HASH_IF;
}
-<ST_IF>{HEXADECIMAL_INTEGER} {
- yylval.ival = strtoll (yytext + 2, NULL, 16);
- return INTEGER;
+{HASH}elif {
+ yyextra->space_tokens = 0;
+ return HASH_ELIF;
}
-<ST_IF>"defined" {
- return DEFINED;
+{HASH}else {
+ yyextra->space_tokens = 0;
+ return HASH_ELSE;
}
-<ST_IF>"<<" {
- return LEFT_SHIFT;
+{HASH}endif {
+ yyextra->space_tokens = 0;
+ return HASH_ENDIF;
}
-<ST_IF>">>" {
- return RIGHT_SHIFT;
+{HASH} {
+ yyextra->space_tokens = 0;
+ return HASH;
}
-<ST_IF>"<=" {
- return LESS_OR_EQUAL;
-}
-
-<ST_IF>">=" {
- return GREATER_OR_EQUAL;
-}
-
-<ST_IF>"==" {
- return EQUAL;
-}
-
-<ST_IF>"!=" {
- return NOT_EQUAL;
-}
-
-<ST_IF>"&&" {
- return AND;
-}
-
-<ST_IF>"||" {
- return OR;
-}
-
-<ST_IF>[-+*/%<>&^|()~] {
- return yytext[0];
+{DECIMAL_INTEGER} {
+ yylval.str = xtalloc_strdup (yyextra, yytext);
+ return INTEGER_STRING;
}
-<ST_IF>{IDENTIFIER} {
+{OCTAL_INTEGER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
+ return INTEGER_STRING;
}
-<ST_IF>{HSPACE}+
-
-<ST_IF>\n {
- BEGIN INITIAL;
- return NEWLINE;
+{HEXADECIMAL_INTEGER} {
+ yylval.str = xtalloc_strdup (yyextra, yytext);
+ return INTEGER_STRING;
}
-{HASH}endif{HSPACE}* {
- return ENDIF;
+"<<" {
+ return LEFT_SHIFT;
}
-{HASH}else{HSPACE}* {
- return ELSE;
+">>" {
+ return RIGHT_SHIFT;
}
-{HASH}undef{HSPACE}* {
- BEGIN ST_UNDEF;
- return UNDEF;
+"<=" {
+ return LESS_OR_EQUAL;
}
-<ST_UNDEF>{IDENTIFIER} {
- BEGIN ST_UNDEF_END;
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
+">=" {
+ return GREATER_OR_EQUAL;
}
-<ST_UNDEF_END>{HSPACE}*
-
-<ST_UNDEF_END>\n {
- BEGIN INITIAL;
+"==" {
+ return EQUAL;
}
- /* We use the ST_DEFINE and ST_DEFVAL states so that we can
- * pass a space token, (yes, a token for whitespace!), since
- * the preprocessor specification requires distinguishing
- * "#define foo()" from "#define foo ()".
- */
-{HASH}define{HSPACE}* {
- BEGIN ST_DEFINE;
- return DEFINE;
+"!=" {
+ return NOT_EQUAL;
}
-<ST_DEFINE>{IDENTIFIER} {
- BEGIN ST_DEFINE_OBJ_OR_FUNC;
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
+"&&" {
+ return AND;
}
-<ST_DEFINE_OBJ_OR_FUNC>\n {
- BEGIN INITIAL;
- return NEWLINE;
+"||" {
+ return OR;
}
-<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
- BEGIN ST_DEFINE_VALUE;
- return SPACE;
+"##" {
+ return PASTE;
}
-<ST_DEFINE_OBJ_OR_FUNC>"(" {
- BEGIN ST_DEFINE_PARAMETER;
- return '(';
+"defined" {
+ return DEFINED;
}
-<ST_DEFINE_PARAMETER>{IDENTIFIER} {
+{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
-<ST_DEFINE_PARAMETER>"," {
- return ',';
-}
-
-<ST_DEFINE_PARAMETER>")" {
- BEGIN ST_DEFINE_VALUE;
- return ')';
-}
-
-<ST_DEFINE_PARAMETER>{HSPACE}+
-
-<ST_DEFINE_VALUE>{TOKEN} {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
-}
-
-<ST_DEFINE_VALUE>[(),] {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
-}
-
-<ST_DEFINE_VALUE>{HSPACE}+
-
-<ST_DEFINE_VALUE>\n {
- BEGIN INITIAL;
- return NEWLINE;
+{PUNCTUATION} {
+ return yytext[0];
}
-{IDENTIFIER} {
- int parameter_index;
+{OTHER}+ {
yylval.str = xtalloc_strdup (yyextra, yytext);
- switch (glcpp_parser_classify_token (yyextra, yylval.str,
- &parameter_index))
- {
- case TOKEN_CLASS_IDENTIFIER:
- return IDENTIFIER;
- break;
- case TOKEN_CLASS_IDENTIFIER_FINALIZED:
- return IDENTIFIER_FINALIZED;
- break;
- case TOKEN_CLASS_FUNC_MACRO:
- return FUNC_MACRO;
- break;
- case TOKEN_CLASS_OBJ_MACRO:
- return OBJ_MACRO;
- break;
-
- }
-}
-
-[(),] {
- return yytext[0];
+ return OTHER;
}
-{TOKEN} {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
+{HSPACE}+ {
+ if (yyextra->space_tokens) {
+ return SPACE;
+ }
}
\n {
- yyextra->need_newline = 1;
+ return NEWLINE;
}
-{HSPACE}+
-
%%
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 2c0fe9a6af..f4c834e038 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -25,69 +25,88 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
+#include <inttypes.h>
#include "glcpp.h"
-void
+static void
yyerror (void *scanner, const char *error);
-void
+static void
_define_object_macro (glcpp_parser_t *parser,
const char *macro,
token_list_t *replacements);
-void
+static void
_define_function_macro (glcpp_parser_t *parser,
const char *macro,
string_list_t *parameters,
token_list_t *replacements);
-void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier);
-
-void
-_expand_function_macro (glcpp_parser_t *parser,
- const char *identifier,
- argument_list_t *arguments);
-
-string_list_t *
+static string_list_t *
_string_list_create (void *ctx);
-void
+static void
_string_list_append_item (string_list_t *list, const char *str);
-void
+static void
_string_list_append_list (string_list_t *list, string_list_t *tail);
-int
+static void
+_string_list_push (string_list_t *list, const char *str);
+
+static void
+_string_list_pop (string_list_t *list);
+
+static int
_string_list_contains (string_list_t *list, const char *member, int *index);
-int
+static int
_string_list_length (string_list_t *list);
-argument_list_t *
+static argument_list_t *
_argument_list_create (void *ctx);
-void
+static void
_argument_list_append (argument_list_t *list, token_list_t *argument);
-int
+static int
_argument_list_length (argument_list_t *list);
-token_list_t *
+static token_list_t *
_argument_list_member_at (argument_list_t *list, int index);
-token_list_t *
+/* Note: This function talloc_steal()s the str pointer. */
+static token_t *
+_token_create_str (void *ctx, int type, char *str);
+
+static token_t *
+_token_create_ival (void *ctx, int type, int ival);
+
+static token_list_t *
_token_list_create (void *ctx);
-void
-_token_list_append (token_list_t *list, int type, const char *value);
+/* Note: This function adds a talloc_reference() to token.
+ *
+ * You may want to talloc_unlink any current reference if you no
+ * longer need it. */
+static void
+_token_list_append (token_list_t *list, token_t *token);
-void
+static void
_token_list_append_list (token_list_t *list, token_list_t *tail);
static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser);
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+ token_list_t *list);
+
+static void
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+ token_list_t *list);
+
+static void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+ token_list_t *list);
static void
_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
@@ -95,7 +114,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
static void
_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
int condition);
-
+
static void
_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
@@ -104,29 +123,21 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
static int
glcpp_parser_lex (glcpp_parser_t *parser);
-%}
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list);
-%union {
- intmax_t imaxval;
- int ival;
- char *str;
- argument_list_t *argument_list;
- string_list_t *string_list;
- token_t token;
- token_list_t *token_list;
-}
+%}
%parse-param {glcpp_parser_t *parser}
%lex-param {glcpp_parser_t *parser}
-%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <ival> punctuator
-%type <imaxval> expression INTEGER
-%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
-%type <argument_list> argument_list
-%type <string_list> macro parameter_list
-%type <token> TOKEN argument_word argument_word_or_comma
-%type <token_list> argument argument_or_comma replacement_list pp_tokens
+%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING NEWLINE OTHER PLACEHOLDER SPACE
+%token PASTE
+%type <ival> expression INTEGER operator SPACE
+%type <str> IDENTIFIER INTEGER_STRING OTHER
+%type <string_list> identifier_list
+%type <token> preprocessing_token
+%type <token_list> pp_tokens replacement_list text_line
%left OR
%left AND
%left '|'
@@ -139,228 +150,120 @@ glcpp_parser_lex (glcpp_parser_t *parser);
%left '*' '/' '%'
%right UNARY
-/* Hard to remove shift/reduce conflicts documented as follows:
- *
- * 1. '(' after FUNC_MACRO name which is correctly resolved to shift
- * to form macro invocation rather than reducing directly to
- * content.
- *
- * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to
- * shift to form macro invocation rather than reducing directly to
- * argument.
- *
- * 3. Similarly again now that we added argument_or_comma as well.
- */
-%expect 3
-
%%
- /* We do all printing at the input level. */
input:
- /* empty */ {
- parser->just_printed_separator = 1;
- }
-| input content {
- int is_token;
- int skipping = 0;
-
- if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP)
- skipping = 1;
-
- if ($2 && strlen ($2) && ! skipping) {
- int c = $2[0];
- int is_not_separator = ((c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_'));
-
- if (! parser->just_printed_separator && is_not_separator)
- {
- printf (" ");
- }
- printf ("%s", $2);
-
- if (is_not_separator)
- parser->just_printed_separator = 0;
- else
- parser->just_printed_separator = 1;
- }
-
- if ($2)
- talloc_free ($2);
+ /* empty */
+| input line
+;
- if (parser->need_newline) {
+line:
+ control_line {
+ if (parser->skip_stack == NULL ||
+ parser->skip_stack->type == SKIP_NO_SKIP)
+ {
printf ("\n");
- parser->just_printed_separator = 1;
- parser->need_newline = 0;
}
}
-;
-
-content:
- IDENTIFIER {
- $$ = $1;
- }
-| IDENTIFIER_FINALIZED {
- $$ = $1;
- }
-| TOKEN {
- $$ = $1.value;
- }
-| FUNC_MACRO {
- $$ = $1;
- }
-| directive {
- $$ = talloc_strdup (parser, "\n");
- }
-| punctuator {
- $$ = talloc_asprintf (parser, "%c", $1);
- }
-| macro {
- $$ = NULL;
- }
-;
-
-punctuator:
- '(' { $$ = '('; }
-| ')' { $$ = ')'; }
-| ',' { $$ = ','; }
- ;
-
-macro:
- FUNC_MACRO '(' argument_list ')' {
- _expand_function_macro (parser, $1, $3);
- }
-| OBJ_MACRO {
- _expand_object_macro (parser, $1);
+| text_line {
+ if (parser->skip_stack == NULL ||
+ parser->skip_stack->type == SKIP_NO_SKIP)
+ {
+ _glcpp_parser_print_expanded_token_list (parser, $1);
+ printf ("\n");
+ }
talloc_free ($1);
}
+| expanded_line
+| HASH non_directive
;
-argument_list:
- /* empty */ {
- $$ = _argument_list_create (parser);
- }
-| argument {
- $$ = _argument_list_create (parser);
- _argument_list_append ($$, $1);
- }
-| argument_list ',' argument {
- _argument_list_append ($1, $3);
- $$ = $1;
- }
-;
-
-argument:
- argument_word {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
- }
-| argument argument_word {
- _token_list_append ($1, $2.type, $2.value);
- talloc_free ($2.value);
- $$ = $1;
- }
-| argument '(' argument_or_comma ')' {
- _token_list_append ($1, '(', "(");
- _token_list_append_list ($1, $3);
- _token_list_append ($1, ')', ")");
- $$ = $1;
- }
-;
-
-argument_word:
- IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-| TOKEN { $$ = $1; }
-| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-;
-
- /* XXX: The body of argument_or_comma is the same as the body
- * of argument, but with "argument" and "argument_word"
- * changed to "argument_or_comma" and
- * "argument_word_or_comma". It would be nice to have less
- * redundancy here, but I'm not sure how.
- *
- * It would also be nice to have a less ugly grammar to have
- * to implement, but such is the C preprocessor.
- */
-argument_or_comma:
- argument_word_or_comma {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
- }
-| argument_or_comma argument_word_or_comma {
- _token_list_append ($1, $2.type, $2.value);
- $$ = $1;
+expanded_line:
+ IF_EXPANDED expression NEWLINE {
+ _glcpp_parser_skip_stack_push_if (parser, $2);
}
-| argument_or_comma '(' argument_or_comma ')' {
- _token_list_append ($1, '(', "(");
- _token_list_append_list ($1, $3);
- _token_list_append ($1, ')', ")");
- $$ = $1;
+| ELIF_EXPANDED expression NEWLINE {
+ _glcpp_parser_skip_stack_change_if (parser, "elif", $2);
}
;
-argument_word_or_comma:
- IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-| TOKEN { $$ = $1; }
-| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); }
-;
-
-directive:
- DEFINE IDENTIFIER NEWLINE {
- token_list_t *list = _token_list_create (parser);
- _define_object_macro (parser, $2, list);
+control_line:
+ HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE {
+ _define_object_macro (parser, $2, $3);
}
-| DEFINE IDENTIFIER SPACE replacement_list NEWLINE {
- _define_object_macro (parser, $2, $4);
+| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE {
+ _define_function_macro (parser, $2, NULL, $5);
}
-| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE {
+| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE {
_define_function_macro (parser, $2, $4, $6);
}
-| IF expression NEWLINE {
- _glcpp_parser_skip_stack_push_if (parser, $2);
+| HASH_UNDEF IDENTIFIER NEWLINE {
+ macro_t *macro = hash_table_find (parser->defines, $2);
+ if (macro) {
+ /* XXX: Need hash table to support a real way
+ * to remove an element rather than prefixing
+ * a new node with data of NULL like this. */
+ hash_table_insert (parser->defines, NULL, $2);
+ talloc_free (macro);
+ }
+ talloc_free ($2);
}
-| IFDEF IDENTIFIER NEWLINE {
- string_list_t *macro = hash_table_find (parser->defines, $2);
+| HASH_IF pp_tokens NEWLINE {
+ token_list_t *expanded;
+ token_t *token;
+
+ expanded = _token_list_create (parser);
+ token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED);
+ _token_list_append (expanded, token);
+ talloc_unlink (parser, token);
+ _glcpp_parser_evaluate_defined (parser, $2);
+ _glcpp_parser_expand_token_list (parser, $2);
+ _token_list_append_list (expanded, $2);
+ glcpp_parser_lex_from (parser, expanded);
+ }
+| HASH_IFDEF IDENTIFIER NEWLINE {
+ macro_t *macro = hash_table_find (parser->defines, $2);
talloc_free ($2);
_glcpp_parser_skip_stack_push_if (parser, macro != NULL);
}
-| IFNDEF IDENTIFIER NEWLINE {
- string_list_t *macro = hash_table_find (parser->defines, $2);
+| HASH_IFNDEF IDENTIFIER NEWLINE {
+ macro_t *macro = hash_table_find (parser->defines, $2);
talloc_free ($2);
_glcpp_parser_skip_stack_push_if (parser, macro == NULL);
}
-| ELIF expression NEWLINE {
- _glcpp_parser_skip_stack_change_if (parser, "#elif", $2);
+| HASH_ELIF pp_tokens NEWLINE {
+ token_list_t *expanded;
+ token_t *token;
+
+ expanded = _token_list_create (parser);
+ token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED);
+ _token_list_append (expanded, token);
+ talloc_unlink (parser, token);
+ _glcpp_parser_evaluate_defined (parser, $2);
+ _glcpp_parser_expand_token_list (parser, $2);
+ _token_list_append_list (expanded, $2);
+ glcpp_parser_lex_from (parser, expanded);
}
-| ELSE {
+| HASH_ELSE NEWLINE {
_glcpp_parser_skip_stack_change_if (parser, "else", 1);
}
-| ENDIF {
+| HASH_ENDIF NEWLINE {
_glcpp_parser_skip_stack_pop (parser);
}
-| UNDEF IDENTIFIER {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- if (macro) {
- /* XXX: Need hash table to support a real way
- * to remove an element rather than prefixing
- * a new node with data of NULL like this. */
- hash_table_insert (parser->defines, NULL, $2);
- talloc_free (macro);
- }
- talloc_free ($2);
- }
+| HASH NEWLINE
;
expression:
- INTEGER {
+	INTEGER_STRING {
+		/* Base 0 lets strtoll choose the radix from the prefix:
+		 * "0x" or "0X" selects hexadecimal, a leading "0" selects
+		 * octal, and anything else is decimal. The lexer accepts
+		 * an upper-case "0X" prefix too, so it must not fall
+		 * through to the octal case (where it would parse as 0).
+		 * Conversion stops at any trailing 'u'/'U' suffix. */
+		$$ = strtoll ($1, NULL, 0);
+	}
+| INTEGER {
$$ = $1;
}
| expression OR expression {
@@ -429,56 +332,105 @@ expression:
| '+' expression %prec UNARY {
$$ = + $2;
}
-| DEFINED IDENTIFIER %prec UNARY {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- talloc_free ($2);
- if (macro)
- $$ = 1;
- else
- $$ = 0;
- }
| '(' expression ')' {
$$ = $2;
}
;
-parameter_list:
- /* empty */ {
- $$ = _string_list_create (parser);
- }
-| IDENTIFIER {
+identifier_list:
+ IDENTIFIER {
$$ = _string_list_create (parser);
_string_list_append_item ($$, $1);
- talloc_free ($1);
+ talloc_steal ($$, $1);
}
-| parameter_list ',' IDENTIFIER {
- _string_list_append_item ($1, $3);
- talloc_free ($3);
- $$ = $1;
+| identifier_list ',' IDENTIFIER {
+ $$ = $1;
+ _string_list_append_item ($$, $3);
+ talloc_steal ($$, $3);
}
;
+text_line:
+ NEWLINE { $$ = NULL; }
+| pp_tokens NEWLINE
+;
+
+non_directive:
+ pp_tokens NEWLINE
+;
+
replacement_list:
- /* empty */ {
+ /* empty */ { $$ = NULL; }
+| pp_tokens
+;
+
+pp_tokens:
+ preprocessing_token {
+ parser->space_tokens = 1;
$$ = _token_list_create (parser);
+ _token_list_append ($$, $1);
+ talloc_unlink (parser, $1);
}
-| pp_tokens {
+| pp_tokens preprocessing_token {
$$ = $1;
+ _token_list_append ($$, $2);
+ talloc_unlink (parser, $2);
}
;
-
-pp_tokens:
- TOKEN {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
+preprocessing_token:
+ IDENTIFIER {
+ $$ = _token_create_str (parser, IDENTIFIER, $1);
}
-| pp_tokens TOKEN {
- _token_list_append ($1, $2.type, $2.value);
- $$ = $1;
+| INTEGER_STRING {
+ $$ = _token_create_str (parser, INTEGER_STRING, $1);
+ }
+| operator {
+ $$ = _token_create_ival (parser, $1, $1);
+ }
+| OTHER {
+ $$ = _token_create_str (parser, OTHER, $1);
+ }
+| SPACE {
+ $$ = _token_create_ival (parser, SPACE, SPACE);
}
;
+operator:
+ '[' { $$ = '['; }
+| ']' { $$ = ']'; }
+| '(' { $$ = '('; }
+| ')' { $$ = ')'; }
+| '{' { $$ = '{'; }
+| '}' { $$ = '}'; }
+| '.' { $$ = '.'; }
+| '&' { $$ = '&'; }
+| '*' { $$ = '*'; }
+| '+' { $$ = '+'; }
+| '-' { $$ = '-'; }
+| '~' { $$ = '~'; }
+| '!' { $$ = '!'; }
+| '/' { $$ = '/'; }
+| '%' { $$ = '%'; }
+| LEFT_SHIFT { $$ = LEFT_SHIFT; }
+| RIGHT_SHIFT { $$ = RIGHT_SHIFT; }
+| '<' { $$ = '<'; }
+| '>' { $$ = '>'; }
+| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; }
+| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; }
+| EQUAL { $$ = EQUAL; }
+| NOT_EQUAL { $$ = NOT_EQUAL; }
+| '^' { $$ = '^'; }
+| '|' { $$ = '|'; }
+| AND { $$ = AND; }
+| OR { $$ = OR; }
+| ';' { $$ = ';'; }
+| ',' { $$ = ','; }
+| '=' { $$ = '='; }
+| PASTE { $$ = PASTE; }
+| DEFINED { $$ = DEFINED; }
+;
+
%%
string_list_t *
@@ -512,7 +464,7 @@ _string_list_append_item (string_list_t *list, const char *str)
node = xtalloc (list, string_node_t);
node->str = xtalloc_strdup (node, str);
-
+
node->next = NULL;
if (list->head == NULL) {
@@ -524,6 +476,42 @@ _string_list_append_item (string_list_t *list, const char *str)
list->tail = node;
}
+/* Prepend a copy of 'str' to the front of 'list'.
+ *
+ * The node is obtained from xtalloc with 'list' as the context
+ * argument, and the duplicated string with the node as its context. */
+void
+_string_list_push (string_list_t *list, const char *str)
+{
+	string_node_t *front = xtalloc (list, string_node_t);
+
+	front->str = xtalloc_strdup (front, str);
+	front->next = list->head;
+	list->head = front;
+
+	/* No tail recorded yet: the new node becomes the tail as well. */
+	if (list->tail == NULL)
+		list->tail = front;
+}
+
+/* Remove and free the node at the head of 'list'.
+ *
+ * Popping an empty list is treated as an internal error: a message is
+ * written to stderr and the process exits with status 1. */
+void
+_string_list_pop (string_list_t *list)
+{
+	string_node_t *front = list->head;
+
+	if (front == NULL) {
+		fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n");
+		exit (1);
+	}
+
+	if (front == list->tail) {
+		/* The head is also the tail, so nothing may follow it. */
+		assert (front->next == NULL);
+		list->tail = NULL;
+	}
+	list->head = front->next;
+
+	talloc_free (front);
+}
+
int
_string_list_contains (string_list_t *list, const char *member, int *index)
{
@@ -576,9 +564,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument)
{
argument_node_t *node;
- if (argument == NULL || argument->head == NULL)
- return;
-
node = xtalloc (list, argument_node_t);
node->argument = argument;
@@ -630,6 +615,31 @@ _argument_list_member_at (argument_list_t *list, int index)
return NULL;
}
+/* Create a string-valued token of the given 'type' under 'ctx'.
+ *
+ * Note: This function talloc_steal()s the str pointer, passing the
+ * new token as the new context. */
+token_t *
+_token_create_str (void *ctx, int type, char *str)
+{
+	token_t *result = xtalloc (ctx, token_t);
+
+	result->value.str = talloc_steal (result, str);
+	result->type = type;
+
+	return result;
+}
+
+/* Create an integer-valued token of the given 'type' under 'ctx',
+ * storing 'ival' as its value. */
+token_t *
+_token_create_ival (void *ctx, int type, int ival)
+{
+	token_t *result = xtalloc (ctx, token_t);
+
+	result->value.ival = ival;
+	result->type = type;
+
+	return result;
+}
+
token_list_t *
_token_list_create (void *ctx)
{
@@ -638,18 +648,18 @@ _token_list_create (void *ctx)
list = xtalloc (ctx, token_list_t);
list->head = NULL;
list->tail = NULL;
+ list->non_space_tail = NULL;
return list;
}
void
-_token_list_append (token_list_t *list, int type, const char *value)
+_token_list_append (token_list_t *list, token_t *token)
{
token_node_t *node;
node = xtalloc (list, token_node_t);
- node->type = type;
- node->value = xtalloc_strdup (list, value);
+ node->token = xtalloc_reference (list, token);
node->next = NULL;
@@ -660,11 +670,16 @@ _token_list_append (token_list_t *list, int type, const char *value)
}
list->tail = node;
+ if (token->type != SPACE)
+ list->non_space_tail = node;
}
void
_token_list_append_list (token_list_t *list, token_list_t *tail)
{
+ if (tail == NULL || tail->head == NULL)
+ return;
+
if (list->head == NULL) {
list->head = tail->head;
} else {
@@ -672,8 +687,191 @@ _token_list_append_list (token_list_t *list, token_list_t *tail)
}
list->tail = tail->tail;
+ list->non_space_tail = tail->non_space_tail;
+}
+
+/* Build a new list under 'ctx' holding the tokens of 'other', in the
+ * same order. Each token is passed to _token_list_append as-is; the
+ * tokens themselves are not duplicated here.
+ *
+ * Returns NULL when 'other' is NULL. */
+token_list_t *
+_token_list_copy (void *ctx, token_list_t *other)
+{
+	token_list_t *result;
+	token_node_t *cursor;
+
+	if (other == NULL)
+		return NULL;
+
+	result = _token_list_create (ctx);
+	cursor = other->head;
+	while (cursor != NULL) {
+		_token_list_append (result, cursor->token);
+		cursor = cursor->next;
+	}
+
+	return result;
+}
-
+
+/* Cut 'list' off after its recorded non-space tail and free every
+ * node that followed it.
+ *
+ * Nothing is done when the list has no non-space tail recorded. */
+void
+_token_list_trim_trailing_space (token_list_t *list)
+{
+	token_node_t *doomed, *following;
+
+	if (list->non_space_tail == NULL)
+		return;
+
+	/* Detach the trailing run first, then release it node by node. */
+	doomed = list->non_space_tail->next;
+	list->non_space_tail->next = NULL;
+	list->tail = list->non_space_tail;
+
+	for (; doomed; doomed = following) {
+		following = doomed->next;
+		talloc_free (doomed);
+	}
+}
+
+/* Print the textual form of 'token' to stdout.
+ *
+ * Token types below 256 are single characters and are printed as
+ * such. String-valued tokens print their string, multi-character
+ * operators print their spelling, SPACE prints one space and
+ * PLACEHOLDER prints nothing. An unrecognized type is reported on
+ * stderr and otherwise ignored. */
+static void
+_token_print (token_t *token)
+{
+	if (token->type < 256) {
+		printf ("%c", token->type);
+		return;
+	}
+
+	switch (token->type) {
+	case INTEGER:
+		/* Print in decimal (not hexadecimal), and convert the
+		 * value explicitly so the argument always matches the
+		 * intmax_t that the PRIdMAX conversion expects. */
+		printf ("%" PRIdMAX, (intmax_t) token->value.ival);
+		break;
+	case IDENTIFIER:
+	case INTEGER_STRING:
+	case OTHER:
+		printf ("%s", token->value.str);
+		break;
+	case SPACE:
+		printf (" ");
+		break;
+	case LEFT_SHIFT:
+		printf ("<<");
+		break;
+	case RIGHT_SHIFT:
+		printf (">>");
+		break;
+	case LESS_OR_EQUAL:
+		printf ("<=");
+		break;
+	case GREATER_OR_EQUAL:
+		printf (">=");
+		break;
+	case EQUAL:
+		printf ("==");
+		break;
+	case NOT_EQUAL:
+		printf ("!=");
+		break;
+	case AND:
+		printf ("&&");
+		break;
+	case OR:
+		printf ("||");
+		break;
+	case PASTE:
+		printf ("##");
+		break;
+	case COMMA_FINAL:
+		printf (",");
+		break;
+	case PLACEHOLDER:
+		/* Nothing to print. */
+		break;
+	default:
+		fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type);
+		break;
+	}
+}
+
+/* Paste 'other' onto 'token' and return the resulting token.
+ *
+ * The result is either 'token' or 'other' itself (when one side is a
+ * PLACEHOLDER, or when the paste is rejected and 'token' is handed
+ * back), or a new token created with 'token' as its context.
+ *
+ * Caution: Only very cursory error-checking is performed to see if
+ * the final result is a valid single token. */
+static token_t *
+_token_paste (token_t *token, token_t *other)
+{
+	int combined = 0;
+	int left_is_string, right_is_string;
+
+	/* A placeholder on either side contributes nothing. */
+	if (other->type == PLACEHOLDER)
+		return token;
+	if (token->type == PLACEHOLDER)
+		return other;
+
+	/* A very few pairs of single-character punctuators combine
+	 * into one multi-character punctuator. */
+	switch (token->type) {
+	case '<':
+		if (other->type == '<')
+			combined = LEFT_SHIFT;
+		else if (other->type == '=')
+			combined = LESS_OR_EQUAL;
+		break;
+	case '>':
+		if (other->type == '>')
+			combined = RIGHT_SHIFT;
+		else if (other->type == '=')
+			combined = GREATER_OR_EQUAL;
+		break;
+	case '=':
+		if (other->type == '=')
+			combined = EQUAL;
+		break;
+	case '!':
+		if (other->type == '=')
+			combined = NOT_EQUAL;
+		break;
+	case '&':
+		if (other->type == '&')
+			combined = AND;
+		break;
+	case '|':
+		if (other->type == '|')
+			combined = OR;
+		break;
+	default:
+		break;
+	}
+	if (combined != 0)
+		return _token_create_ival (token, combined, combined);
+
+	/* Two string-valued tokens are simply concatenated.
+	 *
+	 * XXX: This isn't actually legitimate. Several combinations
+	 * should produce a diagnostic because the result cannot be a
+	 * valid, single pre-processing token (pasting "123" and "abc",
+	 * for example), but that is not caught here. */
+	left_is_string = (token->type == IDENTIFIER ||
+			  token->type == OTHER ||
+			  token->type == INTEGER_STRING);
+	right_is_string = (other->type == IDENTIFIER ||
+			   other->type == OTHER ||
+			   other->type == INTEGER_STRING);
+	if (left_is_string && right_is_string) {
+		char *joined = xtalloc_asprintf (token, "%s%s",
+						 token->value.str,
+						 other->value.str);
+
+		return _token_create_str (token, token->type, joined);
+	}
+
+	printf ("Error: Pasting \"");
+	_token_print (token);
+	printf ("\" and \"");
+	_token_print (other);
+	printf ("\" does not give a valid preprocessing token.\n");
+
+	return token;
+}
+
+/* Print every token of 'list' in order using _token_print.
+ * A NULL list prints nothing. */
+static void
+_token_list_print (token_list_t *list)
+{
+	token_node_t *cursor;
+
+	if (list == NULL)
+		return;
+
+	cursor = list->head;
+	while (cursor != NULL) {
+		_token_print (cursor->token);
+		cursor = cursor->next;
+	}
+}
+
void
yyerror (void *scanner, const char *error)
{
@@ -690,13 +888,17 @@ glcpp_parser_create (void)
glcpp_lex_init_extra (parser, &parser->scanner);
parser->defines = hash_table_ctor (32, hash_table_string_hash,
hash_table_string_compare);
- parser->expansions = NULL;
-
- parser->just_printed_separator = 1;
- parser->need_newline = 0;
+ parser->active = _string_list_create (parser);
+ parser->space_tokens = 1;
+ parser->newline_as_space = 0;
+ parser->in_control_line = 0;
+ parser->paren_count = 0;
parser->skip_stack = NULL;
+ parser->lex_from_list = NULL;
+ parser->lex_from_node = NULL;
+
return parser;
}
@@ -709,8 +911,6 @@ glcpp_parser_parse (glcpp_parser_t *parser)
void
glcpp_parser_destroy (glcpp_parser_t *parser)
{
- if (parser->need_newline)
- printf ("\n");
if (parser->skip_stack)
fprintf (stderr, "Error: Unterminated #if\n");
glcpp_lex_destroy (parser->scanner);
@@ -718,247 +918,577 @@ glcpp_parser_destroy (glcpp_parser_t *parser)
talloc_free (parser);
}
-static int
-glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member)
+/* Replace any occurrences of DEFINED tokens in 'list' with either a
+ * '0' or '1' INTEGER token depending on whether the next token in the
+ * list is defined or not. */
+static void
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+ token_list_t *list)
{
- expansion_node_t *node;
+ token_node_t *node, *next;
+ macro_t *macro;
+
+ if (list == NULL)
+ return;
- for (node = parser->expansions; node; node = node->next) {
- if (node->macro &&
- strcmp (node->macro->identifier, member) == 0)
+ for (node = list->head; node; node = node->next) {
+ if (node->token->type != DEFINED)
+ continue;
+ /* The operand is the next token that is not a SPACE. */
+ next = node->next;
+ while (next && next->token->type == SPACE)
+ next = next->next;
+ if (next == NULL || next->token->type != IDENTIFIER) {
+ fprintf (stderr, "Error: operator \"defined\" requires an identifier\n");
+ exit (1);
+ }
+ macro = hash_table_find (parser->defines,
+ next->token->value.str);
+
+ /* Rewrite the DEFINED node in place as the 0/1 result,
+ * then unlink everything up to and including the
+ * operand. */
+ node->token->type = INTEGER;
+ node->token->value.ival = (macro != NULL);
+ node->next = next->next;
+ /* If the operand ended the list, the rewritten node is
+ * the new end; don't leave the tail pointers on an
+ * unlinked node. */
+ if (next == list->tail)
+ list->tail = node;
+ if (next == list->non_space_tail)
+ list->non_space_tail = node;
+ }
+}
+
+typedef enum function_status
+{
+ FUNCTION_STATUS_SUCCESS,
+ FUNCTION_NOT_A_FUNCTION,
+ FUNCTION_UNBALANCED_PARENTHESES
+} function_status_t;
+
+/* Find a set of function-like macro arguments by looking for a
+ * balanced set of parentheses.
+ *
+ * When called, 'node' should be the macro-name token; the opening
+ * parenthesis is expected after it, (possibly preceded by SPACE
+ * tokens). Upon successful return *last will be the last consumed
+ * node, (corresponding to the closing right parenthesis).
+ *
+ * Return values:
+ *
+ * FUNCTION_STATUS_SUCCESS:
+ *
+ * Successfully parsed a set of function arguments.
+ *
+ * FUNCTION_NOT_A_FUNCTION:
+ *
+ * Macro name not followed by a '('. This is not an error, but
+ * simply that the macro name should be treated as a non-macro.
+ *
+ * FUNCTION_UNBALANCED_PARENTHESES:
+ *
+ * Macro name is not followed by a balanced set of parentheses.
+ */
+static function_status_t
+_arguments_parse (argument_list_t *arguments,
+ token_node_t *node,
+ token_node_t **last)
+{
+ token_list_t *argument;
+ int paren_count;
+
+ node = node->next;
+
+ /* Ignore whitespace before first parenthesis. */
+ while (node && node->token->type == SPACE)
+ node = node->next;
+
+ if (node == NULL || node->token->type != '(')
+ return FUNCTION_NOT_A_FUNCTION;
+
+ node = node->next;
+
+ argument = _token_list_create (arguments);
+ _argument_list_append (arguments, argument);
+
+ for (paren_count = 1; node; node = node->next) {
+ if (node->token->type == '(')
{
- return 1;
+ paren_count++;
+ }
+ else if (node->token->type == ')')
+ {
+ paren_count--;
+ if (paren_count == 0)
+ break;
+ }
+
+ if (node->token->type == ',' &&
+ paren_count == 1)
+ {
+ _token_list_trim_trailing_space (argument);
+ argument = _token_list_create (arguments);
+ _argument_list_append (arguments, argument);
+ }
+ else {
+ if (argument->head == NULL) {
+ /* Don't treat initial whitespace as
+ * part of the argument. */
+ if (node->token->type == SPACE)
+ continue;
+ }
+ _token_list_append (argument, node->token);
}
}
- return 0;
+ if (paren_count)
+ return FUNCTION_UNBALANCED_PARENTHESES;
+
+ *last = node;
+
+ return FUNCTION_STATUS_SUCCESS;
}
-token_class_t
-glcpp_parser_classify_token (glcpp_parser_t *parser,
- const char *identifier,
- int *parameter_index)
+/* This is a helper function that's essentially part of the
+ * implementation of _glcpp_parser_expand_node. It shouldn't be called
+ * except for by that function.
+ *
+ * Returns NULL if node is a simple token with no expansion, (that is,
+ * although 'node' corresponds to an identifier defined as a
+ * function-like macro, it is not followed with a parenthesized
+ * argument list).
+ *
+ * Compute the complete expansion of node (which is a function-like
+ * macro) and subsequent nodes which are arguments.
+ *
+ * Returns the token list that results from the expansion and sets
+ * *last to the last node in the list that was consumed by the
+ * expansion. Specifically, *last will be set to the node of the
+ * closing right parenthesis.
+ */
+static token_list_t *
+_glcpp_parser_expand_function (glcpp_parser_t *parser,
+ token_node_t *node,
+ token_node_t **last)
+
{
macro_t *macro;
+ const char *identifier;
+ argument_list_t *arguments;
+ function_status_t status;
+ token_list_t *substituted;
+ int parameter_index;
+
+ identifier = node->token->value.str;
- /* Is this token a defined macro? */
macro = hash_table_find (parser->defines, identifier);
- if (macro == NULL)
- return TOKEN_CLASS_IDENTIFIER;
+ assert (macro->is_function);
- /* Don't consider this a macro if we are already actively
- * expanding this macro. */
- if (glcpp_parser_is_expanding (parser, identifier))
- return TOKEN_CLASS_IDENTIFIER_FINALIZED;
+ arguments = _argument_list_create (parser);
+ status = _arguments_parse (arguments, node, last);
- /* Definitely a macro. Just need to check if it's function-like. */
- if (macro->is_function)
- return TOKEN_CLASS_FUNC_MACRO;
- else
- return TOKEN_CLASS_OBJ_MACRO;
-}
+ switch (status) {
+ case FUNCTION_STATUS_SUCCESS:
+ break;
+ case FUNCTION_NOT_A_FUNCTION:
+ return NULL;
+ case FUNCTION_UNBALANCED_PARENTHESES:
+ return NULL;
+ }
-void
-_define_object_macro (glcpp_parser_t *parser,
- const char *identifier,
- token_list_t *replacements)
-{
- macro_t *macro;
+ if (macro->replacements == NULL) {
+ talloc_free (arguments);
+ return _token_list_create (parser);
+ }
- macro = xtalloc (parser, macro_t);
+ if (! ((_argument_list_length (arguments) ==
+ _string_list_length (macro->parameters)) ||
+ (_string_list_length (macro->parameters) == 0 &&
+ _argument_list_length (arguments) == 1 &&
+ arguments->head->argument->head == NULL)))
+ {
+ fprintf (stderr,
+ "Error: macro %s invoked with %d arguments (expected %d)\n",
+ identifier,
+ _argument_list_length (arguments),
+ _string_list_length (macro->parameters));
+ return NULL;
+ }
- macro->is_function = 0;
- macro->parameters = NULL;
- macro->identifier = talloc_strdup (macro, identifier);
- macro->replacements = talloc_steal (macro, replacements);
+ /* Perform argument substitution on the replacement list. */
+ substituted = _token_list_create (arguments);
- hash_table_insert (parser->defines, macro, identifier);
+ for (node = macro->replacements->head; node; node = node->next)
+ {
+ if (node->token->type == IDENTIFIER &&
+ _string_list_contains (macro->parameters,
+ node->token->value.str,
+ &parameter_index))
+ {
+ token_list_t *argument;
+ argument = _argument_list_member_at (arguments,
+ parameter_index);
+ /* Before substituting, we expand the argument
+ * tokens, or append a placeholder token for
+ * an empty argument. */
+ if (argument->head) {
+ _glcpp_parser_expand_token_list (parser,
+ argument);
+ _token_list_append_list (substituted, argument);
+ } else {
+ token_t *new_token;
+
+ new_token = _token_create_ival (substituted,
+ PLACEHOLDER,
+ PLACEHOLDER);
+ _token_list_append (substituted, new_token);
+ }
+ } else {
+ _token_list_append (substituted, node->token);
+ }
+ }
+
+ /* After argument substitution, and before further expansion
+ * below, implement token pasting. */
+
+ _token_list_trim_trailing_space (substituted);
+
+ node = substituted->head;
+ while (node)
+ {
+ token_node_t *next_non_space;
+
+ /* Look ahead for a PASTE token, skipping space. */
+ next_non_space = node->next;
+ while (next_non_space && next_non_space->token->type == SPACE)
+ next_non_space = next_non_space->next;
+
+ if (next_non_space == NULL)
+ break;
+
+ if (next_non_space->token->type != PASTE) {
+ node = next_non_space;
+ continue;
+ }
+
+ /* Now find the next non-space token after the PASTE. */
+ next_non_space = next_non_space->next;
+ while (next_non_space && next_non_space->token->type == SPACE)
+ next_non_space = next_non_space->next;
+
+ if (next_non_space == NULL) {
+ fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n");
+ return NULL;
+ }
+
+ node->token = _token_paste (node->token, next_non_space->token);
+ node->next = next_non_space->next;
+ if (next_non_space == substituted->tail)
+ substituted->tail = node;
+
+ node = node->next;
+ }
+
+ substituted->non_space_tail = substituted->tail;
+
+ _string_list_push (parser->active, identifier);
+ _glcpp_parser_expand_token_list (parser, substituted);
+ _string_list_pop (parser->active);
+
+ return substituted;
}
-void
-_define_function_macro (glcpp_parser_t *parser,
- const char *identifier,
- string_list_t *parameters,
- token_list_t *replacements)
+/* Compute the complete expansion of node, (and subsequent nodes after
+ * 'node' in the case that 'node' is a function-like macro and
+ * subsequent nodes are arguments).
+ *
+ * Returns NULL if node is a simple token with no expansion.
+ *
+ * Otherwise, returns the token list that results from the expansion
+ * and sets *last to the last node in the list that was consumed by
+ * the expansion. Specifically, *last will be set as follows:
+ *
+ * As 'node' in the case of object-like macro expansion.
+ *
+ * As the token of the closing right parenthesis in the case of
+ * function-like macro expansion.
+ */
+static token_list_t *
+_glcpp_parser_expand_node (glcpp_parser_t *parser,
+ token_node_t *node,
+ token_node_t **last)
{
+ token_t *token = node->token;
+ const char *identifier;
macro_t *macro;
+ token_list_t *expansion;
+
+ /* We only expand identifiers */
+ if (token->type != IDENTIFIER) {
+ /* We change any COMMA into a COMMA_FINAL to prevent
+ * it being mistaken for an argument separator
+ * later. */
+ if (token->type == ',') {
+ token->type = COMMA_FINAL;
+ token->value.ival = COMMA_FINAL;
+ }
- macro = xtalloc (parser, macro_t);
+ return NULL;
+ }
- macro->is_function = 1;
- macro->parameters = talloc_steal (macro, parameters);
- macro->identifier = talloc_strdup (macro, identifier);
- macro->replacements = talloc_steal (macro, replacements);
+ /* Look up this identifier in the hash table. */
+ identifier = token->value.str;
+ macro = hash_table_find (parser->defines, identifier);
- hash_table_insert (parser->defines, macro, identifier);
+ /* Not a macro, so no expansion needed. */
+ if (macro == NULL)
+ return NULL;
+
+ /* Finally, don't expand this macro if we're already actively
+ * expanding it, (to avoid infinite recursion). */
+ if (_string_list_contains (parser->active, identifier, NULL)) {
+ /* We change the token type here from IDENTIFIER to
+ * OTHER to prevent any future expansion of this
+ * unexpanded token. */
+ char *str;
+ token_list_t *expansion;
+ token_t *final;
+
+ str = xtalloc_strdup (parser, token->value.str);
+ final = _token_create_str (parser, OTHER, str);
+ expansion = _token_list_create (parser);
+ _token_list_append (expansion, final);
+ *last = node;
+ return expansion;
+ }
+
+ if (! macro->is_function)
+ {
+ *last = node;
+
+ if (macro->replacements == NULL)
+ return _token_list_create (parser);
+
+ expansion = _token_list_copy (parser, macro->replacements);
+
+ _string_list_push (parser->active, identifier);
+ _glcpp_parser_expand_token_list (parser, expansion);
+ _string_list_pop (parser->active);
+
+ return expansion;
+ }
+
+ return _glcpp_parser_expand_function (parser, node, last);
}
+/* Walk over the token list replacing nodes with their expansion.
+ * Whenever nodes are expanded the walking will walk over the new
+ * nodes, continuing to expand as necessary. The results are placed in
+ * 'list' itself.
+ */
static void
-_glcpp_parser_push_expansion (glcpp_parser_t *parser,
- macro_t *macro,
- token_node_t *replacements)
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+ token_list_t *list)
{
- expansion_node_t *node;
+ token_node_t *node_prev;
+ token_node_t *node, *last;
+ token_list_t *expansion;
- node = xtalloc (parser, expansion_node_t);
+ if (list == NULL)
+ return;
- node->macro = macro;
- node->replacements = replacements;
+ _token_list_trim_trailing_space (list);
- node->next = parser->expansions;
- parser->expansions = node;
+ /* node_prev trails the walk: it is the last node known to need
+ * no (further) expansion, or NULL while still at the head. */
+ node_prev = NULL;
+ node = list->head;
+
+ while (node) {
+ /* Find the expansion for node, which will replace all
+ * nodes from node to last, inclusive. */
+ expansion = _glcpp_parser_expand_node (parser, node, &last);
+ if (expansion) {
+ /* Splice expansion into list, supporting a
+ * simple deletion if the expansion is
+ * empty. */
+ if (expansion->head) {
+ if (node_prev)
+ node_prev->next = expansion->head;
+ else
+ list->head = expansion->head;
+ expansion->tail->next = last->next;
+ if (last == list->tail)
+ list->tail = expansion->tail;
+ } else {
+ if (node_prev)
+ node_prev->next = last->next;
+ else
+ list->head = last->next;
+ /* The removed range ended the list, so
+ * the new tail is the node before it,
+ * (NULL when the list is now empty). */
+ if (last == list->tail)
+ list->tail = node_prev;
+ }
+ } else {
+ node_prev = node;
+ }
+ /* Resume just after node_prev: either the first node of
+ * a freshly spliced expansion, (so that it is walked
+ * too), or the next original node. */
+ node = node_prev ? node_prev->next : list->head;
+ }
+
+ list->non_space_tail = list->tail;
}
static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser)
+_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser,
+ token_list_t *list,
+ token_list_t *result)
{
- expansion_node_t *node;
+ _glcpp_parser_expand_token_list (parser, list);
- node = parser->expansions;
+ _token_list_append_list (result, list);
+}
- if (node == NULL) {
- fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n");
- exit (1);
- }
+void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+ token_list_t *list)
+{
+ if (list == NULL)
+ return;
- parser->expansions = node->next;
+ _glcpp_parser_expand_token_list (parser, list);
- talloc_free (node);
+ _token_list_trim_trailing_space (list);
+
+ _token_list_print (list);
}
void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier)
+_define_object_macro (glcpp_parser_t *parser,
+ const char *identifier,
+ token_list_t *replacements)
{
macro_t *macro;
- macro = hash_table_find (parser->defines, identifier);
- assert (! macro->is_function);
- assert (! glcpp_parser_is_expanding (parser, identifier));
+ macro = xtalloc (parser, macro_t);
- _glcpp_parser_push_expansion (parser, macro, macro->replacements->head);
+ macro->is_function = 0;
+ macro->parameters = NULL;
+ macro->identifier = talloc_strdup (macro, identifier);
+ macro->replacements = talloc_steal (macro, replacements);
+
+ hash_table_insert (parser->defines, macro, identifier);
}
void
-_expand_function_macro (glcpp_parser_t *parser,
+_define_function_macro (glcpp_parser_t *parser,
const char *identifier,
- argument_list_t *arguments)
+ string_list_t *parameters,
+ token_list_t *replacements)
{
macro_t *macro;
- token_list_t *expanded;
- token_node_t *i, *j;
- int parameter_index;
-
- macro = hash_table_find (parser->defines, identifier);
- assert (macro->is_function);
- assert (! glcpp_parser_is_expanding (parser, identifier));
-
- if (_argument_list_length (arguments) !=
- _string_list_length (macro->parameters))
- {
- fprintf (stderr,
- "Error: macro %s invoked with %d arguments (expected %d)\n",
- identifier,
- _argument_list_length (arguments),
- _string_list_length (macro->parameters));
- return;
- }
- expanded = _token_list_create (macro);
+ macro = xtalloc (parser, macro_t);
- for (i = macro->replacements->head; i; i = i->next) {
- if (_string_list_contains (macro->parameters, i->value,
- &parameter_index))
- {
- token_list_t *argument;
- argument = _argument_list_member_at (arguments,
- parameter_index);
- for (j = argument->head; j; j = j->next)
- {
- _token_list_append (expanded, j->type,
- j->value);
- }
- } else {
- _token_list_append (expanded, i->type, i->value);
- }
- }
+ macro->is_function = 1;
+ macro->parameters = talloc_steal (macro, parameters);
+ macro->identifier = talloc_strdup (macro, identifier);
+ macro->replacements = talloc_steal (macro, replacements);
- _glcpp_parser_push_expansion (parser, macro, expanded->head);
+ hash_table_insert (parser->defines, macro, identifier);
}
static int
glcpp_parser_lex (glcpp_parser_t *parser)
{
- expansion_node_t *expansion;
- token_node_t *replacements;
- int parameter_index;
- const char *token;
- token_class_t class;
-
- /* Who says C can't do efficient tail recursion? */
- RECURSE:
-
- expansion = parser->expansions;
+ token_node_t *node;
+ int ret;
+
+ if (parser->lex_from_list == NULL) {
+ ret = glcpp_lex (parser->scanner);
+
+ /* XXX: This ugly block of code exists for the sole
+ * purpose of converting a NEWLINE token into a SPACE
+ * token, but only in the case where we have seen a
+ * function-like macro name, but have not yet seen its
+ * closing parenthesis.
+ *
+ * There's perhaps a more compact way to do this with
+ * mid-rule actions in the grammar.
+ *
+ * I'm definitely not pleased with the complexity of
+ * this code here.
+ */
+ if (parser->newline_as_space)
+ {
+ if (ret == '(') {
+ parser->paren_count++;
+ } else if (ret == ')') {
+ parser->paren_count--;
+ if (parser->paren_count == 0)
+ parser->newline_as_space = 0;
+ } else if (ret == NEWLINE) {
+ ret = SPACE;
+ } else if (ret != SPACE) {
+ if (parser->paren_count == 0)
+ parser->newline_as_space = 0;
+ }
+ }
+ else if (parser->in_control_line)
+ {
+ if (ret == NEWLINE)
+ parser->in_control_line = 0;
+ }
+ else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC ||
+ ret == HASH_UNDEF || ret == HASH_IF ||
+ ret == HASH_IFDEF || ret == HASH_IFNDEF ||
+ ret == HASH_ELIF || ret == HASH_ELSE ||
+ ret == HASH_ENDIF || ret == HASH)
+ {
+ parser->in_control_line = 1;
+ }
+ else if (ret == IDENTIFIER)
+ {
+ macro_t *macro;
+ macro = hash_table_find (parser->defines,
+ yylval.str);
+ if (macro && macro->is_function) {
+ parser->newline_as_space = 1;
+ parser->paren_count = 0;
+ }
+ }
- if (expansion == NULL)
- return glcpp_lex (parser->scanner);
+ return ret;
+ }
- replacements = expansion->replacements;
+ node = parser->lex_from_node;
- /* Pop expansion when replacements is exhausted. */
- if (replacements == NULL) {
- glcpp_parser_pop_expansion (parser);
- goto RECURSE;
+ if (node == NULL) {
+ talloc_free (parser->lex_from_list);
+ parser->lex_from_list = NULL;
+ return NEWLINE;
}
- expansion->replacements = replacements->next;
-
- token = replacements->value;
+ yylval = node->token->value;
+ ret = node->token->type;
- /* Implement token pasting. */
- if (replacements->next && strcmp (replacements->next->value, "##") == 0) {
- token_node_t *next_node;
+ parser->lex_from_node = node->next;
- next_node = replacements->next->next;
+ return ret;
+}
- if (next_node == NULL) {
- fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n");
- exit (1);
- }
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list)
+{
+ token_node_t *node;
- token = xtalloc_asprintf (parser, "%s%s",
- token, next_node->value);
- expansion->replacements = next_node->next;
- }
+ assert (parser->lex_from_list == NULL);
+ /* Copy list, eliminating any space tokens. */
+ parser->lex_from_list = _token_list_create (parser);
- if (strcmp (token, "(") == 0)
- return '(';
- else if (strcmp (token, ")") == 0)
- return ')';
+ for (node = list->head; node; node = node->next) {
+ if (node->token->type == SPACE)
+ continue;
+ _token_list_append (parser->lex_from_list, node->token);
+ }
- yylval.str = xtalloc_strdup (parser, token);
+ talloc_free (list);
- /* Carefully refuse to expand any finalized identifier. */
- if (replacements->type == IDENTIFIER_FINALIZED)
- return IDENTIFIER_FINALIZED;
+ parser->lex_from_node = parser->lex_from_list->head;
- switch (glcpp_parser_classify_token (parser, yylval.str,
- &parameter_index))
- {
- case TOKEN_CLASS_IDENTIFIER:
- return IDENTIFIER;
- break;
- case TOKEN_CLASS_IDENTIFIER_FINALIZED:
- return IDENTIFIER_FINALIZED;
- break;
- case TOKEN_CLASS_FUNC_MACRO:
- return FUNC_MACRO;
- break;
- default:
- case TOKEN_CLASS_OBJ_MACRO:
- return OBJ_MACRO;
- break;
+ /* It's possible the list consisted of nothing but whitespace. */
+ if (parser->lex_from_node == NULL) {
+ talloc_free (parser->lex_from_list);
+ parser->lex_from_list = NULL;
}
}
@@ -1002,7 +1532,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
parser->skip_stack->type = SKIP_TO_ENDIF;
}
}
-
+
static void
_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser)
{
diff --git a/glcpp.h b/glcpp.h
index 503731b85b..5c8c304a9c 100644
--- a/glcpp.h
+++ b/glcpp.h
@@ -44,21 +44,36 @@ typedef struct string_list {
string_node_t *tail;
} string_list_t;
-typedef struct token {
+typedef struct token token_t;
+typedef struct token_list token_list_t;
+
+typedef union YYSTYPE
+{
+ intmax_t ival;
+ char *str;
+ string_list_t *string_list;
+ token_t *token;
+ token_list_t *token_list;
+} YYSTYPE;
+
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+
+struct token {
int type;
- char *value;
-} token_t;
+ YYSTYPE value;
+};
typedef struct token_node {
- int type;
- const char *value;
+ token_t *token;
struct token_node *next;
} token_node_t;
-typedef struct token_list {
+struct token_list {
token_node_t *head;
token_node_t *tail;
-} token_list_t;
+ token_node_t *non_space_tail;
+};
typedef struct argument_node {
token_list_t *argument;
@@ -111,16 +126,16 @@ typedef struct skip_node {
struct glcpp_parser {
yyscan_t scanner;
struct hash_table *defines;
- expansion_node_t *expansions;
- int just_printed_separator;
- int need_newline;
+ string_list_t *active;
+ int space_tokens;
+ int newline_as_space;
+ int in_control_line;
+ int paren_count;
skip_node_t *skip_stack;
+ token_list_t *lex_from_list;
+ token_node_t *lex_from_node;
};
-void
-glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
- int argument_index);
-
glcpp_parser_t *
glcpp_parser_create (void);
@@ -164,4 +179,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n);
char *
xtalloc_asprintf (const void *t, const char *fmt, ...);
+void *
+_xtalloc_reference_loc (const void *context,
+ const void *ptr, const char *location);
+
+#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__)
+
#endif
diff --git a/tests/glcpp-test b/tests/glcpp-test
index 022a236712..ba398af0d5 100755
--- a/tests/glcpp-test
+++ b/tests/glcpp-test
@@ -2,8 +2,9 @@
for test in *.c; do
echo "Testing $test"
- ../glcpp < $test > $test.out
+ ../glcpp < $test > $test.glcpp
+ grep -v '^$' < $test.glcpp > $test.out || true
gcc -E $test -o $test.gcc
- grep -v '^#' < $test.gcc > $test.expected
- diff -B -u $test.expected $test.out
+ grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true
+ diff -u $test.expected $test.out
done
diff --git a/xtalloc.c b/xtalloc.c
index e52d12ac6b..656ac2d6cb 100644
--- a/xtalloc.c
+++ b/xtalloc.c
@@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...)
va_end(ap);
return ret;
}
+
+void *
+_xtalloc_reference_loc (const void *context,
+ const void *ptr, const char *location)
+{
+ void *ret;
+
+ ret = _talloc_reference_loc (context, ptr, location);
+ if (ret == NULL) {
+ fprintf (stderr, "Out of memory.\n");
+ exit (1);
+ }
+
+ return ret;
+}