diff options
author | Carl Worth <cworth@cworth.org> | 2010-05-29 06:03:32 -0700 |
---|---|---|
committer | Carl Worth <cworth@cworth.org> | 2010-05-29 06:03:40 -0700 |
commit | 96d3994881832201db7edd8a0a6f4b34655649d3 (patch) | |
tree | 2f599ca455e3fa16e0488516708e9871f07bbdb9 /glcpp-lex.l | |
parent | ae3fb09cd20fc189d68f0c2a63cc74dd584d7ee1 (diff) | |
parent | 75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 (diff) |
Merge branch 'take-2'
The take-2 branch started over with a new grammar based directly on
the grammar from the C99 specification. It doesn't try to capture
things like balanced sets of parentheses for macro arguments in the
grammar. Instead, it merely captures things as token lists and then
performs operations like parsing arguments and expanding macros on
those lists.
We merge it here since it's currently behaving better (passing the
entire test suite). But the code base has proven quite fragile:
several of the recently added test cases required additional
special cases in the take-2 branch while working trivially on master.
So this merge point may be useful in the future, since we might have a
cleaner code base by coming back to the state before this merge and
fixing it, rather than accepting all the fragile
imperative/list-munging code from the take-2 branch.
Diffstat (limited to 'glcpp-lex.l')
-rw-r--r-- | glcpp-lex.l | 222 |
1 file changed, 64 insertions, 158 deletions
diff --git a/glcpp-lex.l b/glcpp-lex.l index ee1f6e3aee..52269c6b30 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,14 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" -%x ST_DEFINE -%x ST_DEFINE_OBJ_OR_FUNC -%x ST_DEFINE_PARAMETER -%x ST_DEFINE_VALUE -%x ST_IF -%x ST_UNDEF -%x ST_UNDEF_END - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] HSPACE [ \t] HASH ^{HSPACE}*#{HSPACE}* IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -TOKEN [^[:space:](),]+ +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] +OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+ DECIMAL_INTEGER [1-9][0-9]*[uU]? OCTAL_INTEGER 0[0-7]*[uU]? @@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% -{HASH}if{HSPACE}* { - BEGIN ST_IF; - return IF; +{HASH}define{HSPACE}+/{IDENTIFIER}"(" { + yyextra->space_tokens = 0; + return HASH_DEFINE_FUNC; } -{HASH}elif{HSPACE}* { - BEGIN ST_IF; - return ELIF; +{HASH}define { + yyextra->space_tokens = 0; + return HASH_DEFINE_OBJ; } -<ST_IF>{DECIMAL_INTEGER} { - yylval.ival = strtoll (yytext, NULL, 10); - return INTEGER; +{HASH}undef { + yyextra->space_tokens = 0; + return HASH_UNDEF; } -<ST_IF>{OCTAL_INTEGER} { - yylval.ival = strtoll (yytext + 1, NULL, 8); - return INTEGER; +{HASH}if { + yyextra->space_tokens = 0; + return HASH_IF; } -<ST_IF>{HEXADECIMAL_INTEGER} { - yylval.ival = strtoll (yytext + 2, NULL, 16); - return INTEGER; +{HASH}elif { + yyextra->space_tokens = 0; + return HASH_ELIF; } -<ST_IF>"defined" { - return DEFINED; +{HASH}else { + yyextra->space_tokens = 0; + return HASH_ELSE; } -<ST_IF>"<<" { - return LEFT_SHIFT; +{HASH}endif { + yyextra->space_tokens = 0; + return HASH_ENDIF; } -<ST_IF>">>" { - return RIGHT_SHIFT; +{HASH} { + yyextra->space_tokens = 0; + return HASH; } -<ST_IF>"<=" { - return LESS_OR_EQUAL; -} - -<ST_IF>">=" { - return GREATER_OR_EQUAL; -} - -<ST_IF>"==" { - return EQUAL; -} - -<ST_IF>"!=" { - return NOT_EQUAL; -} - -<ST_IF>"&&" { - return AND; -} - -<ST_IF>"||" { - return OR; -} - -<ST_IF>[-+*/%<>&^|()~] { - return 
yytext[0]; +{DECIMAL_INTEGER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } -<ST_IF>{IDENTIFIER} { +{OCTAL_INTEGER} { yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; + return INTEGER_STRING; } -<ST_IF>{HSPACE}+ - -<ST_IF>\n { - BEGIN INITIAL; - return NEWLINE; +{HEXADECIMAL_INTEGER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return INTEGER_STRING; } -{HASH}endif{HSPACE}* { - return ENDIF; +"<<" { + return LEFT_SHIFT; } -{HASH}else{HSPACE}* { - return ELSE; +">>" { + return RIGHT_SHIFT; } -{HASH}undef{HSPACE}* { - BEGIN ST_UNDEF; - return UNDEF; +"<=" { + return LESS_OR_EQUAL; } -<ST_UNDEF>{IDENTIFIER} { - BEGIN ST_UNDEF_END; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; +">=" { + return GREATER_OR_EQUAL; } -<ST_UNDEF_END>{HSPACE}* - -<ST_UNDEF_END>\n { - BEGIN INITIAL; +"==" { + return EQUAL; } - /* We use the ST_DEFINE and ST_DEFVAL states so that we can - * pass a space token, (yes, a token for whitespace!), since - * the preprocessor specification requires distinguishing - * "#define foo()" from "#define foo ()". 
- */ -{HASH}define{HSPACE}* { - BEGIN ST_DEFINE; - return DEFINE; +"!=" { + return NOT_EQUAL; } -<ST_DEFINE>{IDENTIFIER} { - BEGIN ST_DEFINE_OBJ_OR_FUNC; - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; +"&&" { + return AND; } -<ST_DEFINE_OBJ_OR_FUNC>\n { - BEGIN INITIAL; - return NEWLINE; +"||" { + return OR; } -<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ { - BEGIN ST_DEFINE_VALUE; - return SPACE; +"##" { + return PASTE; } -<ST_DEFINE_OBJ_OR_FUNC>"(" { - BEGIN ST_DEFINE_PARAMETER; - return '('; +"defined" { + return DEFINED; } -<ST_DEFINE_PARAMETER>{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -<ST_DEFINE_PARAMETER>"," { - return ','; -} - -<ST_DEFINE_PARAMETER>")" { - BEGIN ST_DEFINE_VALUE; - return ')'; -} - -<ST_DEFINE_PARAMETER>{HSPACE}+ - -<ST_DEFINE_VALUE>{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -<ST_DEFINE_VALUE>[(),] { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; -} - -<ST_DEFINE_VALUE>{HSPACE}+ - -<ST_DEFINE_VALUE>\n { - BEGIN INITIAL; - return NEWLINE; +{PUNCTUATION} { + return yytext[0]; } -{IDENTIFIER} { - int parameter_index; +{OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); - switch (glcpp_parser_classify_token (yyextra, yylval.str, - &parameter_index)) - { - case TOKEN_CLASS_IDENTIFIER: - return IDENTIFIER; - break; - case TOKEN_CLASS_IDENTIFIER_FINALIZED: - return IDENTIFIER_FINALIZED; - break; - case TOKEN_CLASS_FUNC_MACRO: - return FUNC_MACRO; - break; - case TOKEN_CLASS_OBJ_MACRO: - return OBJ_MACRO; - break; - - } -} - -[(),] { - return yytext[0]; + return OTHER; } -{TOKEN} { - yylval.token.type = TOKEN; - yylval.token.value = xtalloc_strdup (yyextra, yytext); - return TOKEN; +{HSPACE}+ { + if (yyextra->space_tokens) { + return SPACE; + } } \n { - yyextra->need_newline = 1; + return NEWLINE; } -{HSPACE}+ - %% |