diff options
Diffstat (limited to 'scripts/genksyms/lex.l')
| -rw-r--r-- | scripts/genksyms/lex.l | 107 |
1 files changed, 90 insertions, 17 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index fe50ff9dacd..e583565f201 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -29,7 +29,7 @@ #include <ctype.h> #include "genksyms.h" -#include "parse.h" +#include "parse.tab.h" /* We've got a two-level lexer here. We let flex do basic tokenization and then we categorize those basic tokens in the second stage. */ @@ -55,10 +55,6 @@ CHAR L?\'([^\\\']*\\.)*[^\\\']*\' MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) -/* Version 2 checksumming does proper tokenization; version 1 wasn't - quite so pedantic. */ -%s V2_TOKENS - /* We don't do multiple input files. */ %option noyywrap @@ -84,9 +80,9 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) recognized as tokens. We don't actually use them since we don't parse expressions, but we do want whitespace to be arranged around them properly. */ -<V2_TOKENS>{MC_TOKEN} return OTHER; -<V2_TOKENS>{INT} return INT; -<V2_TOKENS>{REAL} return REAL; +{MC_TOKEN} return OTHER; +{INT} return INT; +{REAL} return REAL; "..." return DOTS; @@ -98,17 +94,29 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) /* Bring in the keyword recognizer. */ -#include "keywords.c" +#include "keywords.hash.c" /* Macros to append to our phrase collection list. */ +/* + * We mark any token, that that equals to a known enumerator, as + * SYM_ENUM_CONST. The parser will change this for struct and union tags later, + * the only problem is struct and union members: + * enum e { a, b }; struct s { int a, b; } + * but in this case, the only effect will be, that the ABI checksums become + * more volatile, which is acceptable. Also, such collisions are quite rare, + * so far it was only observed in include/linux/telephony.h. + */ #define _APP(T,L) do { \ cur_node = next_node; \ next_node = xmalloc(sizeof(*next_node)); \ next_node->next = cur_node; \ cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ - cur_node->tag = SYM_NORMAL; \ + cur_node->tag = \ + find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ + SYM_ENUM_CONST : SYM_NORMAL ; \ + cur_node->in_source_file = in_source_file; \ } while (0) #define APP _APP(yytext, yyleng) @@ -121,8 +129,9 @@ int yylex(void) { static enum { - ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, - ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, + ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, + ST_BRACKET, ST_BRACE, ST_EXPRESSION, + ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, ST_TABLE_5, ST_TABLE_6 } lexstate = ST_NOTSTARTED; @@ -134,7 +143,6 @@ yylex(void) if (lexstate == ST_NOTSTARTED) { - BEGIN(V2_TOKENS); next_node = xmalloc(sizeof(*next_node)); next_node->next = NULL; lexstate = ST_NORMAL; @@ -160,6 +168,13 @@ repeat: cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); cur_line = atoi(yytext+2); + if (!source_file) { + source_file = xstrdup(cur_filename); + in_source_file = 1; + } else { + in_source_file = (strcmp(cur_filename, source_file) == 0); + } + goto repeat; } @@ -184,11 +199,15 @@ repeat: lexstate = ST_ASM; count = 0; goto repeat; + case TYPEOF_KEYW: + lexstate = ST_TYPEOF; + count = 0; + goto repeat; case STRUCT_KEYW: case UNION_KEYW: - dont_want_brace_phrase = 3; case ENUM_KEYW: + dont_want_brace_phrase = 3; suppress_type_lookup = 2; goto fini; @@ -198,8 +217,7 @@ repeat: } if (!suppress_type_lookup) { - struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF); - if (sym && sym->type == SYM_TYPEDEF) + if (find_symbol(yytext, SYM_TYPEDEF, 1)) token = TYPE; } } @@ -271,6 +289,48 @@ repeat: } break; + case ST_TYPEOF: + switch (token) + { + case '(': + if ( ++count == 1 ) + lexstate = ST_TYPEOF_1; + else + APP; + goto repeat; + case ')': + APP; + if (--count == 0) + { + lexstate = ST_NORMAL; + token = TYPEOF_PHRASE; + break; + } + goto repeat; + default: + APP; + goto repeat; + } + break; + + case ST_TYPEOF_1: + if (token == IDENT) + { + if (is_reserved_word(yytext, yyleng) + || find_symbol(yytext, SYM_TYPEDEF, 1)) + { + yyless(0); + unput('('); + lexstate = ST_NORMAL; + token = TYPEOF_KEYW; + break; + } + _APP("(", 1); + } + APP; + lexstate = ST_TYPEOF; + goto repeat; + case ST_BRACKET: APP; switch (token) @@ -318,7 +378,20 @@ repeat: ++count; APP; goto repeat; - case ')': case ']': case '}': + case '}': + /* is this the last line of an enum declaration? */ + if (count == 0) + { + /* Put back the token we just read so's we can find it again + after registering the expression. */ + unput(token); + + lexstate = ST_NORMAL; + token = EXPRESSION_PHRASE; + break; + } + /* FALLTHRU */ + case ')': case ']': --count; APP; goto repeat; |
