aboutsummaryrefslogtreecommitdiff
path: root/scripts/genksyms/lex.l
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/genksyms/lex.l')
-rw-r--r--scripts/genksyms/lex.l107
1 files changed, 90 insertions, 17 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
index fe50ff9dacd..e583565f201 100644
--- a/scripts/genksyms/lex.l
+++ b/scripts/genksyms/lex.l
@@ -29,7 +29,7 @@
#include <ctype.h>
#include "genksyms.h"
-#include "parse.h"
+#include "parse.tab.h"
/* We've got a two-level lexer here. We let flex do basic tokenization
and then we categorize those basic tokens in the second stage. */
@@ -55,10 +55,6 @@ CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
-/* Version 2 checksumming does proper tokenization; version 1 wasn't
- quite so pedantic. */
-%s V2_TOKENS
-
/* We don't do multiple input files. */
%option noyywrap
@@ -84,9 +80,9 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
recognized as tokens. We don't actually use them since we don't
parse expressions, but we do want whitespace to be arranged
around them properly. */
-<V2_TOKENS>{MC_TOKEN} return OTHER;
-<V2_TOKENS>{INT} return INT;
-<V2_TOKENS>{REAL} return REAL;
+{MC_TOKEN} return OTHER;
+{INT} return INT;
+{REAL} return REAL;
"..." return DOTS;
@@ -98,17 +94,29 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
/* Bring in the keyword recognizer. */
-#include "keywords.c"
+#include "keywords.hash.c"
/* Macros to append to our phrase collection list. */
+/*
+ * We mark any token, that that equals to a known enumerator, as
+ * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
+ * the only problem is struct and union members:
+ * enum e { a, b }; struct s { int a, b; }
+ * but in this case, the only effect will be, that the ABI checksums become
+ * more volatile, which is acceptable. Also, such collisions are quite rare,
+ * so far it was only observed in include/linux/telephony.h.
+ */
#define _APP(T,L) do { \
cur_node = next_node; \
next_node = xmalloc(sizeof(*next_node)); \
next_node->next = cur_node; \
cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
- cur_node->tag = SYM_NORMAL; \
+ cur_node->tag = \
+ find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
+ SYM_ENUM_CONST : SYM_NORMAL ; \
+ cur_node->in_source_file = in_source_file; \
} while (0)
#define APP _APP(yytext, yyleng)
@@ -121,8 +129,9 @@ int
yylex(void)
{
static enum {
- ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
- ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
+ ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
+ ST_BRACKET, ST_BRACE, ST_EXPRESSION,
+ ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
ST_TABLE_5, ST_TABLE_6
} lexstate = ST_NOTSTARTED;
@@ -134,7 +143,6 @@ yylex(void)
if (lexstate == ST_NOTSTARTED)
{
- BEGIN(V2_TOKENS);
next_node = xmalloc(sizeof(*next_node));
next_node->next = NULL;
lexstate = ST_NORMAL;
@@ -160,6 +168,13 @@ repeat:
cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
cur_line = atoi(yytext+2);
+ if (!source_file) {
+ source_file = xstrdup(cur_filename);
+ in_source_file = 1;
+ } else {
+ in_source_file = (strcmp(cur_filename, source_file) == 0);
+ }
+
goto repeat;
}
@@ -184,11 +199,15 @@ repeat:
lexstate = ST_ASM;
count = 0;
goto repeat;
+ case TYPEOF_KEYW:
+ lexstate = ST_TYPEOF;
+ count = 0;
+ goto repeat;
case STRUCT_KEYW:
case UNION_KEYW:
- dont_want_brace_phrase = 3;
case ENUM_KEYW:
+ dont_want_brace_phrase = 3;
suppress_type_lookup = 2;
goto fini;
@@ -198,8 +217,7 @@ repeat:
}
if (!suppress_type_lookup)
{
- struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
- if (sym && sym->type == SYM_TYPEDEF)
+ if (find_symbol(yytext, SYM_TYPEDEF, 1))
token = TYPE;
}
}
@@ -271,6 +289,48 @@ repeat:
}
break;
+ case ST_TYPEOF:
+ switch (token)
+ {
+ case '(':
+ if ( ++count == 1 )
+ lexstate = ST_TYPEOF_1;
+ else
+ APP;
+ goto repeat;
+ case ')':
+ APP;
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = TYPEOF_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ APP;
+ goto repeat;
+ }
+ break;
+
+ case ST_TYPEOF_1:
+ if (token == IDENT)
+ {
+ if (is_reserved_word(yytext, yyleng)
+ || find_symbol(yytext, SYM_TYPEDEF, 1))
+ {
+ yyless(0);
+ unput('(');
+ lexstate = ST_NORMAL;
+ token = TYPEOF_KEYW;
+ break;
+ }
+ _APP("(", 1);
+ }
+ APP;
+ lexstate = ST_TYPEOF;
+ goto repeat;
+
case ST_BRACKET:
APP;
switch (token)
@@ -318,7 +378,20 @@ repeat:
++count;
APP;
goto repeat;
- case ')': case ']': case '}':
+ case '}':
+ /* is this the last line of an enum declaration? */
+ if (count == 0)
+ {
+ /* Put back the token we just read so's we can find it again
+ after registering the expression. */
+ unput(token);
+
+ lexstate = ST_NORMAL;
+ token = EXPRESSION_PHRASE;
+ break;
+ }
+ /* FALLTHRU */
+ case ')': case ']':
--count;
APP;
goto repeat;