[mirror_ubuntu-zesty-kernel.git] / scripts / genksyms / lex.l

/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL gives the flex-generated scanner the file-local name
   yylex1(), which leaves the public name yylex() free for the
   second-stage wrapper defined after the rules section.  */
#define YY_DECL		static int yylex1(void)

%}

/* C identifiers; '$' is accepted as an identifier character as well.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU (in either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction with an optional exponent, or
   plain digits with a mandatory exponent; an F or L suffix may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally prefixed with L.  A
   backslash is always consumed together with the character after it,
   so an escaped quote does not terminate the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: one operator character followed by '='
   (for example "+=", "==" or "<="), or one of "&&", "||", "->", "<<"
   and ">>".  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  The rules tagged <V2_TOKENS> below are only
   active in this start condition, which yylex() enters on its first
   call.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap

/* Nothing in this file calls input(), so flex need not emit it.  */
%option noinput

%%


 /* Keep track of our location in the original source files.  A line
    of the form  # <integer> "<file>" ...  is handed to yylex() as
    FILENAME, which records the file name and line number from the
    matched text.  Any other line starting with '#' and every bare
    newline simply advance cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not told apart here; every
    word is returned as IDENT and classified later by yylex().  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
<V2_TOKENS>{MC_TOKEN}			return OTHER;
<V2_TOKENS>{INT}			return INT;
<V2_TOKENS>{REAL}			return REAL;

 /* The three-dot ellipsis is a token of its own.  */
"..."					return DOTS;

 /* All other tokens are single characters.  The character itself is
    used as the token code.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.

   _APP(T, L) stores a copy of the L-byte text T, together with the
   terminating NUL that follows it, in the node currently referenced by
   `next_node', tags that node SYM_NORMAL, and then pushes a freshly
   allocated empty node in front of it.  The macro expects two
   `struct string_list *' variables named `cur_node' and `next_node' to
   be in scope where it is used.  Afterwards `cur_node' is the node that
   received the text and `next_node->next == cur_node'.

   Both arguments are parenthesized in the expansion so that arbitrary
   expressions can be passed; note that L is still evaluated twice.  */

#define _APP(T,L)	do { \
			  cur_node = next_node; \
			  next_node = xmalloc(sizeof(*next_node)); \
			  next_node->next = cur_node; \
			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
			  cur_node->tag = SYM_NORMAL; \
			} while (0)

/* Append the text of the token most recently matched by flex.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Each call pulls raw tokens from yylex1() until one parser-level token
   is complete.  The text of the raw tokens that make it up is appended
   to the phrase list with APP.  Bracketed, braced, attribute, asm and
   initializer text is collapsed into a single *_PHRASE token.

   Returns 0 at end of input, otherwise the token code.  On a non-zero
   return yylval is set to &next_node->next, i.e. the address of the
   link to the node that was appended most recently.

   FILENAME tokens are consumed here: they update cur_filename and
   cur_line and are never returned to the caller.  */

int
yylex(void)
{
  /* Nothing in this function moves from a non-TABLE state into one of
     the ST_TABLE_* states, so those cases cannot be reached as the code
     stands; they are kept unchanged.  */
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
    ST_TABLE_5, ST_TABLE_6
  } lexstate = ST_NOTSTARTED;

  /* Countdowns that are decremented once for every token returned via
     `fini'.  While suppress_type_lookup is non-zero an identifier is not
     looked up as a typedef name; while dont_want_brace_phrase is
     non-zero a '{' is returned as itself instead of starting a
     BRACE_PHRASE.  */
  static int suppress_type_lookup, dont_want_brace_phrase;

  /* Empty head of the phrase list; APP fills it and pushes a new one.  */
  static struct string_list *next_node;

  /* `count' is the bracket nesting depth of the phrase being collected.
     It starts at zero on every call.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: switch flex to V2 tokenization and create the
	 initial empty list head.  */
      BEGIN(V2_TOKENS);
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.
	 The FILENAME rule guarantees that yytext holds a quoted,
	 non-empty file name, so both strchr calls succeed.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      cur_line = atoi(yytext+2);

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    const struct resword *r = is_reserved_word(yytext, yyleng);
	    if (r)
	      {
		switch (token = r->token)
		  {
		  /* Attribute and asm keywords start a parenthesized
		     phrase that is collected within this same call.  */
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		    /* Covers the keyword itself and the next two
		       returned tokens.  */
		    dont_want_brace_phrase = 3;
		    /* FALLTHRU */
		  case ENUM_KEYW:
		    /* Covers the keyword itself and the next returned
		       token, so a tag is not mistaken for a typedef.  */
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		      goto fini;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
		if (sym && sym->type == SYM_TYPEDEF)
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  /* Return the operator itself now; the text that follows is
	     collected as an EXPRESSION_PHRASE on the next call.  */
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	case DOTS:
	default:
	  APP;
	  break;
	}
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the ')' that balances the first '('.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ATTRIBUTE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_ASM:
      /* Same as ST_ATTRIBUTE, but yields ASM_PHRASE.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ASM_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACKET:
      /* Entered with count == 1 for the '[' already consumed.  */
      APP;
      switch (token)
	{
	case '[':
	  ++count;
	  goto repeat;
	case ']':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACKET_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACE:
      /* Entered with count == 1 for the '{' already consumed.  */
      APP;
      switch (token)
	{
	case '{':
	  ++count;
	  goto repeat;
	case '}':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_EXPRESSION:
      /* Any kind of bracket nests here; a ',' or ';' at depth zero ends
	 the expression.  */
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case ')': case ']': case '}':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_TABLE_1:
      goto repeat;

    case ST_TABLE_2:
      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
	{
	  token = EXPORT_SYMBOL_KEYW;
	  lexstate = ST_TABLE_5;
	  APP;
	  break;
	}
      lexstate = ST_TABLE_6;
      /* FALLTHRU */

    case ST_TABLE_6:
      /* Skip tokens without recording them; a ',' at depth zero goes
	 back to ST_TABLE_2.  */
      switch (token)
	{
	case '{': case '[': case '(':
	  ++count;
	  break;
	case '}': case ']': case ')':
	  --count;
	  break;
	case ',':
	  if (count == 0)
	    lexstate = ST_TABLE_2;
	  break;
	}
      goto repeat;

    case ST_TABLE_3:
      goto repeat;

    case ST_TABLE_4:
      if (token == ';')
	lexstate = ST_NORMAL;
      goto repeat;

    case ST_TABLE_5:
      switch (token)
	{
	case ',':
	  /* A ',' is reported to the parser as ';'.  */
	  token = ';';
	  lexstate = ST_TABLE_2;
	  APP;
	  break;
	default:
	  APP;
	  break;
	}
      break;

    default:
      /* ST_NOTSTARTED or a corrupted state: cannot happen after the
	 initialization above.  */
      exit(1);
    }
fini:

  /* One token is about to be handed to the parser: age the countdowns.  */
  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}
Commit	Line	Data
1da177e4 LT	1	/* Lexical analysis for genksyms.
	2	Copyright 1996, 1997 Linux International.
	3
	4	New implementation contributed by Richard Henderson <rth@tamu.edu>
	5	Based on original work by Bjorn Ekwall <bj0rn@blox.se>
	6
	7	Taken from Linux modutils 2.4.22.
	8
	9	This program is free software; you can redistribute it and/or modify it
	10	under the terms of the GNU General Public License as published by the
	11	Free Software Foundation; either version 2 of the License, or (at your
	12	option) any later version.
	13
	14	This program is distributed in the hope that it will be useful, but
	15	WITHOUT ANY WARRANTY; without even the implied warranty of
	16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	17	General Public License for more details.
	18
	19	You should have received a copy of the GNU General Public License
	20	along with this program; if not, write to the Free Software Foundation,
	21	Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
	22
	23
	24	%{
	25
	26	#include <limits.h>
	27	#include <stdlib.h>
	28	#include <string.h>
	29	#include <ctype.h>
	30
	31	#include "genksyms.h"
	32	#include "parse.h"
	33
	34	/* We've got a two-level lexer here. We let flex do basic tokenization
	35	and then we categorize those basic tokens in the second stage. */
	36	#define YY_DECL static int yylex1(void)
	37
	38	%}
	39
	40	IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
	41
	42	O_INT 0[0-7]*
	43	D_INT [1-9][0-9]*
	44	X_INT 0[Xx][0-9A-Fa-f]+
	45	I_SUF [Uu]\|[Ll]\|[Uu][Ll]\|[Ll][Uu]
	46	INT ({O_INT}\|{D_INT}\|{X_INT}){I_SUF}?
	47
	48	FRAC ([0-9]*\.[0-9]+)\|([0-9]+\.)
	49	EXP [Ee][+-]?[0-9]+
	50	F_SUF [FfLl]
	51	REAL ({FRAC}{EXP}?{F_SUF}?)\|([0-9]+{EXP}{F_SUF}?)
	52
	53	STRING L?\"([^\\\"]\\.)[^\\\"]*\"
	54	CHAR L?\'([^\\\']\\.)[^\\\']*\'
	55
	56	MC_TOKEN ([~%^&*+=\|<>/-]=)\|(&&)\|("\|\|")\|(->)\|(<<)\|(>>)
	57
	58	/* Version 2 checksumming does proper tokenization; version 1 wasn't
	59	quite so pedantic. */
	60	%s V2_TOKENS
	61
	62	/* We don't do multiple input files. */
	63	%option noyywrap
	64
11ddad39 AB	65	%option noinput
11ddad39 AB	66
1da177e4 LT	67	%%
	68
	69
	70	/* Keep track of our location in the original source files. */
	71	^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
	72	^#.*\n cur_line++;
	73	\n cur_line++;
	74
	75	/* Ignore all other whitespace. */
	76	[ \t\f\v\r]+ ;
	77
	78
	79	{STRING} return STRING;
	80	{CHAR} return CHAR;
	81	{IDENT} return IDENT;
	82
	83	/* The Pedant requires that the other C multi-character tokens be
	84	recognized as tokens. We don't actually use them since we don't
	85	parse expressions, but we do want whitespace to be arranged
	86	around them properly. */
	87	<V2_TOKENS>{MC_TOKEN} return OTHER;
	88	<V2_TOKENS>{INT} return INT;
	89	<V2_TOKENS>{REAL} return REAL;
	90
	91	"..." return DOTS;
	92
	93	/* All other tokens are single characters. */
	94	. return yytext[0];
	95
	96
	97	%%
	98
	99	/* Bring in the keyword recognizer. */
	100
	101	#include "keywords.c"
	102
	103
	104	/* Macros to append to our phrase collection list. */
	105
	106	#define _APP(T,L) do { \
	107	cur_node = next_node; \
	108	next_node = xmalloc(sizeof(*next_node)); \
	109	next_node->next = cur_node; \
	110	cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
	111	cur_node->tag = SYM_NORMAL; \
	112	} while (0)
	113
	114	#define APP _APP(yytext, yyleng)
	115
	116
	117	/* The second stage lexer. Here we incorporate knowledge of the state
	118	of the parser to tailor the tokens that are returned. */
	119
	120	int
	121	yylex(void)
	122	{
	123	static enum {
	124	ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
	125	ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
	126	ST_TABLE_5, ST_TABLE_6
	127	} lexstate = ST_NOTSTARTED;
	128
	129	static int suppress_type_lookup, dont_want_brace_phrase;
	130	static struct string_list *next_node;
131
132	int token, count = 0;
133	struct string_list *cur_node;
134
135	if (lexstate == ST_NOTSTARTED)
136	{
137	BEGIN(V2_TOKENS);
138	next_node = xmalloc(sizeof(*next_node));
139	next_node->next = NULL;
140	lexstate = ST_NORMAL;
141	}
142
143	repeat:
144	token = yylex1();
145
146	if (token == 0)
147	return 0;
148	else if (token == FILENAME)
149	{
150	char file, e;
151
152	/* Save the filename and line number for later error messages. */
153
154	if (cur_filename)
155	free(cur_filename);
156
157	file = strchr(yytext, '\"')+1;
158	e = strchr(file, '\"');
159	*e = '\0';
160	cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
161	cur_line = atoi(yytext+2);
162
163	goto repeat;
164	}
165
166	switch (lexstate)
167	{
168	case ST_NORMAL:
169	switch (token)
170	{
171	case IDENT:
172	APP;
173	{
174	const struct resword *r = is_reserved_word(yytext, yyleng);
175	if (r)
176	{
177	switch (token = r->token)
178	{
179	case ATTRIBUTE_KEYW:
180	lexstate = ST_ATTRIBUTE;
181	count = 0;
182	goto repeat;
183	case ASM_KEYW:
184	lexstate = ST_ASM;
185	count = 0;
186	goto repeat;
187
188	case STRUCT_KEYW:
189	case UNION_KEYW:
190	dont_want_brace_phrase = 3;
191	case ENUM_KEYW:
192	suppress_type_lookup = 2;
193	goto fini;
194
195	case EXPORT_SYMBOL_KEYW:
196	goto fini;
197	}
198	}
199	if (!suppress_type_lookup)
200	{
201	struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
202	if (sym && sym->type == SYM_TYPEDEF)
203	token = TYPE;
204	}
205	}
206	break;
207
208	case '[':
209	APP;
210	lexstate = ST_BRACKET;
211	count = 1;
212	goto repeat;
213
214	case '{':
215	APP;
216	if (dont_want_brace_phrase)
217	break;
218	lexstate = ST_BRACE;
219	count = 1;
220	goto repeat;
221
222	case '=': case ':':
223	APP;
224	lexstate = ST_EXPRESSION;
225	break;
226
227	case DOTS:
228	default:
229	APP;
230	break;
231	}
232	break;
233
234	case ST_ATTRIBUTE:
235	APP;
236	switch (token)
237	{
238	case '(':
239	++count;
240	goto repeat;
241	case ')':
242	if (--count == 0)
243	{
244	lexstate = ST_NORMAL;
245	token = ATTRIBUTE_PHRASE;
246	break;
247	}
248	goto repeat;
249	default:
250	goto repeat;
251	}
252	break;
253
254	case ST_ASM:
255	APP;
256	switch (token)
257	{
258	case '(':
259	++count;
260	goto repeat;
261	case ')':
262	if (--count == 0)
263	{
264	lexstate = ST_NORMAL;
265	token = ASM_PHRASE;
266	break;
267	}
268	goto repeat;
269	default:
270	goto repeat;
271	}
272	break;
273
274	case ST_BRACKET:
275	APP;
276	switch (token)
277	{
278	case '[':
279	++count;
280	goto repeat;
281	case ']':
282	if (--count == 0)
283	{
284	lexstate = ST_NORMAL;
285	token = BRACKET_PHRASE;
286	break;
287	}
288	goto repeat;
289	default:
290	goto repeat;
291	}
292	break;
293
294	case ST_BRACE:
295	APP;
296	switch (token)
297	{
298	case '{':
299	++count;
300	goto repeat;
301	case '}':
302	if (--count == 0)
303	{
304	lexstate = ST_NORMAL;
305	token = BRACE_PHRASE;
306	break;
307	}
308	goto repeat;
309	default:
310	goto repeat;
311	}
312	break;
313
314	case ST_EXPRESSION:
315	switch (token)
316	{
317	case '(': case '[': case '{':
318	++count;
319	APP;
320	goto repeat;
321	case ')': case ']': case '}':
322	--count;
323	APP;
324	goto repeat;
325	case ',': case ';':
326	if (count == 0)
327	{
328	/* Put back the token we just read so's we can find it again
329	after registering the expression. */
330	unput(token);
331
332	lexstate = ST_NORMAL;
333	token = EXPRESSION_PHRASE;
334	break;
335	}
336	APP;
337	goto repeat;
338	default:
339	APP;
340	goto repeat;
341	}
342	break;
343
344	case ST_TABLE_1:
345	goto repeat;
346
347	case ST_TABLE_2:
348	if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
349	{
350	token = EXPORT_SYMBOL_KEYW;
351	lexstate = ST_TABLE_5;
352	APP;
353	break;
354	}
355	lexstate = ST_TABLE_6;
356	/* FALLTHRU */
357
358	case ST_TABLE_6:
359	switch (token)
360	{
361	case '{': case '[': case '(':
362	++count;
363	break;
364	case '}': case ']': case ')':
365	--count;
366	break;
367	case ',':
368	if (count == 0)
369	lexstate = ST_TABLE_2;
370	break;
371	};
372	goto repeat;
373
374	case ST_TABLE_3:
375	goto repeat;
376
377	case ST_TABLE_4:
378	if (token == ';')
379	lexstate = ST_NORMAL;
380	goto repeat;
381
382	case ST_TABLE_5:
383	switch (token)
384	{
385	case ',':
386	token = ';';
387	lexstate = ST_TABLE_2;
388	APP;
389	break;
390	default:
391	APP;
392	break;
393	}
394	break;
395
396	default:
6803dc0e	397	exit(1);
1da177e4 LT	398	}
	399	fini:
	400
	401	if (suppress_type_lookup > 0)
	402	--suppress_type_lookup;
	403	if (dont_want_brace_phrase > 0)
	404	--dont_want_brace_phrase;
	405
	406	yylval = &next_node->next;
	407
	408	return token;
	409	}