[mirror_ubuntu-artful-kernel.git] / scripts / genksyms / lex.l

/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.
   YY_DECL makes flex emit its scanner as the file-local yylex1(), which
   leaves the name yylex() free for the second stage defined at the end
   of this file.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers: letters, digits, '_' and also '$'; may not start with a
   digit.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU (each letter in either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction with an optional exponent, or a
   digit string with a mandatory exponent; an F or L suffix may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally prefixed with L.  Inside the
   quotes a backslash is always consumed together with the character that
   follows it, so an escaped quote does not end the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: one of the operator characters in the class
   followed by '=', plus && || -> << and >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* Nothing in this file calls flex's input(); ask flex not to emit it.  */
%option noinput

%%


 /* Keep track of our location in the original source files.
    A line of the form  # <integer> "<file>" ...  is handed to yylex()
    as FILENAME, where the file name and line number are recorded.  Any
    other line starting with '#' is skipped and only counted.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not distinguished here; they
    come back as IDENT and are classified by yylex().  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  The token code is the
    character itself.  Keep this rule last: for matches of equal length
    flex picks the rule listed first, and this one matches any single
    character other than newline.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.

   _APP(T,L) records the token text T of length L.  The current
   placeholder `next_node' becomes the node for this token (left in
   `cur_node'), and a fresh placeholder is allocated whose `next' points
   back at it, so the list is linked from newest to oldest.

   L+1 bytes are copied from T, so T[L] must be the terminating NUL.
   L is evaluated twice; pass an expression without side effects.  Both
   `cur_node' and `next_node' must be in scope where the macro is used.  */

#define _APP(T,L)	do {							\
			  cur_node = next_node;					\
			  next_node = xmalloc(sizeof(*next_node));		\
			  next_node->next = cur_node;				\
			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1);	\
			  cur_node->tag = SYM_NORMAL;				\
			} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Raw tokens come from the flex scanner yylex1().  The text of every
   token that is kept is appended with APP to a list of string_list
   nodes linked from newest to oldest.  On return yylval points at the
   `next' link of the list's empty head node, i.e. at the link leading to
   the most recently appended token.

   Some token runs are swallowed whole and reported as a single token:
     - what follows an ATTRIBUTE_KEYW or ASM_KEYW, up to the ')' that
       brings the parenthesis depth back to zero (ATTRIBUTE_PHRASE,
       ASM_PHRASE);
     - a balanced [...] group (BRACKET_PHRASE);
     - a balanced {...} group (BRACE_PHRASE), unless the '{' comes within
       the two tokens returned after a struct/union keyword;
     - whatever follows '=' or ':' up to a ',' or ';' at nesting depth
       zero (EXPRESSION_PHRASE); the ',' or ';' itself is pushed back.
   FILENAME tokens are consumed here and update cur_filename/cur_line.

   Returns 0 when yylex1() reports end of input, otherwise the token
   code for the parser.  */

int
yylex(void)
{
  /* Persists across calls: ST_EXPRESSION is entered by the call that
     returns '=' or ':' and acted upon by the following call.  */
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Countdowns, decremented at `fini' once per token returned.  While
     non-zero they disable typedef lookup and '{' phrase collection.  */
  static int suppress_type_lookup, dont_want_brace_phrase;

  /* Empty head node of the token list; APP fills it in and replaces it.  */
  static struct string_list *next_node;

  /* `count' is the nesting depth while a phrase is being collected.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;
      size_t len;

      /* Save the filename and line number for later error messages.  */

      free(cur_filename);

      /* The FILENAME rule only matches text that contains a quoted,
	 non-empty name, so both quotes are known to be present.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      len = e-file+1;
      cur_filename = memcpy(xmalloc(len), file, len);

      /* Skip the '#' and the first blank; atoi skips any further ones.  */
      cur_line = atoi(yytext+2);

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    const struct resword *r = is_reserved_word(yytext, yyleng);
	    if (r)
	      {
		switch (token = r->token)
		  {
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		    /* 3 here leaves 2 after this call's decrement: a '{'
		       among the next two returned tokens stays a plain
		       '{' instead of starting a BRACE_PHRASE.  */
		    dont_want_brace_phrase = 3;
		    /* FALLTHRU */
		  case ENUM_KEYW:
		    /* 2 here leaves 1 after this call's decrement: the
		       next returned identifier is not looked up as a
		       typedef name.  */
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		    goto fini;

		  default:
		    /* Any other reserved word continues below.  */
		    break;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		/* Report TYPE when find_symbol() finds a SYM_TYPEDEF
		   entry for this name.  */
		if (find_symbol(yytext, SYM_TYPEDEF, 1))
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  /* Return the '=' or ':' itself now; the expression after it is
	     collected by the next call.  */
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	case DOTS:
	default:
	  APP;
	  break;
	}
      break;

    case ST_ATTRIBUTE:
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ATTRIBUTE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_ASM:
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ASM_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACKET:
      APP;
      switch (token)
	{
	case '[':
	  ++count;
	  goto repeat;
	case ']':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACKET_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACE:
      APP;
      switch (token)
	{
	case '{':
	  ++count;
	  goto repeat;
	case '}':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_EXPRESSION:
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case ')': case ']': case '}':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    default:
      /* No transition leads to any other state.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}
Commit	Line	Data
1da177e4 LT	1	/* Lexical analysis for genksyms.
	2	Copyright 1996, 1997 Linux International.
	3
	4	New implementation contributed by Richard Henderson <rth@tamu.edu>
	5	Based on original work by Bjorn Ekwall <bj0rn@blox.se>
	6
	7	Taken from Linux modutils 2.4.22.
	8
	9	This program is free software; you can redistribute it and/or modify it
	10	under the terms of the GNU General Public License as published by the
	11	Free Software Foundation; either version 2 of the License, or (at your
	12	option) any later version.
	13
	14	This program is distributed in the hope that it will be useful, but
	15	WITHOUT ANY WARRANTY; without even the implied warranty of
	16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	17	General Public License for more details.
	18
	19	You should have received a copy of the GNU General Public License
	20	along with this program; if not, write to the Free Software Foundation,
	21	Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
	22
	23
	24	%{
	25
	26	#include <limits.h>
	27	#include <stdlib.h>
	28	#include <string.h>
	29	#include <ctype.h>
	30
	31	#include "genksyms.h"
	32	#include "parse.h"
	33
	34	/* We've got a two-level lexer here. We let flex do basic tokenization
	35	and then we categorize those basic tokens in the second stage. */
	36	#define YY_DECL static int yylex1(void)
	37
	38	%}
	39
	40	IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
	41
	42	O_INT 0[0-7]*
	43	D_INT [1-9][0-9]*
	44	X_INT 0[Xx][0-9A-Fa-f]+
	45	I_SUF [Uu]\|[Ll]\|[Uu][Ll]\|[Ll][Uu]
	46	INT ({O_INT}\|{D_INT}\|{X_INT}){I_SUF}?
	47
	48	FRAC ([0-9]*\.[0-9]+)\|([0-9]+\.)
	49	EXP [Ee][+-]?[0-9]+
	50	F_SUF [FfLl]
	51	REAL ({FRAC}{EXP}?{F_SUF}?)\|([0-9]+{EXP}{F_SUF}?)
	52
	53	STRING L?\"([^\\\"]\\.)[^\\\"]*\"
	54	CHAR L?\'([^\\\']\\.)[^\\\']*\'
	55
	56	MC_TOKEN ([~%^&*+=\|<>/-]=)\|(&&)\|("\|\|")\|(->)\|(<<)\|(>>)
	57
1da177e4 LT	58	/* We don't do multiple input files. */
	59	%option noyywrap
	60
11ddad39 AB	61	%option noinput
11ddad39 AB	62
1da177e4 LT	63	%%
	64
	65
	66	/* Keep track of our location in the original source files. */
	67	^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
	68	^#.*\n cur_line++;
	69	\n cur_line++;
	70
	71	/* Ignore all other whitespace. */
	72	[ \t\f\v\r]+ ;
	73
	74
	75	{STRING} return STRING;
	76	{CHAR} return CHAR;
	77	{IDENT} return IDENT;
	78
	79	/* The Pedant requires that the other C multi-character tokens be
	80	recognized as tokens. We don't actually use them since we don't
	81	parse expressions, but we do want whitespace to be arranged
	82	around them properly. */
95f1d639 MM	83	{MC_TOKEN} return OTHER;
	84	{INT} return INT;
	85	{REAL} return REAL;
1da177e4 LT	86
	87	"..." return DOTS;
	88
	89	/* All other tokens are single characters. */
	90	. return yytext[0];
	91
	92
	93	%%
	94
	95	/* Bring in the keyword recognizer. */
	96
	97	#include "keywords.c"
	98
	99
	100	/* Macros to append to our phrase collection list. */
	101
	102	#define _APP(T,L) do { \
	103	cur_node = next_node; \
	104	next_node = xmalloc(sizeof(*next_node)); \
	105	next_node->next = cur_node; \
	106	cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
	107	cur_node->tag = SYM_NORMAL; \
	108	} while (0)
	109
	110	#define APP _APP(yytext, yyleng)
	111
	112
	113	/* The second stage lexer. Here we incorporate knowledge of the state
	114	of the parser to tailor the tokens that are returned. */
	115
	116	int
	117	yylex(void)
	118	{
	119	static enum {
	120	ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
	121	ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
	122	ST_TABLE_5, ST_TABLE_6
	123	} lexstate = ST_NOTSTARTED;
	124
	125	static int suppress_type_lookup, dont_want_brace_phrase;
	126	static struct string_list *next_node;
	127
	128	int token, count = 0;
	129	struct string_list *cur_node;
	130
	131	if (lexstate == ST_NOTSTARTED)
	132	{
1da177e4 LT	133	next_node = xmalloc(sizeof(*next_node));
	134	next_node->next = NULL;
	135	lexstate = ST_NORMAL;
	136	}
	137
	138	repeat:
	139	token = yylex1();
	140
	141	if (token == 0)
	142	return 0;
	143	else if (token == FILENAME)
	144	{
	145	char file, e;
	146
	147	/* Save the filename and line number for later error messages. */
	148
	149	if (cur_filename)
	150	free(cur_filename);
	151
	152	file = strchr(yytext, '\"')+1;
	153	e = strchr(file, '\"');
	154	*e = '\0';
	155	cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
	156	cur_line = atoi(yytext+2);
	157
	158	goto repeat;
	159	}
	160
	161	switch (lexstate)
	162	{
	163	case ST_NORMAL:
	164	switch (token)
	165	{
	166	case IDENT:
	167	APP;
	168	{
	169	const struct resword *r = is_reserved_word(yytext, yyleng);
	170	if (r)
	171	{
	172	switch (token = r->token)
	173	{
	174	case ATTRIBUTE_KEYW:
	175	lexstate = ST_ATTRIBUTE;
	176	count = 0;
	177	goto repeat;
	178	case ASM_KEYW:
	179	lexstate = ST_ASM;
	180	count = 0;
	181	goto repeat;
	182
	183	case STRUCT_KEYW:
	184	case UNION_KEYW:
	185	dont_want_brace_phrase = 3;
	186	case ENUM_KEYW:
	187	suppress_type_lookup = 2;
	188	goto fini;
	189
	190	case EXPORT_SYMBOL_KEYW:
	191	goto fini;
	192	}
	193	}
	194	if (!suppress_type_lookup)
	195	{
01762c4e	196	if (find_symbol(yytext, SYM_TYPEDEF, 1))
1da177e4 LT	197	token = TYPE;
	198	}
	199	}
	200	break;
	201
	202	case '[':
	203	APP;
	204	lexstate = ST_BRACKET;
	205	count = 1;
	206	goto repeat;
	207
	208	case '{':
	209	APP;
	210	if (dont_want_brace_phrase)
	211	break;
	212	lexstate = ST_BRACE;
	213	count = 1;
	214	goto repeat;
	215
	216	case '=': case ':':
	217	APP;
	218	lexstate = ST_EXPRESSION;
	219	break;
	220
	221	case DOTS:
	222	default:
	223	APP;
	224	break;
	225	}
	226	break;
	227
	228	case ST_ATTRIBUTE:
	229	APP;
	230	switch (token)
	231	{
	232	case '(':
	233	++count;
	234	goto repeat;
	235	case ')':
	236	if (--count == 0)
	237	{
	238	lexstate = ST_NORMAL;
	239	token = ATTRIBUTE_PHRASE;
	240	break;
	241	}
	242	goto repeat;
	243	default:
	244	goto repeat;
	245	}
	246	break;
	247
	248	case ST_ASM:
	249	APP;
	250	switch (token)
	251	{
	252	case '(':
	253	++count;
	254	goto repeat;
	255	case ')':
	256	if (--count == 0)
	257	{
	258	lexstate = ST_NORMAL;
	259	token = ASM_PHRASE;
	260	break;
261	}
262	goto repeat;
263	default:
264	goto repeat;
265	}
266	break;
267
268	case ST_BRACKET:
269	APP;
270	switch (token)
271	{
272	case '[':
273	++count;
274	goto repeat;
275	case ']':
276	if (--count == 0)
277	{
278	lexstate = ST_NORMAL;
279	token = BRACKET_PHRASE;
280	break;
281	}
282	goto repeat;
283	default:
284	goto repeat;
285	}
286	break;
287
288	case ST_BRACE:
289	APP;
290	switch (token)
291	{
292	case '{':
293	++count;
294	goto repeat;
295	case '}':
296	if (--count == 0)
297	{
298	lexstate = ST_NORMAL;
299	token = BRACE_PHRASE;
300	break;
301	}
302	goto repeat;
303	default:
304	goto repeat;
305	}
306	break;
307
308	case ST_EXPRESSION:
309	switch (token)
310	{
311	case '(': case '[': case '{':
312	++count;
313	APP;
314	goto repeat;
315	case ')': case ']': case '}':
316	--count;
317	APP;
318	goto repeat;
319	case ',': case ';':
320	if (count == 0)
321	{
322	/* Put back the token we just read so's we can find it again
323	after registering the expression. */
324	unput(token);
325
326	lexstate = ST_NORMAL;
327	token = EXPRESSION_PHRASE;
328	break;
329	}
330	APP;
331	goto repeat;
332	default:
333	APP;
334	goto repeat;
335	}
336	break;
337
338	case ST_TABLE_1:
339	goto repeat;
340
341	case ST_TABLE_2:
342	if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
343	{
344	token = EXPORT_SYMBOL_KEYW;
345	lexstate = ST_TABLE_5;
346	APP;
347	break;
348	}
349	lexstate = ST_TABLE_6;
350	/* FALLTHRU */
351
352	case ST_TABLE_6:
353	switch (token)
354	{
355	case '{': case '[': case '(':
356	++count;
357	break;
358	case '}': case ']': case ')':
359	--count;
360	break;
361	case ',':
362	if (count == 0)
363	lexstate = ST_TABLE_2;
364	break;
365	};
366	goto repeat;
367
368	case ST_TABLE_3:
369	goto repeat;
370
371	case ST_TABLE_4:
372	if (token == ';')
373	lexstate = ST_NORMAL;
374	goto repeat;
375
376	case ST_TABLE_5:
377	switch (token)
378	{
379	case ',':
380	token = ';';
381	lexstate = ST_TABLE_2;
382	APP;
383	break;
384	default:
385	APP;
386	break;
387	}
388	break;
389
390	default:
6803dc0e	391	exit(1);
1da177e4 LT	392	}
	393	fini:
	394
	395	if (suppress_type_lookup > 0)
	396	--suppress_type_lookup;
	397	if (dont_want_brace_phrase > 0)
	398	--dont_want_brace_phrase;
	399
	400	yylval = &next_node->next;
	401
	402	return token;
	403	}