[ceph.git] / ceph / src / boost / tools / build / src / engine / scan.c

/*
 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

/*
 * scan.c - the jam yacc scanner
 *
 */

#include "jam.h"
#include "scan.h"
#include "output.h"

#include "constants.h"
#include "jambase.h"
#include "jamgram.h"


/*
 * Keyword table consulted by yylex(): each entry pairs a keyword's spelling
 * with the token type reported for it. The entries themselves are pulled in
 * from jamgramtab.h; the trailing { 0, 0 } entry terminates the table (the
 * lookup loop stops at the first entry whose word is 0).
 */
struct keyword
{
    char * word;  /* keyword spelling; 0 marks the end of the table */
    int    type;  /* token type stored in yylval.type on a match */
} keywords[] =
{
#include "jamgramtab.h"
    { 0, 0 }
};

/*
 * One input source being scanned. yyfparse() pushes a new entry on the chain
 * headed by incp and yyfdone() pops it again. An entry reads either from an
 * in-memory array of strings (when 'strings' is set) or from a file that is
 * opened lazily by yyline().
 */
typedef struct include include;
struct include
{
    include   * next;        /* next serial include file */
    char      * string;      /* pointer into current line */
    char    * * strings;     /* for yyfparse() -- text to parse */
    FILE      * file;        /* for yyfparse() -- file being read */
    OBJECT    * fname;       /* for yyfparse() -- file name */
    int         line;        /* line counter for error messages */
    char        buf[ 512 ];  /* for yyfparse() -- line buffer */
};

static include * incp = 0;  /* current file; head of chain */

static int scanmode = SCAN_NORMAL;  /* current scanning mode; set by yymode() */
static int anyerrors = 0;           /* count of errors reported via yyerror() */


/* Formats a token description for diagnostics; defined further below. */
static char * symdump( YYSTYPE * );

#define BIGGEST_TOKEN 10240  /* no single token can be larger */


/*
 * yymode() - set the scanner mode used by subsequent yylex() calls.
 *
 * 'n' is stored as-is. Within this file the mode is compared against
 * SCAN_NORMAL (the initial value), SCAN_STRING (read a brace-delimited action
 * body as a single token) and SCAN_PUNCT (do not treat alphabetic tokens as
 * keywords).
 */

void yymode( int n )
{
    scanmode = n;
}


/*
 * yyerror() - report a scanning/parsing error and count it.
 *
 * Prints "<file>:<line>: <message> at <token description>" and bumps the
 * error counter queried by yyanyerrors().
 */
void yyerror( char const * s )
{
    /* The location is taken from yylval rather than from incp because incp
     * will already have been reset to 0 when the error occurs at EOF.
     *
     * The two can only disagree on an unexpected EOF or on an error raised
     * while reading a token that spans several lines (a multi-line string
     * literal or an action body): yylval then describes where the token
     * started, while incp describes where reading it broke.
     */
    char const * const where = object_str( yylval.file );
    char const * const token = symdump( &yylval );

    out_printf( "%s:%d: %s at %s\n", where, yylval.line, s, token );
    ++anyerrors;
}


/*
 * yyanyerrors() - return 1 if yyerror() has been called at least once,
 * 0 otherwise.
 */
int yyanyerrors()
{
    return anyerrors ? 1 : 0;
}


/*
 * yyfparse() - start scanning a new input source.
 *
 * Allocates an include record for 's' and makes it the head of the incp
 * chain. The file itself is not opened here; yyline() opens it on first read.
 * The special name "+" selects the built-in jambase text instead of a file.
 */
void yyfparse( OBJECT * s )
{
    include * const inc = (include *)BJAM_MALLOC( sizeof( include ) );

    /* Start out with an exhausted "current line" so that the first read goes
     * straight to yyline().
     */
    inc->string = "";
    inc->strings = 0;
    inc->file = 0;
    inc->fname = object_copy( s );
    inc->line = 0;

    /* Link the new record in front of whatever is currently being read. */
    inc->next = incp;
    incp = inc;

    /* "+" means: read from the internal jambase string table. */
    if ( strcmp( object_str( s ), "+" ) == 0 )
        inc->strings = jambase;
}


/*
 * yyfdone() - pop the current include record once its input has been parsed.
 *
 * Makes the next record in the chain current, closes the underlying file
 * (stdin is left open) and releases the file name and the record itself.
 */
void yyfdone( void )
{
    include * const done = incp;

    /* Unlink first; the record is only referenced locally from here on. */
    incp = done->next;

    if ( done->file != 0 && done->file != stdin )
        fclose( done->file );

    object_free( done->fname );
    BJAM_FREE( (char *)done );
}


/*
 * yyline() - read new line and return first character.
 *
 * Returns the next character of the current include as a non-negative value
 * (an unsigned char converted to int, like fgetc()), so that no input byte can
 * ever be mistaken for EOF or be passed to a <ctype.h> function as a negative
 * value. Returns EOF when there is no current include, when the current
 * include is exhausted, or when its file could not be opened.
 */

int yyline()
{
    include * const i = incp;

    if ( !incp )
        return EOF;

    /* If there is more data in this line, return it. */
    if ( *i->string )
        return (unsigned char)*i->string++;

    /* If we are reading from an internal string list, go to the next string. */
    if ( i->strings )
    {
        if ( *i->strings )
        {
            ++i->line;
            i->string = *(i->strings++);
            return (unsigned char)*i->string++;
        }
    }
    else
    {
        /* If necessary, open the file. The name "-" selects stdin. A failed
         * fopen() is reported via perror() and leaves i->file null, which makes
         * this call return EOF below.
         */
        if ( !i->file )
        {
            FILE * f = stdin;
            if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
                perror( object_str( i->fname ) );
            i->file = f;
        }

        /* If there is another line in this file, start it. Note that the line
         * counter advances once per fgets() call, i.e. once per buffer-full.
         */
        if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
        {
            ++i->line;
            i->string = i->buf;
            return (unsigned char)*i->string++;
        }
    }

    /* This include is done. Return EOF so yyparse() returns to
     * parse_file().
     */

    return EOF;
}


/*
 * yylex() - set yylval to current token; return its type.
 *
 * Behaviour depends on the mode selected with yymode():
 *
 *  SCAN_STRING - everything up to the brace that closes the action body is
 *                returned as one STRING token; nested braces are balanced
 *                and "\r\n" is folded to "\n".
 *  otherwise   - white space and '#' comments are skipped and the next
 *                white space delimited word is returned, either as a keyword
 *                token or as an ARG. In SCAN_PUNCT mode words starting with
 *                a letter are never looked up as keywords.
 *
 * Returns EOF (and sets yylval.type to EOF) at end of input or after a
 * lexical error has been reported through yyerror().
 *
 * Macros to move things along:
 *
 *  yychar() - return and advance character; invalid after EOF.
 *  yyprev() - back up one character; invalid before yychar().
 *
 * yychar() returns a continuous stream of characters, until it hits the EOF of
 * the current include file. Characters are returned as unsigned char values
 * converted to int so that a 0xFF input byte is not confused with EOF.
 */

#define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )
#define yyprev() ( incp->string-- )

int yylex()
{
    int c;
    char buf[ BIGGEST_TOKEN ];
    char * b = buf;

    if ( !incp )
        goto eof;

    /* Get first character (whitespace or of token). */
    c = yychar();

    if ( scanmode == SCAN_STRING )
    {
        /* If scanning for a string (action's {}'s), look for the closing brace.
         * We handle matching braces, if they match.
         */

        int nest = 1;

        while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
        {
            if ( c == '{' )
                ++nest;

            if ( ( c == '}' ) && !--nest )
                break;

            *b++ = (char)c;

            c = yychar();

            /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
            if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
                --b;
        }

        /* We ate the ending brace -- regurgitate it. */
        if ( c != EOF )
            yyprev();

        /* Check for obvious errors. A full buffer leaves no room for the
         * terminating NUL, so it is treated as an overflow.
         */
        if ( b == buf + sizeof( buf ) )
        {
            yyerror( "action block too big" );
            goto eof;
        }

        if ( nest )
        {
            yyerror( "unmatched {} in action block" );
            goto eof;
        }

        /* In this mode the location is recorded once the whole block has been
         * read.
         */
        *b = 0;
        yylval.type = STRING;
        yylval.string = object_new( buf );
        yylval.file = incp->fname;
        yylval.line = incp->line;
    }
    else
    {
        struct keyword * k;
        int inquote = 0;
        int notkeyword;

        /* Eat white space. */
        for ( ; ; )
        {
            /* Skip past white space. The cast keeps the <ctype.h> argument in
             * the range the C standard requires.
             */
            while ( ( c != EOF ) && isspace( (unsigned char)c ) )
                c = yychar();

            /* Not a comment? */
            if ( c != '#' )
                break;

            /* Swallow up comment line. */
            while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
        }

        /* c now points to the first character of a token. */
        if ( c == EOF )
            goto eof;

        /* Record where the token starts. */
        yylval.file = incp->fname;
        yylval.line = incp->line;

        /* While scanning the word, disqualify it for (expensive) keyword lookup
         * when we can: $anything, "anything", \anything
         */
        notkeyword = c == '$';

        /* Look for white space to delimit word. "'s get stripped but preserve
         * white space. \ protects next character.
         */
        while
        (
            ( c != EOF ) &&
            ( b < buf + sizeof( buf ) ) &&
            ( inquote || !isspace( (unsigned char)c ) )
        )
        {
            if ( c == '"' )
            {
                /* begin or end " */
                inquote = !inquote;
                notkeyword = 1;
            }
            else if ( c != '\\' )
            {
                /* normal char */
                *b++ = (char)c;
            }
            else if ( ( c = yychar() ) != EOF )
            {
                /* \c -- \n, \r and \t map to the control characters, any other
                 * character stands for itself.
                 */
                if (c == 'n')
                    c = '\n';
                else if (c == 'r')
                    c = '\r';
                else if (c == 't')
                    c = '\t';
                *b++ = (char)c;
                notkeyword = 1;
            }
            else
            {
                /* \EOF */
                break;
            }

            c = yychar();
        }

        /* Check obvious errors. */
        if ( b == buf + sizeof( buf ) )
        {
            yyerror( "string too big" );
            goto eof;
        }

        if ( inquote )
        {
            yyerror( "unmatched \" in string" );
            goto eof;
        }

        /* We looked ahead a character - back up. */
        if ( c != EOF )
            yyprev();

        /* Scan token table. Do not scan if it is obviously not a keyword or if
         * it is an alphabetic when were looking for punctuation.
         */

        *b = 0;
        yylval.type = ARG;

        if ( !notkeyword && !( isalpha( (unsigned char)*buf ) && ( scanmode == SCAN_PUNCT ) ) )
            for ( k = keywords; k->word; ++k )
                if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
                {
                    yylval.type = k->type;
                    yylval.keyword = k->word;  /* used by symdump */
                    break;
                }

        if ( yylval.type == ARG )
            yylval.string = object_new( buf );
    }

    if ( DEBUG_SCAN )
        out_printf( "scan %s\n", symdump( &yylval ) );

    return yylval.type;

eof:
    /* We do not reset yylval.file & yylval.line here so unexpected EOF error
     * messages would include correct error location information.
     */
    yylval.type = EOF;
    return yylval.type;
}


/*
 * symdump() - describe a token for diagnostic output.
 *
 * The text is built in a static buffer which is overwritten by the next call,
 * so the result must be consumed before symdump() is called again.
 */
static char * symdump( YYSTYPE * s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];
    int const type = s->type;

    if ( type == EOF )
        sprintf( buf, "EOF" );
    else if ( type == 0 )
        sprintf( buf, "unknown symbol %s", object_str( s->string ) );
    else if ( type == ARG )
        sprintf( buf, "argument %s", object_str( s->string ) );
    else if ( type == STRING )
        sprintf( buf, "string \"%s\"", object_str( s->string ) );
    else
        /* Every remaining token type is treated as a keyword. */
        sprintf( buf, "keyword %s", s->keyword );

    return buf;
}


/*
 * yyinput_last_read_token() - report the file and line recorded in yylval for
 * the most recently scanned token, for those epsilon transitions that produce
 * a parse.
 *
 * The file name is handed out exactly as stored in yylval; no new reference
 * is created here.
 */

void yyinput_last_read_token( OBJECT * * name, int * line )
{
    /* TODO: Consider whether and when we might want to report where the last
     * read token ended, e.g. EOF errors inside string literals.
     */
    *line = yylval.line;
    *name = yylval.file;
}
Commit	Line	Data
7c673cae FG	1	/*
	2	* Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
	3	*
	4	* This file is part of Jam - see jam.c for Copyright information.
	5	*/
	6
	7	/*
	8	* scan.c - the jam yacc scanner
	9	*
	10	*/
	11
	12	#include "jam.h"
	13	#include "scan.h"
	14	#include "output.h"
	15
	16	#include "constants.h"
	17	#include "jambase.h"
	18	#include "jamgram.h"
	19
	20
	21	struct keyword
	22	{
	23	char * word;
	24	int type;
	25	} keywords[] =
	26	{
	27	#include "jamgramtab.h"
	28	{ 0, 0 }
	29	};
	30
	31	typedef struct include include;
	32	struct include
	33	{
	34	include * next; /* next serial include file */
	35	char * string; /* pointer into current line */
	36	char * * strings; /* for yyfparse() -- text to parse */
	37	FILE * file; /* for yyfparse() -- file being read */
	38	OBJECT * fname; /* for yyfparse() -- file name */
	39	int line; /* line counter for error messages */
	40	char buf[ 512 ]; /* for yyfparse() -- line buffer */
	41	};
	42
	43	static include * incp = 0; /* current file; head of chain */
	44
	45	static int scanmode = SCAN_NORMAL;
	46	static int anyerrors = 0;
	47
	48
	49	static char * symdump( YYSTYPE * );
	50
	51	#define BIGGEST_TOKEN 10240 /* no single token can be larger */
	52
	53
	54	/*
	55	* Set parser mode: normal, string, or keyword.
	56	*/
	57
	58	void yymode( int n )
	59	{
	60	scanmode = n;
	61	}
	62
	63
	64	void yyerror( char const * s )
65	{
66	/* We use yylval instead of incp to access the error location information as
67	* the incp pointer will already be reset to 0 in case the error occurred at
68	* EOF.
69	*
70	* The two may differ only if ran into an unexpected EOF or we get an error
71	* while reading a lexical token spanning multiple lines, e.g. a multi-line
72	* string literal or action body, in which case yylval location information
73	* will hold the information about where the token started while incp will
74	* hold the information about where reading it broke.
75	*/
76	out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
77	symdump( &yylval ) );
78	++anyerrors;
79	}
80
81
82	int yyanyerrors()
83	{
84	return anyerrors != 0;
85	}
86
87
88	void yyfparse( OBJECT * s )
89	{
	90	include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
91
92	/* Push this onto the incp chain. */
93	i->string = "";
94	i->strings = 0;
95	i->file = 0;
96	i->fname = object_copy( s );
97	i->line = 0;
98	i->next = incp;
99	incp = i;
100
101	/* If the filename is "+", it means use the internal jambase. */
102	if ( !strcmp( object_str( s ), "+" ) )
103	i->strings = jambase;
104	}
105
106
107	/*
108	* yyfdone() - cleanup after we're done parsing a file.
109	*/
110	void yyfdone( void )
111	{
112	include * const i = incp;
113	incp = i->next;
114
115	/* Close file, free name. */
116	if(i->file && (i->file != stdin))
117	fclose(i->file);
118	object_free(i->fname);
119	BJAM_FREE((char *)i);
120	}
121
122
123	/*
124	* yyline() - read new line and return first character.
125	*
126	* Fabricates a continuous stream of characters across include files, returning
127	* EOF at the bitter end.
128	*/
129
130	int yyline()
131	{
132	include * const i = incp;
133
134	if ( !incp )
135	return EOF;
136
137	/* Once we start reading from the input stream, we reset the include
138	* insertion point so that the next include file becomes the head of the
139	* list.
140	*/
141
142	/* If there is more data in this line, return it. */
143	if ( *i->string )
144	return *i->string++;
145
146	/* If we are reading from an internal string list, go to the next string. */
147	if ( i->strings )
148	{
149	if ( *i->strings )
150	{
151	++i->line;
152	i->string = *(i->strings++);
153	return *i->string++;
154	}
155	}
156	else
157	{
158	/* If necessary, open the file. */
159	if ( !i->file )
160	{
161	FILE * f = stdin;
162	if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
163	perror( object_str( i->fname ) );
164	i->file = f;
165	}
166
167	/* If there is another line in this file, start it. */
168	if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
169	{
170	++i->line;
171	i->string = i->buf;
172	return *i->string++;
173	}
174	}
175
176	/* This include is done. Return EOF so yyparse() returns to
177	* parse_file().
178	*/
179
180	return EOF;
181	}
182
183
184	/*
185	* yylex() - set yylval to current token; return its type.
186	*
187	* Macros to move things along:
188	*
189	* yychar() - return and advance character; invalid after EOF.
190	* yyprev() - back up one character; invalid before yychar().
191	*
192	* yychar() returns a continuous stream of characters, until it hits the EOF of
193	* the current include file.
194	*/
195
	196	#define yychar() ( *incp->string ? *incp->string++ : yyline() )
197	#define yyprev() ( incp->string-- )
198
199	int yylex()
200	{
201	int c;
202	char buf[ BIGGEST_TOKEN ];
203	char * b = buf;
204
205	if ( !incp )
206	goto eof;
207
208	/* Get first character (whitespace or of token). */
209	c = yychar();
210
211	if ( scanmode == SCAN_STRING )
212	{
213	/* If scanning for a string (action's {}'s), look for the closing brace.
214	* We handle matching braces, if they match.
215	*/
216
217	int nest = 1;
218
219	while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
220	{
221	if ( c == '{' )
222	++nest;
223
224	if ( ( c == '}' ) && !--nest )
225	break;
226
227	*b++ = c;
228
229	c = yychar();
230
231	/* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
232	if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
233	--b;
234	}
235
236	/* We ate the ending brace -- regurgitate it. */
237	if ( c != EOF )
238	yyprev();
239
240	/* Check for obvious errors. */
241	if ( b == buf + sizeof( buf ) )
242	{
243	yyerror( "action block too big" );
244	goto eof;
245	}
246
247	if ( nest )
248	{
249	yyerror( "unmatched {} in action block" );
250	goto eof;
251	}
252
253	*b = 0;
254	yylval.type = STRING;
255	yylval.string = object_new( buf );
256	yylval.file = incp->fname;
257	yylval.line = incp->line;
258	}
259	else
260	{
261	char * b = buf;
262	struct keyword * k;
263	int inquote = 0;
264	int notkeyword;
265
266	/* Eat white space. */
267	for ( ; ; )
268	{
269	/* Skip past white space. */
270	while ( ( c != EOF ) && isspace( c ) )
271	c = yychar();
272
273	/* Not a comment? */
274	if ( c != '#' )
275	break;
276
277	/* Swallow up comment line. */
278	while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
279	}
280
281	/* c now points to the first character of a token. */
282	if ( c == EOF )
283	goto eof;
284
285	yylval.file = incp->fname;
286	yylval.line = incp->line;
287
288	/* While scanning the word, disqualify it for (expensive) keyword lookup
289	* when we can: $anything, "anything", \anything
290	*/
291	notkeyword = c == '$';
292
293	/* Look for white space to delimit word. "'s get stripped but preserve
294	* white space. \ protects next character.
295	*/
296	while
297	(
298	( c != EOF ) &&
299	( b < buf + sizeof( buf ) ) &&
	300	( inquote || !isspace( c ) )
301	)
302	{
303	if ( c == '"' )
304	{
305	/* begin or end " */
306	inquote = !inquote;
307	notkeyword = 1;
308	}
309	else if ( c != '\\' )
310	{
311	/* normal char */
312	*b++ = c;
313	}
314	else if ( ( c = yychar() ) != EOF )
315	{
316	/* \c */
317	if (c == 'n')
318	c = '\n';
319	else if (c == 'r')
320	c = '\r';
321	else if (c == 't')
322	c = '\t';
323	*b++ = c;
324	notkeyword = 1;
325	}
326	else
327	{
328	/* \EOF */
329	break;
330	}
331
332	c = yychar();
333	}
334
335	/* Check obvious errors. */
336	if ( b == buf + sizeof( buf ) )
337	{
338	yyerror( "string too big" );
339	goto eof;
340	}
341
342	if ( inquote )
343	{
344	yyerror( "unmatched \" in string" );
345	goto eof;
346	}
347
348	/* We looked ahead a character - back up. */
349	if ( c != EOF )
350	yyprev();
351
352	/* Scan token table. Do not scan if it is obviously not a keyword or if
353	* it is an alphabetic when were looking for punctuation.
354	*/
355
356	*b = 0;
357	yylval.type = ARG;
358
359	if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
360	for ( k = keywords; k->word; ++k )
	361	if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
362	{
363	yylval.type = k->type;
364	yylval.keyword = k->word; /* used by symdump */
365	break;
366	}
367
368	if ( yylval.type == ARG )
369	yylval.string = object_new( buf );
370	}
371
372	if ( DEBUG_SCAN )
373	out_printf( "scan %s\n", symdump( &yylval ) );
374
375	return yylval.type;
376
377	eof:
378	/* We do not reset yylval.file & yylval.line here so unexpected EOF error
379	* messages would include correct error location information.
380	*/
381	yylval.type = EOF;
382	return yylval.type;
383	}
384
385
386	static char * symdump( YYSTYPE * s )
387	{
388	static char buf[ BIGGEST_TOKEN + 20 ];
389	switch ( s->type )
390	{
391	case EOF : sprintf( buf, "EOF" ); break;
392	case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
393	case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break;
394	case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break;
395	default : sprintf( buf, "keyword %s" , s->keyword ); break;
396	}
397	return buf;
398	}
399
400
401	/*
402	* Get information about the current file and line, for those epsilon
403	* transitions that produce a parse.
404	*/
405
406	void yyinput_last_read_token( OBJECT * * name, int * line )
407	{
408	/* TODO: Consider whether and when we might want to report where the last
409	* read token ended, e.g. EOF errors inside string literals.
410	*/
411	*name = yylval.file;
412	*line = yylval.line;
413	}