5 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
6 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
7 * company may do whatever they wish with source code distributed with
8 * PCCTS or the code generated by PCCTS, including the incorporation of
9 * PCCTS, or its output, into commerical software.
11 * We encourage users to develop software with PCCTS. However, we do ask
12 * that credit is given to us for developing PCCTS. By "credit",
13 * we mean that if you incorporate our source code into one of your
14 * programs (commercial product, research project, or otherwise) that you
15 * acknowledge this fact somewhere in the documentation, research report,
16 * etc... If you like PCCTS and have developed a nice tool with the
17 * output, please mention that you developed it using PCCTS. In
18 * addition, we ask that this header remain intact in our source code.
19 * As long as these guidelines are kept, we expect to continue enhancing
20 * this system and expect to make other tools available as they are
25 * Parr Research Corporation
26 * with Purdue University and AHPCRC, University of Minnesota
32 #include "pccts_stdlib.h"
33 #include "pccts_stdarg.h"
34 #include "pccts_string.h"
35 #include "pccts_stdio.h"
39 /* I have to put this here due to C++ limitation
40 * that you can't have a 'forward' decl for enums.
41 * I hate C++!!!!!!!!!!!!!!!
42 * Of course, if I could use real templates, this would go away.
45 // MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the
46 // MR1 ANTLRTokenType enum
// Stand-in definition of ANTLRTokenType: it is defined here because C++ offers
// no forward declaration for enums. The enumerators span 0..9999; per the MR1
// note this is meant to prevent varying sizes for the enum.
49 enum ANTLRTokenType
{ TER_HATES_CPP
=0, ITS_TOO_COMPLICATED
=9999}; // MR1
51 #define ANTLR_SUPPORT_CODE
54 #include ATOKENBUFFER_H
// File-local sizing constants added by MR14. Their consumers are not visible
// in this part of the file; judging by the names they are presumably the
// default size and growth chunk of the token buffer used for infinite
// lookahead -- TODO confirm against the code that reads them.
57 static const int zzINF_DEF_TOKEN_BUFFER_SIZE
= 2000; /* MR14 */
58 static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE
= 1000; /* MR14 */
60 /* L o o k a h e a d M a c r o s */
62 /* maximum of 32 bits/unsigned int and must be 8 bits/byte;
63 * we only use 8 bits of it.
65 SetWordType
ANTLRParser::bitmask
[sizeof(SetWordType
)*8] = {
66 0x00000001, 0x00000002, 0x00000004, 0x00000008,
67 0x00000010, 0x00000020, 0x00000040, 0x00000080
// Definition of the shared 500-char message buffer, initially empty.
// eMsgd(), eMsg() and eMsg2() sprintf() into it without any length check, so
// a formatted message must fit in 500 chars including the terminator.
70 char ANTLRParser::eMsgBuffer
[500] = "";
76 delete [] zzFAILtext
; // MR16 Manfred Kogler
80 ANTLRParser(ANTLRTokenBuffer
*_inputTokens
,
87 can_use_inf_look
= use_inf_look
;
88 /* MR14 */ if (dlook
!= 0) {
89 /* MR14 */ panic("ANTLRParser::ANTLRParser - Demand lookahead not supported in C++ mode");
92 demand_look
= 0; /* demand_look = dlook; */
96 eofToken
= (ANTLRTokenType
)1;
98 // allocate lookahead buffer
99 token_type
= new ANTLRTokenType
[LLk
];
103 stillToFetch
= 0; // MR19
106 inf_labase
= 0; // MR7
108 /* prime lookahead buffer, point to inputTokens */
109 this->inputTokens
= _inputTokens
;
110 this->inputTokens
->setMinTokens(k
);
111 _inputTokens
->setParser(this); // MR1
112 resynchConsumed
=1; // MR8
113 zzFAILtext
=NULL
; // MR9
114 traceOptionValueDefault
=0; // MR10
115 traceReset(); // MR10
116 zzGuessSeq
=0; // MR10
117 syntaxErrCount
=0; // MR11
120 void ANTLRParser::init()
123 resynchConsumed
=1; // MR8
124 traceReset(); // MR10
127 void ANTLRParser::traceReset()
129 traceOptionValue
=traceOptionValueDefault
;
130 traceGuessOptionValue
=1;
131 traceCurrentRuleName
=NULL
;
136 guess(ANTLRParserState
*st
)
140 return setjmp(guess_start
.state
);
144 saveState(ANTLRParserState
*buf
)
146 buf
->guess_start
= guess_start
;
147 buf
->guessing
= guessing
;
148 buf
->inf_labase
= inf_labase
;
149 buf
->inf_last
= inf_last
;
151 buf
->traceOptionValue
=traceOptionValue
; /* MR10 */
152 buf
->traceGuessOptionValue
=traceGuessOptionValue
; /* MR10 */
153 buf
->traceCurrentRuleName
=traceCurrentRuleName
; /* MR10 */
154 buf
->traceDepth
=traceDepth
; /* MR10 */
158 restoreState(ANTLRParserState
*buf
)
161 int prevTraceOptionValue
;
163 guess_start
= buf
->guess_start
;
164 guessing
= buf
->guessing
;
165 inf_labase
= buf
->inf_labase
;
166 inf_last
= buf
->inf_last
;
169 // restore lookahead buffer from k tokens before restored TokenBuffer position
170 // if demand_look, then I guess we don't look backwards for these tokens.
171 for (i
=1; i
<=LLk
; i
++) token_type
[i
-1] =
172 inputTokens
->bufferedToken(i
-LLk
)->getType();
178 prevTraceOptionValue
=traceOptionValue
;
179 traceOptionValue
=buf
->traceOptionValue
;
180 if ( (prevTraceOptionValue
> 0) !=
181 (traceOptionValue
> 0)) {
182 if (traceCurrentRuleName
!= NULL
) { /* MR21 */
183 if (traceOptionValue
> 0) {
185 "trace enable restored in rule %s depth %d\n",
186 traceCurrentRuleName
,
189 if (traceOptionValue
<= 0) {
191 "trace disable restored in rule %s depth %d\n",
192 traceCurrentRuleName
, /* MR21 */
197 traceGuessOptionValue
=buf
->traceGuessOptionValue
;
198 traceCurrentRuleName
=buf
->traceCurrentRuleName
;
199 traceDepth
=buf
->traceDepth
;
203 /* Get the next symbol from the input stream; put it into lookahead buffer;
204 * fill token_type[] fast reference cache also. NLA is the next place where
205 * a lookahead ANTLRAbstractToken should go.
211 #ifdef ZZDEBUG_CONSUME_ACTION
212 zzdebug_consume_action();
216 // Defer Fetch feature
217 // Moves action of consume() into LA() function
222 NLA
= inputTokens
->getToken()->getType();
224 lap
= (lap
+1)&(LLk
-1);
229 _ANTLRTokenPtr
ANTLRParser::
234 // Defer Fetch feature
235 // Moves action of consume() into LA() function
241 #ifdef DEBUG_TOKENBUFFER
242 if ( i
>= inputTokens
->bufferSize() || inputTokens
->minTokens() < LLk
) /* MR20 Was "<=" */
244 char buf
[2000]; /* MR20 Was "static" */
245 sprintf(buf
, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i
);
249 return inputTokens
->bufferedToken(i
-LLk
);
256 int i
, c
= k
- (LLk
-dirty
);
257 for (i
=1; i
<=c
; i
++) consume();
260 /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
267 for(i
=1;i
<=LLk
; i
++) consume();
269 // lap = 0; // MR14 Sinan Karasu (sinan.karasu@boeing.com)
270 // labase = 0; // MR14
274 /* check to see if the current input symbol matches '_t'.
275 * During NON demand lookahead mode, dirty will always be 0 and
276 * hence the extra code for consuming tokens in _match is never
277 * executed; the same routine can be used for both modes.
280 _match(ANTLRTokenType _t
, ANTLRChar
**MissText
,
281 ANTLRTokenType
*MissTok
, _ANTLRTokenPtr
*BadTok
,
282 SetWordType
**MissSet
)
289 *MissTok
= _t
; *BadTok
= LT(1);
294 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
298 /* check to see if the current input symbol matches '_t'.
299 * Used during exception handling.
302 _match_wsig(ANTLRTokenType _t
)
307 if ( LA(1)!=_t
) return 0;
309 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
313 /* check to see if the current input symbol matches any token in a set.
314 * During NON demand lookahead mode, dirty will always be 0 and
315 * hence the extra code for consuming tokens in _match is never
316 * executed; the same routine can be used for both modes.
319 _setmatch(SetWordType
*tset
, ANTLRChar
**MissText
,
320 ANTLRTokenType
*MissTok
, _ANTLRTokenPtr
*BadTok
,
321 SetWordType
**MissSet
)
326 if ( !set_el(LA(1), tset
) ) {
328 *MissTok
= (ANTLRTokenType
)0; *BadTok
=LT(1);
333 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
338 _setmatch_wsig(SetWordType
*tset
)
343 if ( !set_el(LA(1), tset
) ) return 0;
345 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
349 /* Exception handling routines */
352 // Change suggested by Eli Sternheim (eli@interhdl.com)
355 consumeUntil(SetWordType
*st
)
357 ANTLRTokenType tmp
; // MR1
358 const int Eof
=1; // MR1
359 while ( !set_el( (tmp
=LA(1)), st
) && tmp
!=Eof
) { consume(); } // MR1
364 // Change suggested by Eli Sternheim (eli@interhdl.com)
367 consumeUntilToken(int t
)
370 const int Eof
=1; // MR1
371 while ( (tmp
=LA(1)) !=t
&& tmp
!=Eof
) { consume(); } // MR1
375 /* Old error stuff */
378 resynch(SetWordType
*wd
,SetWordType mask
)
381 /* MR8 S.Bochnak@microtool.com.pl */
382 /* MR8 Change file scope static "consumed" to instance var */
384 /* if you enter here without having consumed a token from last resynch
385 * force a token consumption.
387 /* MR8 */ if ( !resynchConsumed
) {consume(); resynchConsumed
=1; return;}
389 /* if current token is in resynch set, we've got what we wanted */
391 /* MR8 */ if ( wd
[LA(1)]&mask
|| LA(1) == eofToken
) {resynchConsumed
=0; return;}
393 /* scan until we find something in the resynch set */
395 while ( !(wd
[LA(1)]&mask
) && LA(1) != eofToken
) {consume();}
397 /* MR8 */ resynchConsumed
=1;
400 /* standard error reporting function that assumes DLG-based scanners;
401 * you should redefine in subclass to change it or if you use your
405 syn(_ANTLRTokenPtr tok
, ANTLRChar
*egroup
, SetWordType
*eset
,
406 ANTLRTokenType etok
, int k
)
410 line
= LT(1)->getLine();
412 syntaxErrCount
++; /* MR11 */
413 fprintf(stderr
, "line %d: syntax error at \"%s\"",
415 (LA(1)==eofToken
&& LT(1)->getText()[0] == '@')?
416 "<eof>":LT(1)->getText() /* MR21a */);
417 if ( !etok
&& !eset
) {fprintf(stderr
, "\n"); return;}
418 if ( k
==1 ) fprintf(stderr
, " missing");
421 fprintf(stderr
, "; \"%s\" not", LT(1)->getText());
422 if ( set_deg(eset
)>1 ) fprintf(stderr
, " in");
424 if ( set_deg(eset
)>0 ) edecode(eset
);
425 else fprintf(stderr
, " %s", token_tbl
[etok
]);
426 if ( strlen(egroup
) > 0 ) fprintf(stderr
, " in %s", egroup
);
427 fprintf(stderr
, "\n");
430 /* is b an element of set p? */
432 set_el(ANTLRTokenType b
, SetWordType
*p
)
434 return( p
[DIVWORD(b
)] & bitmask
[MODWORD(b
)] );
438 set_deg(SetWordType
*a
)
440 /* Fast compute degree of a set... the number
441 of elements present in the set. Assumes
442 that all word bits are used in the set
444 register SetWordType
*p
= a
;
445 register SetWordType
*endp
= &(a
[bsetsize
]);
446 register int degree
= 0;
448 if ( a
== NULL
) return 0;
451 register SetWordType t
= *p
;
452 register SetWordType
*b
= &(bitmask
[0]);
454 if (t
& *b
) ++degree
;
455 } while (++b
< &(bitmask
[sizeof(SetWordType
)*8]));
463 edecode(SetWordType
*a
)
465 register SetWordType
*p
= a
;
466 register SetWordType
*endp
= &(p
[bsetsize
]);
467 register unsigned e
= 0;
469 if ( set_deg(a
)>1 ) fprintf(stderr
, " {");
471 register SetWordType t
= *p
;
472 register SetWordType
*b
= &(bitmask
[0]);
474 if ( t
& *b
) fprintf(stderr
, " %s", token_tbl
[e
]);
476 } while (++b
< &(bitmask
[sizeof(SetWordType
)*8]));
477 } while (++p
< endp
);
478 if ( set_deg(a
)>1 ) fprintf(stderr
, " }");
482 * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
483 * where the zzMiss stuff is set here to the token that did not match
484 * (and which set it was not a member of).
487 // MR9 29-Sep-97 Stan Bochnak (S.Bochnak@microTool.com.pl)
488 // MR9 Original fix to static allocated text didn't
489 // MR9 work because a pointer to it was passed back
490 // MR9 to caller. Replace with instance variable.
// Number of slots in the lookahead-set pointer array that FAIL() allocates;
// FAIL() panics with "FAIL: overflowed buffer" when its k argument exceeds it.
492 const int SETWORDCOUNT
=20;
495 ANTLRParser::FAIL(int k
, ...)
501 if (zzFAILtext
== NULL
) zzFAILtext
=new char [1000]; // MR9
502 SetWordType
**f
=new SetWordType
*[SETWORDCOUNT
]; // MR1 // MR9
503 SetWordType
**miss_set
;
504 ANTLRChar
**miss_text
;
505 _ANTLRTokenPtr
*bad_tok
;
506 ANTLRChar
**bad_text
;
509 // err_k is passed as an "int *", not "unsigned *"
517 zzFAILtext
[0] = '\0';
518 if ( k
> SETWORDCOUNT
) panic("FAIL: overflowed buffer");
519 for (i
=1; i
<=k
; i
++) /* collect all lookahead sets */
521 f
[i
-1] = va_arg(ap
, SetWordType
*);
523 for (i
=1; i
<=k
; i
++) /* look for offending token */
525 if ( i
>1 ) strcat(zzFAILtext
, " ");
526 strcat(zzFAILtext
, LT(i
)->getText());
527 if ( !set_el(LA(i
), f
[i
-1]) ) break;
529 miss_set
= va_arg(ap
, SetWordType
**);
530 miss_text
= va_arg(ap
, ANTLRChar
**);
531 bad_tok
= va_arg(ap
, _ANTLRTokenPtr
*);
532 bad_text
= va_arg(ap
, ANTLRChar
**);
533 err_k
= va_arg(ap
, int *); // MR1
536 /* bad; lookahead is permutation that cannot be matched,
537 * but, the ith token of lookahead is valid at the ith position
538 * (The old LL sub 1 (k) versus LL(k) parsing technique)
541 *miss_text
= LT(1)->getText();
543 *bad_text
= (*bad_tok
)->getText();
546 // MR4 20-May-97 erroneously deleted contents of f[]
547 // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
548 // MR1 10-Apr-97 release temporary storage
553 /* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
555 *miss_text
= zzFAILtext
;
557 *bad_text
= (*bad_tok
)->getText();
558 if ( i
==1 ) *err_k
= 1;
561 // MR4 20-May-97 erroneously deleted contents of f[]
562 // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
563 // MR1 10-Apr-97 release temporary storage
570 _match_wdfltsig(ANTLRTokenType tokenWanted
, SetWordType
*whatFollows
)
572 if ( dirty
==LLk
) consume();
574 if ( LA(1)!=tokenWanted
)
576 syntaxErrCount
++; /* MR11 */
578 "line %d: syntax error at \"%s\" missing %s\n",
580 (LA(1)==eofToken
&& LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
581 token_tbl
[tokenWanted
]);
582 consumeUntil( whatFollows
);
587 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
588 /* if ( !demand_look ) consume(); */
595 _setmatch_wdfltsig(SetWordType
*tokensWanted
,
596 ANTLRTokenType tokenTypeOfSet
,
597 SetWordType
*whatFollows
)
599 if ( dirty
==LLk
) consume();
600 if ( !set_el(LA(1), tokensWanted
) )
602 syntaxErrCount
++; /* MR11 */
604 "line %d: syntax error at \"%s\" missing %s\n",
606 (LA(1)==eofToken
&& LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
607 token_tbl
[tokenTypeOfSet
]);
608 consumeUntil( whatFollows
);
613 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
614 /* if ( !demand_look ) consume(); */
620 eMsgd(char *err
,int d
)
622 sprintf(eMsgBuffer
, err
, d
); // dangerous, but I don't care
627 eMsg(char *err
, char *s
)
629 sprintf(eMsgBuffer
, err
, s
);
634 eMsg2(char *err
,char *s
, char *t
)
636 sprintf(eMsgBuffer
, err
, s
, t
);
641 panic(const char *msg
) // MR20 const
643 fprintf(stderr
, "ANTLR panic: %s\n", msg
);
644 exit(PCCTS_EXIT_FAILURE
); // MR1
647 const ANTLRChar
*ANTLRParser:: // MR1
648 parserTokenName(int tok
) { // MR1
649 return token_tbl
[tok
]; // MR1
652 void ANTLRParser::traceGuessDone(const ANTLRParserState
*state
) {
656 if (traceCurrentRuleName
== NULL
) return;
658 if (traceOptionValue
<= 0) {
660 } else if (traceGuessOptionValue
<= 0) {
667 fprintf(stderr
,"guess done - returning to rule %s {\"%s\"} at depth %d",
668 state
->traceCurrentRuleName
,
669 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
671 if (state
->guessing
!= 0) {
672 fprintf(stderr
," (guess mode continues - an enclosing guess is still active)");
674 fprintf(stderr
," (guess mode ends)");
676 fprintf(stderr
,"\n");
680 void ANTLRParser::traceGuessFail() {
684 if (traceCurrentRuleName
== NULL
) return; /* MR21 */
686 if (traceOptionValue
<= 0) {
688 } else if (guessing
&& traceGuessOptionValue
<= 0) {
695 fprintf(stderr
,"guess failed\n");
700 zero value turns off trace
703 void ANTLRParser::tracein(const ANTLRChar
* rule
) {
708 traceCurrentRuleName
=rule
;
710 if (traceOptionValue
<= 0) {
712 } else if (guessing
&& traceGuessOptionValue
<= 0) {
719 fprintf(stderr
,"enter rule %s {\"%s\"} depth %d",
721 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
723 if (guessing
) fprintf(stderr
," guessing");
724 fprintf(stderr
,"\n");
729 void ANTLRParser::traceout(const ANTLRChar
* rule
) {
735 if (traceOptionValue
<= 0) {
737 } else if (guessing
&& traceGuessOptionValue
<= 0) {
744 fprintf(stderr
,"exit rule %s {\"%s\"} depth %d",
746 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
748 if (guessing
) fprintf(stderr
," guessing");
749 fprintf(stderr
,"\n");
753 int ANTLRParser::traceOption(int delta
) {
755 int prevValue
=traceOptionValue
;
757 traceOptionValue
=traceOptionValue
+delta
;
759 if (traceCurrentRuleName
!= NULL
) {
760 if (prevValue
<= 0 && traceOptionValue
> 0) {
761 fprintf(stderr
,"trace enabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
763 if (prevValue
> 0 && traceOptionValue
<= 0) {
764 fprintf(stderr
,"trace disabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
771 int ANTLRParser::traceGuessOption(int delta
) {
773 int prevValue
=traceGuessOptionValue
;
775 traceGuessOptionValue
=traceGuessOptionValue
+delta
;
777 if (traceCurrentRuleName
!= NULL
) {
778 if (prevValue
<= 0 && traceGuessOptionValue
> 0) {
779 fprintf(stderr
,"guess trace enabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
781 if (prevValue
> 0 && traceGuessOptionValue
<= 0) {
782 fprintf(stderr
,"guess trace disabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
788 // MR19 V.H. Simonis Defer Fetch feature
790 void ANTLRParser::undeferFetch()
795 for (int stillToFetch_x
= 0; stillToFetch_x
< stillToFetch
; ++stillToFetch_x
) {
796 NLA
= inputTokens
->getToken()->getType();
798 lap
= (lap
+1)&(LLk
-1);
808 int ANTLRParser::isDeferFetchEnabled()