5 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
6 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
7 * company may do whatever they wish with source code distributed with
8 * PCCTS or the code generated by PCCTS, including the incorporation of
9 * PCCTS, or its output, into commerical software.
11 * We encourage users to develop software with PCCTS. However, we do ask
12 * that credit is given to us for developing PCCTS. By "credit",
13 * we mean that if you incorporate our source code into one of your
14 * programs (commercial product, research project, or otherwise) that you
15 * acknowledge this fact somewhere in the documentation, research report,
16 * etc... If you like PCCTS and have developed a nice tool with the
17 * output, please mention that you developed it using PCCTS. In
18 * addition, we ask that this header remain intact in our source code.
19 * As long as these guidelines are kept, we expect to continue enhancing
20 * this system and expect to make other tools available as they are
25 * Parr Research Corporation
26 * with Purdue University and AHPCRC, University of Minnesota
32 #include "pccts_stdlib.h"
33 #include "pccts_stdarg.h"
34 #include "pccts_string.h"
35 #include "pccts_stdio.h"
39 /* I have to put this here due to C++ limitation
40 * that you can't have a 'forward' decl for enums.
41 * I hate C++!!!!!!!!!!!!!!!
42 * Of course, if I could use real templates, this would go away.
45 // MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the
46 // MR1 ANTLRTokenType enum
// Placeholder definition of ANTLRTokenType with two dummy enumerators (0 and 9999).
// The surrounding notes explain why it lives here: an enum cannot be
// forward-declared, and MR1 wanted to prevent varying enum sizes.
// NOTE(review): the 9999 enumerator presumably exists to widen the enum's
// underlying type - confirm.
49 enum ANTLRTokenType
{ TER_HATES_CPP
=0, ITS_TOO_COMPLICATED
=9999}; // MR1
51 #define ANTLR_SUPPORT_CODE
54 #include ATOKENBUFFER_H
// File-local integer constants introduced by MR14. Neither is referenced in the
// part of the file visible in this listing.
// NOTE(review): the names suggest sizing defaults for an infinite-lookahead
// token buffer - confirm against their users before changing the values.
57 static const int zzINF_DEF_TOKEN_BUFFER_SIZE
= 2000; /* MR14 */
58 static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE
= 1000; /* MR14 */
60 /* L o o k a h e a d M a c r o s */
62 /* maximum of 32 bits/unsigned int and must be 8 bits/byte;
63 * we only use 8 bits of it.
// Table of single-bit masks (0x01 through 0x80). set_el() indexes it with
// MODWORD(b); set_deg() and edecode() walk it to test each bit of a set word.
// The array is sized sizeof(SetWordType) times 8, while eight initializers are
// visible here.
65 SetWordType
ANTLRParser::bitmask
[sizeof(SetWordType
)*8] = {
66 0x00000001, 0x00000002, 0x00000004, 0x00000008,
67 0x00000010, 0x00000020, 0x00000040, 0x00000080
// 500-char scratch buffer that eMsgd(), eMsg() and eMsg2() format their
// messages into. It is a single class-level array, so every call overwrites
// the previous message. Starts out as the empty string.
70 char ANTLRParser::eMsgBuffer
[500] = "";
76 delete [] zzFAILtext
; // MR16 Manfred Kogler
// Constructor (parameter list and several statements are missing from this
// listing). The visible part:
//   - records whether infinite lookahead may be used,
//   - rejects demand lookahead,
//   - allocates the token_type lookahead cache with LLk entries,
//   - attaches the parser to the supplied token buffer,
//   - resets the error, trace and guess bookkeeping.
80 ANTLRParser(ANTLRTokenBuffer
*_inputTokens
,
87 can_use_inf_look
= use_inf_look
;
// Demand lookahead is refused outright: panic() prints the message and exits.
88 /* MR14 */ if (dlook
!= 0) {
89 /* MR14 */ panic("ANTLRParser::ANTLRParser - Demand lookahead not supported in C++ mode");
92 demand_look
= 0; /* demand_look = dlook; */
// End of file defaults to token type 1.
96 eofToken
= (ANTLRTokenType
)1;
98 // allocate lookahead buffer
99 token_type
= new ANTLRTokenType
[LLk
];
103 stillToFetch
= 0; // MR19
106 inf_labase
= 0; // MR7
108 /* prime lookahead buffer, point to inputTokens */
// Keep the buffer pointer, pass k to its setMinTokens(), and register this
// parser with the buffer.
109 this->inputTokens
= _inputTokens
;
110 this->inputTokens
->setMinTokens(k
);
111 _inputTokens
->setParser(this); // MR1
112 resynchConsumed
=1; // MR8
// FAIL() allocates this text buffer lazily the first time it runs.
113 zzFAILtext
=NULL
; // MR9
114 traceOptionValueDefault
=0; // MR10
115 traceReset(); // MR10
116 zzGuessSeq
=0; // MR10
117 syntaxErrCount
=0; // MR11
// Initialization hook. The statements visible in this listing set
// resynchConsumed to 1 and reset the trace settings through traceReset();
// other statements of the body are not shown here.
120 void ANTLRParser::init()
123 resynchConsumed
=1; // MR8
124 traceReset(); // MR10
// Puts the trace settings back to their starting values: the trace option
// returns to traceOptionValueDefault, guess tracing is set to 1, and the
// remembered rule name is cleared.
127 void ANTLRParser::traceReset()
129 traceOptionValue
=traceOptionValueDefault
;
130 traceGuessOptionValue
=1;
131 traceCurrentRuleName
=NULL
;
// guess(): the visible statement returns the result of setjmp() on
// guess_start.state, so the direct call yields 0 (standard setjmp behaviour).
// The use made of the st parameter is not visible in this listing.
// Under MSVC, warning 4611 (setjmp versus C++ object destruction) is disabled
// around the function and set back to its default afterwards.
136 #ifdef _MSC_VER // MR23
138 //interaction between '_setjmp' and C++ object destruction is non-portable
139 #pragma warning(disable : 4611)
142 guess(ANTLRParserState
*st
)
146 return setjmp(guess_start
.state
);
148 #ifdef _MSC_VER // MR23
149 #pragma warning(default: 4611)
// saveState(): copies the parser's guess state (guess_start, guessing), the
// infinite-lookahead markers (inf_labase, inf_last) and the trace settings
// into *buf. restoreState() reads the same fields back.
153 saveState(ANTLRParserState
*buf
)
155 buf
->guess_start
= guess_start
;
156 buf
->guessing
= guessing
;
157 buf
->inf_labase
= inf_labase
;
158 buf
->inf_last
= inf_last
;
// Trace settings (added by MR10).
160 buf
->traceOptionValue
=traceOptionValue
; /* MR10 */
161 buf
->traceGuessOptionValue
=traceGuessOptionValue
; /* MR10 */
162 buf
->traceCurrentRuleName
=traceCurrentRuleName
; /* MR10 */
163 buf
->traceDepth
=traceDepth
; /* MR10 */
// restoreState(): reloads the fields written by saveState() from *buf, refills
// the token_type cache from the token buffer, and reports on stderr when the
// restore switches tracing on or off.
167 restoreState(ANTLRParserState
*buf
)
170 int prevTraceOptionValue
;
// Guess state and infinite-lookahead markers.
172 guess_start
= buf
->guess_start
;
173 guessing
= buf
->guessing
;
174 inf_labase
= buf
->inf_labase
;
175 inf_last
= buf
->inf_last
;
178 // restore lookahead buffer from k tokens before restored TokenBuffer position
179 // if demand_look, then I guess we don't look backwards for these tokens.
180 for (i
=1; i
<=LLk
; i
++) token_type
[i
-1] =
181 inputTokens
->bufferedToken(i
-LLk
)->getType();
// Only announce a change when the restore moves the trace option across the
// enabled (greater than 0) / disabled boundary and a rule name is known.
187 prevTraceOptionValue
=traceOptionValue
;
188 traceOptionValue
=buf
->traceOptionValue
;
189 if ( (prevTraceOptionValue
> 0) !=
190 (traceOptionValue
> 0)) {
191 if (traceCurrentRuleName
!= NULL
) { /* MR21 */
192 if (traceOptionValue
> 0) {
193 /* MR23 */ printMessage(stderr
,
194 "trace enable restored in rule %s depth %d\n",
195 traceCurrentRuleName
,
198 if (traceOptionValue
<= 0) {
199 /* MR23 */ printMessage(stderr
,
200 "trace disable restored in rule %s depth %d\n",
201 traceCurrentRuleName
, /* MR21 */
// The remaining trace fields are restored after the message, so the message
// above uses the rule name from before the restore.
206 traceGuessOptionValue
=buf
->traceGuessOptionValue
;
207 traceCurrentRuleName
=buf
->traceCurrentRuleName
;
208 traceDepth
=buf
->traceDepth
;
212 /* Get the next symbol from the input stream; put it into lookahead buffer;
213 * fill token_type[] fast reference cache also. NLA is the next place where
214 * a lookahead ANTLRAbstractToken should go.
220 #ifdef ZZDEBUG_CONSUME_ACTION
221 zzdebug_consume_action();
225 // Defer Fetch feature
226 // Moves action of consume() into LA() function
231 NLA
= inputTokens
->getToken()->getType();
233 lap
= (lap
+1)&(LLk
-1);
238 _ANTLRTokenPtr
ANTLRParser::
243 // Defer Fetch feature
244 // Moves action of consume() into LA() function
250 #ifdef DEBUG_TOKENBUFFER
251 if ( i
>= inputTokens
->bufferSize() || inputTokens
->minTokens() < LLk
) /* MR20 Was "<=" */
253 char buf
[2000]; /* MR20 Was "static" */
254 sprintf(buf
, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i
);
258 return inputTokens
->bufferedToken(i
-LLk
);
265 int i
, c
= k
- (LLk
-dirty
);
266 for (i
=1; i
<=c
; i
++) consume();
269 /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
276 for(i
=1;i
<=LLk
; i
++) consume();
278 // lap = 0; // MR14 Sinan Karasu (sinan.karasu@boeing.com)
279 // labase = 0; // MR14
283 /* check to see if the current input symbol matches '_t'.
284 * During NON demand lookahead mode, dirty will always be 0 and
285 * hence the extra code for consuming tokens in _match is never
286 * executed; the same routine can be used for both modes.
// _match(): match of the current input symbol against token type _t, with
// out-parameters (MissText, MissTok, BadTok, MissSet) for error reporting.
// Most of the body is missing from this listing; the one visible statement
// advances labase by one and wraps it with a bit mask.
// NOTE(review): the mask form (x+1)&(LLk-1) only wraps like a modulo when LLk
// is a power of two.
289 _match(ANTLRTokenType _t
, ANTLRChar
**MissText
,
290 ANTLRTokenType
*MissTok
, _ANTLRTokenPtr
*BadTok
,
291 SetWordType
**MissSet
)
304 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
308 /* check to see if the current input symbol matches '_t'.
309 * Used during exception handling.
// _match_wsig(): returns 0 when LA(1) differs from _t; otherwise advances
// labase (wrapped with the LLk-1 mask). The success return and any other
// statements are not visible in this listing.
312 _match_wsig(ANTLRTokenType _t
)
317 if ( LA(1)!=_t
) return 0;
319 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
323 /* check to see if the current input symbol matches any token in a set.
324 * During NON demand lookahead mode, dirty will always be 0 and
325 * hence the extra code for consuming tokens in _match is never
326 * executed; the same routine can be used for both modes.
// _setmatch(): tests whether LA(1) is a member of token set tset.
// On a mismatch it fills the caller's error slots: no missing text, missing
// token type 0, the offending token LT(1), and tokclassErrset as the missing
// set. What is returned in either case is not visible in this listing.
// On the visible success path labase is advanced and wrapped with the LLk-1 mask.
329 _setmatch(SetWordType
*tset
, ANTLRChar
**MissText
,
330 ANTLRTokenType
*MissTok
, _ANTLRTokenPtr
*BadTok
,
331 SetWordType
**MissSet
, SetWordType
*tokclassErrset
)
336 if ( !set_el(LA(1), tset
) ) {
337 *MissText
=NULL
; /* MR23 */
338 *MissTok
=(ANTLRTokenType
) 0; /* MR23 */
339 *BadTok
=LT(1); /* MR23 */
340 *MissSet
=tokclassErrset
; /* MR23 */
344 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
// _setmatch_wsig(): returns 0 when LA(1) is not a member of tset; otherwise
// advances labase (wrapped with the LLk-1 mask). The success return is not
// visible in this listing.
349 _setmatch_wsig(SetWordType
*tset
)
354 if ( !set_el(LA(1), tset
) ) return 0;
356 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
360 /* Exception handling routines */
363 // Change suggested by Eli Sternheim (eli@interhdl.com)
// consumeUntil(): discards tokens with consume() until LA(1) is a member of
// set st or equals the end-of-file value.
// NOTE(review): end of file is hard-coded as 1 here, while resynch() and syn()
// compare against the eofToken member. The two agree only while eofToken keeps
// the value 1 given to it in the constructor; if it can be changed elsewhere,
// this loop would not stop at end of input.
366 consumeUntil(SetWordType
*st
)
368 ANTLRTokenType tmp
; // MR1
369 const int Eof
=1; // MR1
370 while ( !set_el( (tmp
=LA(1)), st
) && tmp
!=Eof
) { consume(); } // MR1
375 // Change suggested by Eli Sternheim (eli@interhdl.com)
// consumeUntilToken(): discards tokens with consume() until LA(1) equals t or
// the end-of-file value. The declaration of tmp is not visible in this listing.
// NOTE(review): end of file is hard-coded as 1 rather than taken from the
// eofToken member used by resynch() and syn().
378 consumeUntilToken(int t
)
381 const int Eof
=1; // MR1
382 while ( (tmp
=LA(1)) !=t
&& tmp
!=Eof
) { consume(); } // MR1
386 /* Old error stuff */
// resynch(): error recovery driven by the resynchConsumed flag.
//   1. If the previous call consumed nothing (flag is 0), consume one token,
//      set the flag and return.
//   2. If the current token is selected by wd[LA(1)] & mask, or is eofToken,
//      clear the flag and return without consuming.
//   3. Otherwise consume tokens until one of those two conditions holds, then
//      set the flag.
// wd is indexed directly by the token type and the entry is tested against mask.
389 resynch(SetWordType
*wd
,SetWordType mask
)
392 /* MR8 S.Bochnak@microtool.com.pl */
393 /* MR8 Change file scope static "consumed" to instance var */
395 /* if you enter here without having consumed a token from last resynch
396 * force a token consumption.
398 /* MR8 */ if ( !resynchConsumed
) {consume(); resynchConsumed
=1; return;}
400 /* if current token is in resynch set, we've got what we wanted */
402 /* MR8 */ if ( wd
[LA(1)]&mask
|| LA(1) == eofToken
) {resynchConsumed
=0; return;}
404 /* scan until we find something in the resynch set */
406 while ( !(wd
[LA(1)]&mask
) && LA(1) != eofToken
) {consume();}
408 /* MR8 */ resynchConsumed
=1;
411 /* standard error reporting function that assumes DLG-based scanners;
412 * you should redefine in subclass to change it or if you use your
416 /* MR23 THM There appears to be a parameter "badText" passed to syn()
417 which is not present in the parameter list. This may be
418 because in C mode there is no attribute function which
419 returns the text, so the text representation of the token
420 must be passed explicitly. I think.
// syn(): default syntax-error reporter. It increments syntaxErrCount and
// writes one message to stderr through printMessage().
//   egroup - text appended after " in" when it is non-empty
//   eset   - set of expected tokens, printed via edecode() when non-empty
//   etok   - single expected token, printed from token_tbl when eset is empty
//   k      - lookahead depth; 1 prints " missing", the other visible branch
//            prints the text of LT(k)
// The first parameter is unnamed (MR23) and therefore unused.
// At end of file the text comes from parserTokenName(), with a leading '@'
// replaced by "<eof>".
424 syn(_ANTLRTokenPtr
/*tok MR23*/, ANTLRChar
*egroup
, SetWordType
*eset
,
425 ANTLRTokenType etok
, int k
)
429 line
= LT(1)->getLine();
431 syntaxErrCount
++; /* MR11 */
433 /* MR23 If the token is not an EOF token, then use the ->getText() value.
435 If the token is the EOF token the text returned by ->getText()
436 may be garbage. If the text from the token table is "@" use
437 "<eof>" instead, because end-users don't know what "@" means.
438 If the text is not "@" then use that text, which must have been
439 supplied by the grammar writer.
441 const char * errorAt
= LT(1)->getText();
442 if (LA(1) == eofToken
) {
443 errorAt
= parserTokenName(LA(1));
444 if (errorAt
[0] == '@') errorAt
= "<eof>";
446 /* MR23 */ printMessage(stderr
, "line %d: syntax error at \"%s\"",
448 if ( !etok
&& !eset
) {/* MR23 */ printMessage(stderr
, "\n"); return;}
449 if ( k
==1 ) /* MR23 */ printMessage(stderr
, " missing");
452 /* MR23 */ printMessage(stderr
, "; \"%s\" not", LT(k
)->getText()); // MR23 use LT(k) since k>1
453 if ( set_deg(eset
)>1 ) /* MR23 */ printMessage(stderr
, " in");
455 if ( set_deg(eset
)>0 ) edecode(eset
);
456 else /* MR23 */ printMessage(stderr
, " %s", token_tbl
[etok
]);
// NOTE(review): egroup is handed to strlen() without a NULL check.
457 if ( strlen(egroup
) > 0 ) /* MR23 */ printMessage(stderr
, " in %s", egroup
);
458 /* MR23 */ printMessage(stderr
, "\n");
461 /* is b an element of set p? */
// set_el(): membership test for token type b in bit set p. Picks the set word
// with DIVWORD(b) and masks it with the bitmask entry chosen by MODWORD(b);
// the result is non-zero when the bit is set.
463 set_el(ANTLRTokenType b
, SetWordType
*p
)
465 return( p
[DIVWORD(b
)] & bitmask
[MODWORD(b
)] );
// set_deg(): counts the members of bit set a by testing every bitmask entry
// against each set word; returns 0 for a NULL set. The outer loop over the
// bsetsize words and the final return are not visible in this listing.
// NOTE(review): endp is computed from a before the NULL check below.
// NOTE(review): the register storage class is no longer valid as of C++17.
469 set_deg(SetWordType
*a
)
471 /* Fast compute degree of a set... the number
472 of elements present in the set. Assumes
473 that all word bits are used in the set
475 register SetWordType
*p
= a
;
476 register SetWordType
*endp
= &(a
[bsetsize
]);
477 register int degree
= 0;
479 if ( a
== NULL
) return 0;
482 register SetWordType t
= *p
;
483 register SetWordType
*b
= &(bitmask
[0]);
485 if (t
& *b
) ++degree
;
486 } while (++b
< &(bitmask
[sizeof(SetWordType
)*8]));
// edecode(): prints the members of bit set a to stderr using their token_tbl
// names. Sets with more than one member are wrapped in " {" and " }".
// It walks the bsetsize words of the set and every bitmask entry per word.
// e is the token index passed to token_tbl; the statement that advances it is
// not visible in this listing.
// NOTE(review): the register storage class is no longer valid as of C++17.
494 edecode(SetWordType
*a
)
496 register SetWordType
*p
= a
;
497 register SetWordType
*endp
= &(p
[bsetsize
]);
498 register unsigned e
= 0;
500 if ( set_deg(a
)>1 ) /* MR23 */ printMessage(stderr
, " {");
502 register SetWordType t
= *p
;
503 register SetWordType
*b
= &(bitmask
[0]);
505 if ( t
& *b
) /* MR23 */ printMessage(stderr
, " %s", token_tbl
[e
]);
507 } while (++b
< &(bitmask
[sizeof(SetWordType
)*8]));
508 } while (++p
< endp
);
509 if ( set_deg(a
)>1 ) /* MR23 */ printMessage(stderr
, " }");
513 * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
514 * where the zzMiss stuff is set here to the token that did not match
515 * (and which set wasn't it a member of).
518 // MR9 29-Sep-97 Stan Bochnak (S.Bochnak@microTool.com.pl)
519 // MR9 Original fix to static allocated text didn't
520 // MR9 work because a pointer to it was passed back
521 // MR9 to caller. Replace with instance variable.
// Capacity of the temporary array of lookahead-set pointers allocated in
// FAIL(); FAIL() panics when its k argument exceeds this value.
523 const int SETWORDCOUNT
=20;
// FAIL(k, set1..setk, &missSet, &missText, &badTok, &badText, &errK)
// Variadic failure analysis. It reads k lookahead sets from the argument list,
// finds the first lookahead position i whose token LA(i) is not in set i, and
// fills the caller's out-parameters. While scanning, the text of the tokens
// seen so far is collected in zzFAILtext, separated by spaces.
// Several statements (va_start, the other out-parameter stores, the release of
// f, va_end) are missing from this listing.
// NOTE(review): zzFAILtext holds 1000 chars and is filled with strcat() with no
// length check visible here.
// NOTE(review): f is allocated before the k > SETWORDCOUNT check. panic() exits
// the process, so this matters only if panic is overridden to return.
526 ANTLRParser::FAIL(int k
, ...)
// The text buffer is allocated lazily, once per parser.
532 if (zzFAILtext
== NULL
) zzFAILtext
=new char [1000]; // MR9
533 SetWordType
**f
=new SetWordType
*[SETWORDCOUNT
]; // MR1 // MR9
534 SetWordType
**miss_set
;
535 ANTLRChar
**miss_text
;
536 _ANTLRTokenPtr
*bad_tok
;
537 ANTLRChar
**bad_text
;
540 // err_k is passed as a "int *", not "unsigned *"
548 zzFAILtext
[0] = '\0';
549 if ( k
> SETWORDCOUNT
) panic("FAIL: overflowed buffer");
550 for (i
=1; i
<=k
; i
++) /* collect all lookahead sets */
552 f
[i
-1] = va_arg(ap
, SetWordType
*);
554 for (i
=1; i
<=k
; i
++) /* look for offending token */
556 if ( i
>1 ) strcat(zzFAILtext
, " ");
557 strcat(zzFAILtext
, LT(i
)->getText());
558 if ( !set_el(LA(i
), f
[i
-1]) ) break;
// The out-parameter pointers follow the k sets in the argument list, in this
// order.
560 miss_set
= va_arg(ap
, SetWordType
**);
561 miss_text
= va_arg(ap
, ANTLRChar
**);
562 bad_tok
= va_arg(ap
, _ANTLRTokenPtr
*);
563 bad_text
= va_arg(ap
, ANTLRChar
**);
564 err_k
= va_arg(ap
, int *); // MR1
567 /* bad; lookahead is permutation that cannot be matched,
568 * but, the ith token of lookahead is valid at the ith position
569 * (The old LL sub 1 (k) versus LL(k) parsing technique)
572 *miss_text
= LT(1)->getText();
574 *bad_text
= (*bad_tok
)->getText();
577 // MR4 20-May-97 erroneously deleted contents of f[]
578 // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
579 // MR1 10-Apr-97 release temporary storage
584 /* MR23 printMessage(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
586 *miss_text
= zzFAILtext
;
588 *bad_text
= (*bad_tok
)->getText();
589 if ( i
==1 ) *err_k
= 1;
592 // MR4 20-May-97 erroneously deleted contents of f[]
593 // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
594 // MR1 10-Apr-97 release temporary storage
// _match_wdfltsig(): match of tokenWanted with built-in default error handling.
// On a mismatch it increments syntaxErrCount, prints a "missing <token>"
// message to stderr (showing "<eof>" when the EOF token's text starts with '@')
// and skips input with consumeUntil(whatFollows).
// The return statements are not visible in this listing.
601 _match_wdfltsig(ANTLRTokenType tokenWanted
, SetWordType
*whatFollows
)
603 if ( dirty
==LLk
) consume();
605 if ( LA(1)!=tokenWanted
)
607 syntaxErrCount
++; /* MR11 */
608 /* MR23 */ printMessage(stderr
,
609 "line %d: syntax error at \"%s\" missing %s\n",
611 (LA(1)==eofToken
&& LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
612 token_tbl
[tokenWanted
]);
613 consumeUntil( whatFollows
);
618 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
619 /* if ( !demand_look ) consume(); */
// _setmatch_wdfltsig(): set-based counterpart of _match_wdfltsig().
// When LA(1) is not a member of tokensWanted it increments syntaxErrCount,
// prints a "missing" message that names token_tbl[tokenTypeOfSet], and skips
// input with consumeUntil(whatFollows).
// The return statements are not visible in this listing.
626 _setmatch_wdfltsig(SetWordType
*tokensWanted
,
627 ANTLRTokenType tokenTypeOfSet
,
628 SetWordType
*whatFollows
)
630 if ( dirty
==LLk
) consume();
631 if ( !set_el(LA(1), tokensWanted
) )
633 syntaxErrCount
++; /* MR11 */
634 /* MR23 */ printMessage(stderr
,
635 "line %d: syntax error at \"%s\" missing %s\n",
637 (LA(1)==eofToken
&& LT(1)->getText()[0] == '@')?"<eof>":LT(1)->getText(), /* MR21a */
638 token_tbl
[tokenTypeOfSet
]);
639 consumeUntil( whatFollows
);
644 labase
= (labase
+1)&(LLk
-1); // labase maintained even if !demand look
645 /* if ( !demand_look ) consume(); */
// eMsgd(): formats err, a printf-style format that consumes one int, with d
// into the class-level eMsgBuffer.
// The write is bounded by the size of eMsgBuffer (500 chars per its definition
// in this file), so an over-long message is truncated and stays
// NUL-terminated instead of overrunning the buffer.
651 eMsgd(char *err
,int d
)
653 snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, d);
// eMsg(): formats err, a printf-style format that consumes one string, with s
// into the class-level eMsgBuffer.
// The write is bounded by the size of eMsgBuffer (500 chars per its definition
// in this file), so an over-long message is truncated and stays
// NUL-terminated instead of overrunning the buffer.
658 eMsg(char *err
, char *s
)
660 snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s);
// eMsg2(): formats err, a printf-style format that consumes two strings, with
// s and t into the class-level eMsgBuffer.
// The write is bounded by the size of eMsgBuffer (500 chars per its definition
// in this file), so an over-long message is truncated and stays
// NUL-terminated instead of overrunning the buffer.
665 eMsg2(char *err
,char *s
, char *t
)
667 snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s, t);
// panic(): fatal-error handler. Prints "ANTLR panic: <msg>" to stderr through
// printMessage() and terminates the process with exit(PCCTS_EXIT_FAILURE).
672 panic(const char *msg
) // MR20 const
674 /* MR23 */ printMessage(stderr
, "ANTLR panic: %s\n", msg
);
675 exit(PCCTS_EXIT_FAILURE
); // MR1
// parserTokenName(): returns the token_tbl entry for token type tok.
// No range check is applied to tok.
678 const ANTLRChar
*ANTLRParser:: // MR1
679 parserTokenName(int tok
) { // MR1
680 return token_tbl
[tok
]; // MR1
// traceGuessDone(): trace hook for the end of a guess. Returns at once when no
// rule name has been recorded. The bodies of the trace-option branches are not
// visible in this listing. When it reports, it prints the rule being returned
// to (taken from *state), the current token text ("@" at end of file) and
// whether an enclosing guess is still active according to state->guessing.
683 void ANTLRParser::traceGuessDone(const ANTLRParserState
*state
) {
687 if (traceCurrentRuleName
== NULL
) return;
689 if (traceOptionValue
<= 0) {
691 } else if (traceGuessOptionValue
<= 0) {
698 /* MR23 */ printMessage(stderr
,"guess done - returning to rule %s {\"%s\"} at depth %d",
699 state
->traceCurrentRuleName
,
700 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
702 if (state
->guessing
!= 0) {
703 /* MR23 */ printMessage(stderr
," (guess mode continues - an enclosing guess is still active)");
705 /* MR23 */ printMessage(stderr
," (guess mode ends)");
707 /* MR23 */ printMessage(stderr
,"\n");
// traceGuessFail(): trace hook for a failed guess. Returns at once when no rule
// name has been recorded; otherwise, subject to the trace-option checks (whose
// branch bodies are not visible in this listing), prints "guess failed in
// <rule>" to stderr.
711 void ANTLRParser::traceGuessFail() {
715 if (traceCurrentRuleName
== NULL
) return; /* MR21 */
717 if (traceOptionValue
<= 0) {
719 } else if (guessing
&& traceGuessOptionValue
<= 0) {
726 /* MR23 */ printMessage(stderr
,"guess failed in %s\n",traceCurrentRuleName
);
731 zero value turns off trace
// tracein(): rule-entry trace hook. Always records rule as the current rule
// name. When it reports, it prints the current token text ("@" at end of file)
// and appends " guessing" while a guess is active. The bodies of the
// trace-option branches are not visible in this listing.
734 void ANTLRParser::tracein(const ANTLRChar
* rule
) {
739 traceCurrentRuleName
=rule
;
741 if (traceOptionValue
<= 0) {
743 } else if (guessing
&& traceGuessOptionValue
<= 0) {
750 /* MR23 */ printMessage(stderr
,"enter rule %s {\"%s\"} depth %d",
752 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
754 if (guessing
) /* MR23 */ printMessage(stderr
," guessing");
755 /* MR23 */ printMessage(stderr
,"\n");
// traceout(): rule-exit trace hook, the counterpart of tracein(). When it
// reports, it prints the current token text ("@" at end of file) and appends
// " guessing" while a guess is active. The bodies of the trace-option branches
// are not visible in this listing.
760 void ANTLRParser::traceout(const ANTLRChar
* rule
) {
766 if (traceOptionValue
<= 0) {
768 } else if (guessing
&& traceGuessOptionValue
<= 0) {
775 /* MR23 */ printMessage(stderr
,"exit rule %s {\"%s\"} depth %d",
777 LT(1)->getType() == eofToken
? "@" : LT(1)->getText(),
779 if (guessing
) /* MR23 */ printMessage(stderr
," guessing");
780 /* MR23 */ printMessage(stderr
,"\n");
// traceOption(): adds delta to traceOptionValue. When a rule name is recorded
// and the change moves the value across the enabled (greater than 0) /
// disabled boundary, the transition is reported on stderr.
// The return statement is not visible in this listing.
784 int ANTLRParser::traceOption(int delta
) {
786 int prevValue
=traceOptionValue
;
788 traceOptionValue
=traceOptionValue
+delta
;
790 if (traceCurrentRuleName
!= NULL
) {
791 if (prevValue
<= 0 && traceOptionValue
> 0) {
792 /* MR23 */ printMessage(stderr
,"trace enabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
794 if (prevValue
> 0 && traceOptionValue
<= 0) {
795 /* MR23 */ printMessage(stderr
,"trace disabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
// traceGuessOption(): adds delta to traceGuessOptionValue. When a rule name is
// recorded and the change moves the value across the enabled (greater than 0) /
// disabled boundary, the transition is reported on stderr.
// The return statement is not visible in this listing.
802 int ANTLRParser::traceGuessOption(int delta
) {
804 int prevValue
=traceGuessOptionValue
;
806 traceGuessOptionValue
=traceGuessOptionValue
+delta
;
808 if (traceCurrentRuleName
!= NULL
) {
809 if (prevValue
<= 0 && traceGuessOptionValue
> 0) {
810 /* MR23 */ printMessage(stderr
,"guess trace enabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
812 if (prevValue
> 0 && traceGuessOptionValue
<= 0) {
813 /* MR23 */ printMessage(stderr
,"guess trace disabled in rule %s depth %d\n",traceCurrentRuleName
,traceDepth
);
819 // MR19 V.H. Simonis Defer Fetch feature
// undeferFetch(): carries out the token fetches that the defer-fetch feature
// postponed. For each of the stillToFetch pending fetches it takes the next
// token from inputTokens, stores its type in NLA and advances lap (wrapped
// with the LLk-1 mask). Statements after the loop are not visible in this
// listing.
821 void ANTLRParser::undeferFetch()
826 for (int stillToFetch_x
= 0; stillToFetch_x
< stillToFetch
; ++stillToFetch_x
) {
827 NLA
= inputTokens
->getToken()->getType();
829 lap
= (lap
+1)&(LLk
-1);
839 int ANTLRParser::isDeferFetchEnabled()
// printMessage(): printf-style front end. Packs the variable arguments into a
// va_list and forwards them to printMessageV(), keeping its result in iRet.
// The va_list declaration, va_end and the return are not visible in this
// listing.
849 int ANTLRParser::printMessage(FILE* pFile
, const char* pFormat
, ...)
852 va_start( marker
, pFormat
);
853 int iRet
= printMessageV(pFile
, pFormat
, marker
);
// printMessageV(): the single output point behind printMessage(). Writes the
// formatted message to pFile with vfprintf() and returns vfprintf's result.
858 int ANTLRParser::printMessageV(FILE* pFile
, const char* pFormat
, va_list arglist
) // MR23
860 return vfprintf(pFile
, pFormat
, arglist
);
863 // MR23 Move semantic predicate error handling from macro to virtual function
865 // Called by the zzfailed_pred
// failedSemanticPredicate(): reports a failed semantic predicate, printing the
// line of LT(1) and the predicate text.
// NOTE(review): this message goes to stdout, whereas the other diagnostics in
// this file go to stderr - confirm that is intended.
867 void ANTLRParser::failedSemanticPredicate(const char* predicate
)
869 printMessage(stdout
,"line %d: semantic error; failed predicate: '%s'\n",
870 LT(1)->getLine(), predicate
);