3 * Define the generic ANTLRParser superclass, which is subclassed to
4 * define an actual parser.
6 * Before entry into this file: ANTLRTokenType must be set.
10 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
11 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
12 * company may do whatever they wish with source code distributed with
13 * PCCTS or the code generated by PCCTS, including the incorporation of
14 * PCCTS, or its output, into commerical software.
16 * We encourage users to develop software with PCCTS. However, we do ask
17 * that credit is given to us for developing PCCTS. By "credit",
18 * we mean that if you incorporate our source code into one of your
19 * programs (commercial product, research project, or otherwise) that you
20 * acknowledge this fact somewhere in the documentation, research report,
21 * etc... If you like PCCTS and have developed a nice tool with the
22 * output, please mention that you developed it using PCCTS. In
23 * addition, we ask that this header remain intact in our source code.
24 * As long as these guidelines are kept, we expect to continue enhancing
25 * this system and expect to make other tools available as they are
30 * Parr Research Corporation
31 * with Purdue University and AHPCRC, University of Minnesota
35 #ifndef APARSER_H_GATE
36 #define APARSER_H_GATE
40 #include "pccts_stdio.h"
41 #include "pccts_setjmp.h"
46 #include ATOKENBUFFER_H
/* NLA: token type cached at slot `lap` of token_type[].  The index is
 * wrapped by masking with (LLk-1); that mask only behaves as a modulo when
 * LLk is a power of two -- presumably guaranteed by the generator, confirm.
 * Expands to member names, so it is only usable inside parser methods.
 */
55 #define NLA (token_type[lap&(LLk-1)])/* --> next LA */
/* Storage unit ("word") of the bit sets: one unsigned char per word. */
57 typedef unsigned char SetWordType
;
59 /* Define external bit set stuff (for SetWordType) */
/* EXT_WORDSIZE is the bit count of one set word, computed as sizeof(char)*8.
 * EXT_LOGWORDSIZE is the shift count that pairs with it (1<<3 == 8); the
 * two must be kept in sync because MODWORD masks with EXT_WORDSIZE-1 while
 * DIVWORD shifts by EXT_LOGWORDSIZE.
 */
60 #define EXT_WORDSIZE (sizeof(char)*8)
61 #define EXT_LOGWORDSIZE 3
63 /* s y n t a c t i c p r e d i c a t e s t u f f */
65 #ifndef zzUSER_GUESS_HOOK
66 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv)
69 #ifndef zzUSER_GUESS_DONE_HOOK
70 #define zzUSER_GUESS_DONE_HOOK(seqFrozen)
73 /* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */
75 #define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen)
76 #ifndef zzUSER_GUESS_FAIL_HOOK
77 #define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq)
81 typedef struct _zzjmp_buf
{
85 /* these need to be macros not member functions */
/* zzGUESS_BLOCK: declares the locals the other guess macros use -- the
 * saved parser state (zzst), the setjmp result (zzrv), the input-buffer
 * marker (_marker) and the guess sequence number captured for this guess.
 */
86 #define zzGUESS_BLOCK ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen;
/* zzNON_GUESS_MODE: prefix that makes the following statement run only when
 * `guessing` is zero.  It is a bare `if`, so beware of a trailing `else`.
 */
87 #define zzNON_GUESS_MODE if ( !guessing )
/* zzGUESS_FAIL: calls guess_fail(); the terminating ';' is part of the
 * expansion.
 */
88 #define zzGUESS_FAIL guess_fail();
90 /* Note: zzGUESS_DONE does not execute longjmp() */
/* zzGUESS_DONE: sets zzrv to 1, rewinds inputTokens to _marker, hands the
 * saved state to guess_done(), then invokes the user "guess done" hook.
 */
92 #define zzGUESS_DONE {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) }
/* zzGUESS: starts a guess.  As visible here it saves the parser state,
 * takes the next guess sequence number, marks the input position and calls
 * setjmp() on guess_start.state.  A non-zero zzrv (setjmp returning via a
 * longjmp) runs zzGUESS_DONE.  Statement order matters around setjmp(); do
 * not reorder.
 * NOTE(review): the listing skips a line between saveState() and the
 * sequence-number assignment, so this view of the macro may be incomplete.
 */
93 #define zzGUESS saveState(&zzst); \
95 zzGuessSeqFrozen = ++zzGuessSeq; \
96 _marker = inputTokens->mark(); \
97 zzrv = setjmp(guess_start.state); \
98 zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \
99 if ( zzrv ) zzGUESS_DONE
/* zzTRACEdata: declares the local that remembers the caller's rule name
 * while a rule is being traced.
 */
101 #define zzTRACEdata const ANTLRChar *zzTracePrevRuleName = NULL;
/* zzTRACEIN(r): saves traceCurrentRuleName into that local, then calls
 * tracein(r).  Expands to two statements, so it must not be used as the
 * body of an unbraced if/else/loop.
 */
104 #define zzTRACEIN(r) zzTracePrevRuleName=traceCurrentRuleName;tracein(r);
/* zzTRACEOUT(r): calls traceout(r), then restores traceCurrentRuleName from
 * the saved value.  Also a two-statement expansion.
 */
107 #define zzTRACEOUT(r) traceout(r);traceCurrentRuleName=zzTracePrevRuleName;
110 /* a n t l r p a r s e r d e f */
112 struct ANTLRParserState
{
113 /* class variables */
114 zzjmp_buf guess_start
;
122 int traceOptionValue
; // MR10
123 int traceGuessOptionValue
; // MR10
124 const ANTLRChar
*traceCurrentRuleName
; // MR10
125 int traceDepth
; // MR10
131 * multiple inheritance is a cool way to include what stuff is needed
132 * in this structure (like guess stuff). however, i'm not convinced that
133 * multiple inheritance works correctly on all platforms. not that
134 * much space is used--just include all possibly useful members.
136 * the class should also be a template with arguments for the lookahead
137 * depth and so on. that way, more than one parser can be defined (as
138 * each will probably have different lookahead requirements). however,
139 * am i sure that templates work? no, i'm not sure.
141 * no attributes are maintained and, hence, the 'asp' variable is not
142 * needed. $i can still be referenced, but it refers to the token
143 * associated with that rule element. question: where are the tokens
144 * stored if not on the software stack? in local variables created
145 * and assigned to by antlr.
149 /* class variables */
150 static SetWordType bitmask
[sizeof(SetWordType
)*8];
151 static char eMsgBuffer
[500];
154 int LLk
; // number of lookahead symbols (old LL_K)
156 ANTLRTokenType eofToken
; // when do I stop during resynch()s
157 int bsetsize
; // size of bitsets created by ANTLR in
158 // units of SetWordType
160 ANTLRTokenBuffer
*inputTokens
; //place to get input tokens
162 zzjmp_buf guess_start
; // where to jump back to upon failure
163 int guessing
; // if guessing (using (...)? predicate)
165 // infinite lookahead stuff
166 int can_use_inf_look
; // set by subclass (generated by ANTLR)
172 const ANTLRChar
**token_tbl
; // pointer to table of token type strings MR20 const
174 int dirty
; // used during demand lookahead
176 ANTLRTokenType
*token_type
; // fast reference cache of token.getType()
177 // ANTLRLightweightToken **token; // the token with all its attributes
181 int stillToFetch
; // MR19 V.H. Simonis
185 void fill_inf_look();
188 virtual void guess_fail() { // MR9 27-Sep-97 make virtual
189 traceGuessFail(); // MR10
190 longjmp(guess_start
.state
, 1); } // MR9
191 virtual void guess_done(ANTLRParserState
*st
) { // MR9 27-Sep-97 make virtual
192 restoreState(st
); } // MR9
193 virtual int guess(ANTLRParserState
*); // MR9 27-Sep-97 make virtual
195 int _match(ANTLRTokenType
, ANTLRChar
**, ANTLRTokenType
*,
196 _ANTLRTokenPtr
*, SetWordType
**);
197 int _setmatch(SetWordType
*, ANTLRChar
**, ANTLRTokenType
*,
198 _ANTLRTokenPtr
*, SetWordType
**,
199 SetWordType
* tokclassErrset
/* MR23 */);
200 int _match_wsig(ANTLRTokenType
);
201 int _setmatch_wsig(SetWordType
*);
202 virtual void consume();
203 virtual void resynch(SetWordType
*wd
,SetWordType mask
); // MR21
204 void prime_lookahead();
205 virtual void tracein(const ANTLRChar
*r
); // MR10
206 virtual void traceout(const ANTLRChar
*r
); // MR10
207 static unsigned MODWORD(unsigned x
) {return x
& (EXT_WORDSIZE
-1);} // x % EXT_WORDSIZE // MR9
208 static unsigned DIVWORD(unsigned x
) {return x
>> EXT_LOGWORDSIZE
;} // x / EXT_WORDSIZE // MR9
209 int set_deg(SetWordType
*);
210 int set_el(ANTLRTokenType
, SetWordType
*);
211 virtual void edecode(SetWordType
*); // MR1
212 virtual void FAIL(int k
, ...); // MR1
213 int traceOptionValue
; // MR10
214 int traceGuessOptionValue
; // MR10
215 const ANTLRChar
*traceCurrentRuleName
; // MR10
216 int traceDepth
; // MR10
217 void traceReset(); // MR10
218 virtual void traceGuessFail(); // MR10
219 virtual void traceGuessDone(const ANTLRParserState
*); // MR10
220 int zzGuessSeq
; // MR10
223 ANTLRParser(ANTLRTokenBuffer
*,
228 virtual ~ANTLRParser();
232 ANTLRTokenType
LA(int i
)
235 // MR14 demand look will always be 0 for C++ mode
237 //// return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] :
238 //// token_type[(lap+(i)-1)&(LLk-1)];
240 // MR19 V.H. Simonis Defer fetch feature
245 return token_type
[(lap
+(i
)-1)&(LLk
-1)];
247 _ANTLRTokenPtr
LT(int i
);
249 void setEofToken(ANTLRTokenType t
) { eofToken
= t
; }
250 ANTLRTokenType
getEofToken() const { return eofToken
; } // MR14
252 void noGarbageCollectTokens() { inputTokens
->noGarbageCollectTokens(); }
253 void garbageCollectTokens() { inputTokens
->garbageCollectTokens(); }
255 virtual void syn(_ANTLRTokenPtr tok
, ANTLRChar
*egroup
,
256 SetWordType
*eset
, ANTLRTokenType etok
, int k
);
257 virtual void saveState(ANTLRParserState
*); // MR9 27-Sep-97 make virtual
258 virtual void restoreState(ANTLRParserState
*); // MR9 27-Sep-97 make virtual
260 virtual void panic(const char *msg
); // MR20 const
262 static char *eMsgd(char *,int);
263 static char *eMsg(char *,char *);
264 static char *eMsg2(char *,char *,char *);
266 virtual int printMessage(FILE* pFile
, const char* pFormat
, ...); // MR23
267 virtual int printMessageV(FILE* pFile
, const char* pFormat
, va_list arglist
); // MR23
269 void consumeUntil(SetWordType
*st
);
270 void consumeUntilToken(int t
);
272 virtual int _setmatch_wdfltsig(SetWordType
*tokensWanted
,
273 ANTLRTokenType tokenTypeOfSet
,
274 SetWordType
*whatFollows
);
275 virtual int _match_wdfltsig(ANTLRTokenType tokenWanted
,
276 SetWordType
*whatFollows
);
278 const ANTLRChar
* parserTokenName(int tok
); // MR1
280 int traceOptionValueDefault
; // MR11
281 int traceOption(int delta
); // MR11
282 int traceGuessOption(int delta
); // MR11
284 // MR8 5-Aug-97 S.Bochnak@microtool.com.pl
285 // MR8 Move resynch static local variable
286 // MR8 to class instance
288 int syntaxErrCount
; // MR12
289 ANTLRTokenStream
*getLexer() const { // MR12
290 return inputTokens
? inputTokens
->getLexer() : 0; } // MR12
292 int resynchConsumed
; // MR8
293 char *zzFAILtext
; // workarea required by zzFAIL // MR9
294 void undeferFetch(); // MR19 V.H. Simonis
295 int isDeferFetchEnabled(); // MR19 V.H. Simonis
296 virtual void failedSemanticPredicate(const char* predicate
); /* MR23 */
/* zzmatch(_t): calls _match() for token type _t and jumps to the enclosing
 * rule's `fail` label when it returns zero.  The zzMiss* and zzBadTok locals
 * are passed by address -- presumably filled in with error details by
 * _match(); confirm against its definition.
 * Expands to an unbraced `if`, so never follow an invocation with `else`.
 */
299 #define zzmatch(_t) \
300 if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \
301 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
/* zzmatch_wsig(_t, handler): match token type _t using signal-style error
 * reporting.  When _match_wsig() returns zero:
 *   - while guessing, the guess is abandoned through zzGUESS_FAIL;
 *   - otherwise _signal is set to MismatchedToken and control jumps to the
 *     label `handler`.
 * The outer `if` is braced so the inner `else` cannot be mis-associated
 * (and so compilers do not warn about an ambiguous dangling else).
 * zzGUESS_FAIL supplies its own terminating ';'.
 */
#define zzmatch_wsig(_t,handler)					\
	if ( !_match_wsig((ANTLRTokenType)_t) ) {			\
		if ( guessing ) zzGUESS_FAIL				\
		else {_signal=MismatchedToken; goto handler;}		\
	}
/* zzsetmatch(_ts, _tokclassErrset): set-based counterpart of zzmatch.
 * Calls _setmatch() with the token set _ts and the extra error set, and
 * jumps to the enclosing rule's `fail` label when it returns zero.
 * Expands to an unbraced `if`, so never follow an invocation with `else`.
 */
306 #define zzsetmatch(_ts,_tokclassErrset) \
307 if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
308 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet, _tokclassErrset) ) goto fail;
/* zzsetmatch_wsig(_ts, handler): match against token set _ts using
 * signal-style error reporting.  When _setmatch_wsig() returns zero:
 *   - while guessing, the guess is abandoned through zzGUESS_FAIL;
 *   - otherwise _signal is set to MismatchedToken and control jumps to the
 *     label `handler`.
 * The outer `if` is braced so the inner `else` cannot be mis-associated
 * (and so compilers do not warn about an ambiguous dangling else).
 * zzGUESS_FAIL supplies its own terminating ';'.
 */
#define zzsetmatch_wsig(_ts, handler)					\
	if ( !_setmatch_wsig(_ts) ) {					\
		if ( guessing ) zzGUESS_FAIL				\
		else {_signal=MismatchedToken; goto handler;}		\
	}
313 /* For the dflt signal matchers, a FALSE indicates that an error occurred
314 * just like the other matchers, but in this case, the routine has already
315 * recovered--we do NOT want to consume another token. However, when
316 * the match was successful, we do want to consume hence _signal=0 so that
317 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
/* zzsetmatch_wdfltsig: calls _setmatch_wdfltsig() and, when it returns
 * zero, records MismatchedToken in _signal.  Unlike zzmatch/zzsetmatch
 * there is no jump; the caller is expected to inspect _signal afterwards.
 * Expands to an unbraced `if`.
 */
320 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
321 if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
322 _signal = MismatchedToken;
/* zzmatch_wdfltsig: single-token form of the macro above; calls
 * _match_wdfltsig() and sets _signal to MismatchedToken on a zero result.
 */
324 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \
325 if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
328 // MR1 10-Apr-97 zzfailed_pred() macro does not backtrack in guess mode.
329 // MR1 Identification and correction due to J. Lilley
331 // MR23 Call virtual method to report error.
332 // MR23 Provide more control over failed predicate action
333 // without any need for user to worry about guessing internals.
335 #ifndef zzfailed_pred
336 #define zzfailed_pred(_p,_hasuseraction,_useraction) \
340 zzfailed_pred_action(_p,_hasuseraction,_useraction) \
344 // MR23 Provide more control over failed predicate action
345 // without any need for user to worry about guessing internals.
346 // _hasuseraction == 0 => no user specified error action
347 // _hasuseraction == 1 => user specified error action
349 #ifndef zzfailed_pred_action
350 #define zzfailed_pred_action(_p,_hasuseraction,_useraction) \
351 if (_hasuseraction) { _useraction } else { failedSemanticPredicate(_p); }
355 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0; \
356 _ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)""; \
357 int zzErrk=1,zzpf=0; \
359 ANTLRChar *zzMissText=(ANTLRChar *)"";
363 /* S t a n d a r d E x c e p t i o n S i g n a l s */
/* Signal codes stored in _signal.  MismatchedToken is the value the
 * zz*match_wsig and zz*match_wdfltsig macros assign when a match fails.
 * The other two are not used in this part of the file; judging by their
 * names they report "no viable alternative" conditions -- confirm against
 * the generated parser code.
 */
366 #define MismatchedToken 1
367 #define NoViableAlt 2
368 #define NoSemViableAlt 3
370 /* MR7 Allow more control over signalling */
371 /* by adding "Unwind" and "SetSignal" */
/* Signal helpers for use inside rule bodies; each one expands to an
 * expression that writes through the rule's `_retsignal` pointer:
 *   setSignal(v)    store v in both _signal and *_retsignal
 *   suppressSignal  store 0 in both _signal and *_retsignal
 *   exportSignal    copy the current _signal into *_retsignal
 * Every expansion is wrapped in parentheses so that it behaves as a single
 * operand; without them `setSignal(x) == y` would assign the result of the
 * comparison instead of x.
 */
#define setSignal(newValue)	(*_retsignal=_signal=(newValue))
#define suppressSignal		(*_retsignal=_signal=0)
#define exportSignal		(*_retsignal=_signal)