4 * Manage tokens, regular expressions.
5 * Print methods for debugging
6 * Compute follow lists onto tail ends of rules.
8 * The following functions are visible:
10 * int addTname(char *); Add token name
11 * int addTexpr(char *); Add token expression
12 * int Tnum(char *); Get number of expr/token
13 * void Tklink(char *, char *); Link a name with an expression
14 * int hasAction(expr); Does expr already have action assigned?
15 * void setHasAction(expr); Indicate that expr now has an action
16 * Entry *newEntry(char *,int); Create new table entry with certain size
17 * void list_add(ListNode **list, char *e)
18 * void list_free(ListNode **list, int freeData); *** MR10 ***
19 * void list_apply(ListNode *list, void (*f)())
20 * void lexclass(char *m); switch to new/old lexical class
21 * void lexmode(int i); switch to old lexical class i
25 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
26 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
27 * company may do whatever they wish with source code distributed with
28 * PCCTS or the code generated by PCCTS, including the incorporation of
29 * PCCTS, or its output, into commerical software.
31 * We encourage users to develop software with PCCTS. However, we do ask
32 * that credit is given to us for developing PCCTS. By "credit",
33 * we mean that if you incorporate our source code into one of your
34 * programs (commercial product, research project, or otherwise) that you
35 * acknowledge this fact somewhere in the documentation, research report,
36 * etc... If you like PCCTS and have developed a nice tool with the
37 * output, please mention that you developed it using PCCTS. In
38 * addition, we ask that this header remain intact in our source code.
39 * As long as these guidelines are kept, we expect to continue enhancing
40 * this system and expect to make other tools available as they are
45 * Parr Research Corporation
46 * with Purdue University and AHPCRC, University of Minnesota
59 static int tsize
=TSChunk
; /* size of token str arrays */
63 RemapForcedTokensInSyntaxDiagram(Node
*);
65 RemapForcedTokensInSyntaxDiagram();
68 /* T o k e n M a n i p u l a t i o n */
71 * add token 't' to the TokenStr/Expr array. Make more room if necessary.
72 * 't' is either an expression or a token name.
74 * There is only one TokenStr array, but multiple ExprStr's. Therefore,
75 * for each lex class (element of lclass) we must extend the ExprStr array.
76 * ExprStr's and TokenStr are always all the same size.
78 * Also, there is a Texpr hash table for each automaton.
88 if ( TokenNum
>= tsize
) /* terminal table overflow? */
93 more
= TSChunk
* (1 + ((TokenNum
-tsize
) / TSChunk
));
95 TokenStr
= (char **) realloc((char *)TokenStr
, tsize
*sizeof(char *));
96 require(TokenStr
!= NULL
, "Ttrack: can't extend TokenStr");
97 for (i
=0; i
<NumLexClasses
; i
++)
99 lclass
[i
].exprs
= (char **)
100 realloc((char *)lclass
[i
].exprs
, tsize
*sizeof(char *));
101 require(lclass
[i
].exprs
!= NULL
, "Ttrack: can't extend ExprStr");
102 for (p
= &lclass
[i
].exprs
[tsize
-more
],j
=1; j
<=more
; j
++) *p
++ = NULL
;
104 for (p
= &TokenStr
[tsize
-more
],i
=1; i
<=more
; i
++) *p
++ = NULL
;
105 lexmode( CurrentLexClass
); /* reset ExprStr in case table moved */
107 /* note: we use the actual ExprStr/TokenStr array
108 * here as TokenInd doesn't exist yet
110 if ( *t
== '"' ) ExprStr
[TokenNum
] = t
;
111 else TokenStr
[TokenNum
] = t
;
122 Expr
*p
= (Expr
*) calloc(1, sizeof(Expr
));
123 require(p
!=NULL
, "newExpr: cannot alloc Expr node");
126 p
->lclass
= CurrentLexClass
;
130 /* switch to lexical class/mode m. This amounts to creating a new
131 * lex mode if one does not already exist and making ExprStr point
132 * to the correct char string array. We must also switch Texpr tables.
134 * BTW, we need multiple ExprStr arrays because more than one automaton
135 * may have the same label for a token, but with different expressions.
136 * We need to track an expr for each automaton. If we disallowed this
137 * feature, only one ExprStr would be required.
149 static char EOFSTR
[] = "\"@\"";
151 if ( hash_get(Tname
, m
) != NULL
)
153 warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m
));
155 /* does m already exist? */
156 i
= LexClassIndex(m
);
157 if ( i
!= -1 ) {lexmode(i
); return;}
158 /* must make new one */
160 CurrentLexClass
= NumLexClasses
-1;
161 require(NumLexClasses
<=MaxLexClasses
, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");
162 lclass
[CurrentLexClass
].classnum
= m
;
163 lclass
[CurrentLexClass
].exprs
= (char **) calloc(tsize
, sizeof(char *));
164 require(lclass
[CurrentLexClass
].exprs
!=NULL
,
165 "lexclass: cannot allocate ExprStr");
166 lclass
[CurrentLexClass
].htable
= newHashTable();
167 ExprStr
= lclass
[CurrentLexClass
].exprs
;
168 Texpr
= lclass
[CurrentLexClass
].htable
;
169 /* define EOF for each automaton */
170 p
= newTermEntry( EOFSTR
);
171 p
->token
= EofToken
; /* couldn't have remapped tokens yet, use EofToken */
172 hash_add(Texpr
, EOFSTR
, (Entry
*)p
);
173 list_add(&ExprOrder
, (void *)newExpr(EOFSTR
));
174 /* note: we use the actual ExprStr array
175 * here as TokenInd doesn't exist yet
177 ExprStr
[EofToken
] = EOFSTR
;
188 require(i
<NumLexClasses
, "lexmode: invalid mode");
189 ExprStr
= lclass
[i
].exprs
;
190 Texpr
= lclass
[i
].htable
;
194 /* return index into lclass array of lexical class. return -1 if nonexistent */
197 LexClassIndex( char *cl
)
205 for (i
=0; i
<NumLexClasses
; i
++)
207 if ( strcmp(lclass
[i
].classnum
, cl
) == 0 ) return i
;
214 hasAction( char *expr
)
221 require(expr
!=NULL
, "hasAction: invalid expr");
223 p
= (TermEntry
*) hash_get(Texpr
, expr
);
224 require(p
!=NULL
, eMsg1("hasAction: expr '%s' doesn't exist",expr
));
225 return (p
->action
!=NULL
);
230 setHasAction( char *expr
, char *action
)
232 setHasAction( expr
, action
)
238 require(expr
!=NULL
, "setHasAction: invalid expr");
240 p
= (TermEntry
*) hash_get(Texpr
, expr
);
241 require(p
!=NULL
, eMsg1("setHasAction: expr '%s' doesn't exist",expr
));
247 newForcedToken(char *token
, int tnum
)
249 newForcedToken(token
, tnum
)
254 ForcedToken
*ft
= (ForcedToken
*) calloc(1, sizeof(ForcedToken
));
255 require(ft
!=NULL
, "out of memory");
262 * Make a token indirection array that remaps token numbers and then walk
263 * the appropriate symbol tables and SynDiag to change token numbers
267 RemapForcedTokens(void)
274 int max_token_number
=0; /* MR9 23-Sep-97 Removed "unsigned" */
277 if ( ForcedTokens
== NULL
) return;
279 /* find max token num */
280 for (p
= ForcedTokens
->next
; p
!=NULL
; p
=p
->next
)
282 q
= (ForcedToken
*) p
->elem
;
283 if ( q
->tnum
> max_token_number
) max_token_number
= q
->tnum
;
285 fprintf(stderr
, "max token number is %d\n", max_token_number
);
287 /* make token indirection array */
288 TokenInd
= (int *) calloc(max_token_number
+1, sizeof(int));
289 LastTokenCounted
= TokenNum
;
290 TokenNum
= max_token_number
+1;
291 require(TokenInd
!=NULL
, "RemapForcedTokens: cannot allocate TokenInd");
293 /* fill token indirection array and change token id htable ; swap token indices */
294 for (i
=1; i
<TokenNum
; i
++) TokenInd
[i
] = i
;
295 for (p
= ForcedTokens
->next
; p
!=NULL
; p
=p
->next
)
300 q
= (ForcedToken
*) p
->elem
;
301 fprintf(stderr
, "%s forced to %d\n", q
->token
, q
->tnum
);
302 te
= (TermEntry
*) hash_get(Tname
, q
->token
);
303 require(te
!=NULL
, "RemapForcedTokens: token not in hash table");
305 fprintf(stderr
, "Before: TokenInd[old_pos==%d] is %d\n", old_pos
, TokenInd
[old_pos
]);
306 fprintf(stderr
, "Before: TokenInd[target==%d] is %d\n", q
->tnum
, TokenInd
[q
->tnum
]);
307 q
= (ForcedToken
*) p
->elem
;
308 t
= TokenInd
[old_pos
];
309 TokenInd
[old_pos
] = q
->tnum
;
310 TokenInd
[q
->tnum
] = t
;
311 te
->token
= q
->tnum
; /* update token type id symbol table */
312 fprintf(stderr
, "After: TokenInd[old_pos==%d] is %d\n", old_pos
, TokenInd
[old_pos
]);
313 fprintf(stderr
, "After: TokenInd[target==%d] is %d\n", q
->tnum
, TokenInd
[q
->tnum
]);
315 /* Change the token number in the sym tab entry for the exprs
316 * at the old position of the token id and the target position
318 /* update expr at target (if any) of forced token id */
319 if ( q
->tnum
< TokenNum
) /* is it a valid position? */
321 for (i
=0; i
<NumLexClasses
; i
++)
323 if ( lclass
[i
].exprs
[q
->tnum
]!=NULL
)
325 /* update the symbol table for this expr */
326 TermEntry
*e
= (TermEntry
*) hash_get(lclass
[i
].htable
, lclass
[i
].exprs
[q
->tnum
]);
327 require(e
!=NULL
, "RemapForcedTokens: expr not in hash table");
329 fprintf(stderr
, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
330 lclass
[i
].exprs
[q
->tnum
], q
->tnum
, i
, old_pos
);
334 /* update expr at old position (if any) of forced token id */
335 for (i
=0; i
<NumLexClasses
; i
++)
337 if ( lclass
[i
].exprs
[old_pos
]!=NULL
)
339 /* update the symbol table for this expr */
340 TermEntry
*e
= (TermEntry
*) hash_get(lclass
[i
].htable
, lclass
[i
].exprs
[old_pos
]);
341 require(e
!=NULL
, "RemapForcedTokens: expr not in hash table");
343 fprintf(stderr
, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
344 lclass
[i
].exprs
[old_pos
], q
->token
, i
, q
->tnum
);
350 RemapForcedTokensInSyntaxDiagram((Node
*)SynDiag
);
355 RemapForcedTokensInSyntaxDiagram(Node
*p
)
357 RemapForcedTokensInSyntaxDiagram(p
)
361 Junction
*j
= (Junction
*) p
;
362 RuleRefNode
*r
= (RuleRefNode
*) p
;
363 TokNode
*t
= (TokNode
*)p
;
365 if ( p
==NULL
) return;
366 require(p
->ntype
>=1 && p
->ntype
<=NumNodeTypes
, "Remap...: invalid diagram node");
370 if ( j
->visited
) return;
371 if ( j
->jtype
== EndRule
) return;
373 RemapForcedTokensInSyntaxDiagram( j
->p1
);
374 RemapForcedTokensInSyntaxDiagram( j
->p2
);
378 RemapForcedTokensInSyntaxDiagram( r
->next
);
381 if ( t
->remapped
) return; /* we've been here before */
383 fprintf(stderr
, "remapping %d to %d\n", t
->token
, TokenInd
[t
->token
]);
384 t
->token
= TokenInd
[t
->token
];
385 RemapForcedTokensInSyntaxDiagram( t
->next
);
388 RemapForcedTokensInSyntaxDiagram( ((ActionNode
*)p
)->next
);
391 fatal_internal("invalid node type");
396 * Add a token name. Return the token number associated with it. If it already
397 * exists, then return the token number assigned to it.
399 * Track the order in which tokens are found so that the DLG output maintains
400 * that order. It also lets us map token numbers to strings.
404 addTname( char *token
)
411 require(token
!=NULL
, "addTname: invalid token name");
413 if ( (p
=(TermEntry
*)hash_get(Tname
, token
)) != NULL
) return p
->token
;
414 p
= newTermEntry( token
);
416 p
->token
= TokenNum
++;
417 hash_add(Tname
, token
, (Entry
*)p
);
421 /* This is the same as addTname except we force the TokenNum to be tnum.
422 * We don't have to use the Forced token stuff as no tokens will have
423 * been defined with #tokens when this is called. This is only called
424 * when a #tokdefs meta-op is used.
428 addForcedTname( char *token
, int tnum
)
430 addForcedTname( token
, tnum
)
436 require(token
!=NULL
, "addTname: invalid token name");
438 if ( (p
=(TermEntry
*)hash_get(Tname
, token
)) != NULL
) return p
->token
;
439 p
= newTermEntry( token
);
442 hash_add(Tname
, token
, (Entry
*)p
);
447 * Add a token expr. Return the token number associated with it. If it already
448 * exists, then return the token number assigned to it.
452 addTexpr( char *expr
)
459 require(expr
!=NULL
, "addTexpr: invalid regular expression");
461 if ( (p
=(TermEntry
*)hash_get(Texpr
, expr
)) != NULL
) return p
->token
;
462 p
= newTermEntry( expr
);
464 /* track the order in which they occur */
465 list_add(&ExprOrder
, (void *)newExpr(p
->str
));
466 p
->token
= TokenNum
++;
467 hash_add(Texpr
, expr
, (Entry
*)p
);
471 /* return the token number of 'term'. Return 0 if no 'term' exists */
481 require(term
!=NULL
, "Tnum: invalid terminal");
483 if ( *term
=='"' ) p
= (TermEntry
*) hash_get(Texpr
, term
);
484 else p
= (TermEntry
*) hash_get(Tname
, term
);
485 if ( p
== NULL
) return 0;
486 else return p
->token
;
489 /* associate a Name with an expr. If both have been already assigned
490 * token numbers, then an error is reported. Add the token or expr
491 * that has not been added if no error. This 'represents' the #token
492 * ANTLR pseudo-op. If both have not been defined, define them both
493 * linked to same token number.
497 Tklink( char *token
, char *expr
)
499 Tklink( token
, expr
)
505 require(token
!=NULL
&& expr
!=NULL
, "Tklink: invalid token name and/or expr");
507 p
= (TermEntry
*) hash_get(Tname
, token
);
508 q
= (TermEntry
*) hash_get(Texpr
, expr
);
509 if ( p
!= NULL
&& q
!= NULL
) /* both defined */
511 warn( eMsg2("token name %s and rexpr %s already defined; ignored",
515 if ( p
==NULL
&& q
==NULL
) /* both not defined */
517 int t
= addTname( token
);
518 q
= newTermEntry( expr
);
519 hash_add(Texpr
, expr
, (Entry
*)q
);
521 /* note: we use the actual ExprStr array
522 * here as TokenInd doesn't exist yet
525 /* track the order in which they occur */
526 list_add(&ExprOrder
, (void *)newExpr(q
->str
));
529 if ( p
!= NULL
) /* one is defined, one is not */
531 q
= newTermEntry( expr
);
532 hash_add(Texpr
, expr
, (Entry
*)q
);
534 ExprStr
[p
->token
] = q
->str
; /* both expr and token str defined now */
535 list_add(&ExprOrder
, (void *)newExpr(q
->str
));
537 else /* trying to associate name with expr here*/
539 p
= newTermEntry( token
);
540 hash_add(Tname
, token
, (Entry
*)p
);
542 TokenStr
[p
->token
] = p
->str
;/* both expr and token str defined now */
547 * Given a string, this function allocates and returns a pointer to a
548 * hash table record of size 'sz' whose "str" pointer is reset to a position
549 * in the string table.
553 newEntry( char *text
, int sz
)
561 require(text
!=NULL
, "new: NULL terminal");
563 if ( (p
= (Entry
*) calloc(1,sz
)) == 0 )
565 fatal_internal("newEntry: out of memory for terminals\n");
566 exit(PCCTS_EXIT_FAILURE
);
568 p
->str
= mystrdup(text
);
574 * add an element to a list.
576 * Any non-empty list has a sentinel node whose 'elem' pointer is really
577 * a pointer to the last element. (i.e. length(list) = #elemIn(list)+1).
578 * Elements are appended to the list.
582 list_add( ListNode
**list
, void *e
)
590 require(e
!=NULL
, "list_add: attempting to add NULL list element");
593 require(p
!=NULL
, "list_add: cannot alloc new list node");
597 ListNode
*sentinel
= newListNode
;
598 require(sentinel
!=NULL
, "list_add: cannot alloc sentinel node");
601 sentinel
->elem
= (char *)p
; /* set tail pointer */
603 else /* find end of list */
605 tail
= (ListNode
*) (*list
)->elem
; /* get tail pointer */
607 (*list
)->elem
= (char *) p
; /* reset tail */
611 /* MR10 list_free() frees the ListNode elements in the list */
612 /* MR10 if freeData then free the data elements of the list too */
616 list_free(ListNode
**list
,int freeData
)
618 list_free(list
,freeData
)
626 if (list
== NULL
) return;
627 if (*list
== NULL
) return;
628 for (p
=*list
; p
!= NULL
; p
=next
) {
630 if (freeData
&& p
->elem
!= NULL
) {
631 free( (char *) p
->elem
);
640 list_apply( ListNode
*list
, void (*f
)(void *) )
642 list_apply( list
, f
)
648 require(f
!=NULL
, "list_apply: NULL function to apply");
650 if ( list
== NULL
) return;
651 for (p
= list
->next
; p
!=NULL
; p
=p
->next
) (*f
)( p
->elem
);
657 int list_search_cstring(ListNode
*list
, char * cstring
)
659 int list_search_cstring(list
, cstring
)
665 if (list
== NULL
) return 0;
666 for (p
= list
->next
; p
!=NULL
; p
=p
->next
) {
667 if (p
->elem
== NULL
) continue;
668 if (0 == strcmp((char *) p
->elem
, cstring
)) return 1;
673 /* F O L L O W C y c l e S t u f f */
675 /* make a key based upon (rulename, computation, k value).
676 * Computation values are 'i'==FIRST, 'o'==FOLLOW.
679 /* MR10 Make the key all characters so it can be read easily */
680 /* MR10 by a simple dump program. Also, separates */
681 /* MR10 'o' and 'i' from rule name */
685 Fkey( char *rule
, int computation
, int k
)
687 Fkey( rule
, computation
, k
)
693 static char key
[MaxRuleName
+2+2+1]; /* MR10 */
696 if ( k
> 99 ) /* MR10 */
697 fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */
698 if ( (i
=strlen(rule
)) > MaxRuleName
) /* MR10 */
699 fatal( eMsgd("rule name > max of %d\n", MaxRuleName
) ); /* MR10 */
702 /* MR10 */ key
[i
]='*';
703 /* MR10 */ key
[i
+1] = (char) computation
; /* MR20 G. Hobbelt */
704 /* MR10 */ if (k
< 10) {
705 /* MR10 */ key
[i
+2] = (char) ( '0' + k
);
706 /* MR10 */ key
[i
+3] = '\0';
708 /* MR10 */ key
[i
+2] = (char) ( '0' + k
/10);
709 /* MR10 */ key
[i
+3] = (char) ( '0' + k
% 10);
710 /* MR10 */ key
[i
+4] = '\0';
716 /* Push a rule onto the kth FOLLOW stack */
719 FoPush( char *rule
, int k
)
727 require(rule
!=NULL
, "FoPush: tried to push NULL rule");
728 require(k
<=CLL_k
, "FoPush: tried to access non-existent stack");
730 /*fprintf(stderr, "FoPush(%s)\n", rule);*/
731 r
= (RuleEntry
*) hash_get(Rname
, rule
);
732 if ( r
== NULL
) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule
) );}
733 if ( FoStack
[k
] == NULL
) /* Does the kth stack exist yet? */
735 /*fprintf(stderr, "allocating FoStack\n");*/
736 FoStack
[k
] = (int *) calloc(FoStackSize
, sizeof(int));
737 require(FoStack
[k
]!=NULL
, "FoPush: cannot allocate FOLLOW stack\n");
739 if ( FoTOS
[k
] == NULL
)
742 *(FoTOS
[k
]) = r
->rulenum
;
747 require(valid(FoStack
[k
]), "FoPush: invalid FoStack");
749 if ( FoTOS
[k
] >= &(FoStack
[k
][FoStackSize
-1]) )
750 fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n",
752 require(FoTOS
[k
]>=FoStack
[k
],
753 eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox",
756 *(FoTOS
[k
]) = r
->rulenum
;
761 **** fprintf(stderr, "FoStack[k=%d]:\n", k);
762 **** for (p=FoStack[k]; p<=FoTOS[k]; p++)
764 **** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname);
770 /* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */
779 require(k
<=CLL_k
, "FoPop: tried to access non-existent stack");
780 /*fprintf(stderr, "FoPop\n");*/
781 require(FoTOS
[k
]>=FoStack
[k
]&&FoTOS
[k
]<=&(FoStack
[k
][FoStackSize
-1]),
782 "FoPop: FoStack stack-ptr is playing out of its sandbox");
783 if ( FoTOS
[k
] == FoStack
[k
] ) FoTOS
[k
] = NULL
;
787 /* Compute FOLLOW cycle.
788 * Mark all FOLLOW sets for rules in cycle as incomplete.
789 * Then, save cycle on the cycle list (Cycles) for later resolution.
790 * The Cycle is stored in the form:
791 * (head of cycle==croot, rest of rules in cycle==cyclicDep)
793 * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on)
795 * Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x)
796 * ^----Infinite recursion (cycle)
798 * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends
799 * on the FOLLOW of a,b, and c. The root of a cycle is always complete after
800 * Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules
801 * in a FOLLOW cycle have the same FOLLOW set.
805 RegisterCycle( char *rule
, int k
)
807 RegisterCycle( rule
, k
)
816 require(rule
!=NULL
, "RegisterCycle: tried to register NULL rule");
817 require(k
<=CLL_k
, "RegisterCycle: tried to access non-existent stack");
819 /*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/
820 /* Find cycle start */
821 r
= (RuleEntry
*) hash_get(Rname
, rule
);
822 require(r
!=NULL
,eMsg1("rule %s must be defined but isn't", rule
));
823 require(FoTOS
[k
]>=FoStack
[k
]&&FoTOS
[k
]<=&(FoStack
[k
][FoStackSize
-1]),
824 eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox",
826 /*** if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) )
828 **** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n",
830 **** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n",
831 **** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1]));
832 **** exit(PCCTS_EXIT_FAILURE);
837 require(valid(FoStack
[k
]), "RegisterCycle: invalid FoStack");
839 for (p
=FoTOS
[k
]; *p
!= r
->rulenum
&& p
>= FoStack
[k
]; --p
) {;}
840 require(p
>=FoStack
[k
], "RegisterCycle: FoStack is screwed up beyond belief");
841 if ( p
== FoTOS
[k
] ) return; /* don't worry about cycles to oneself */
843 /* compute cyclic dependents (rules in cycle except head) */
845 require(c
!=NULL
, "RegisterCycle: couldn't alloc new cycle");
846 c
->cyclicDep
= empty
;
847 c
->croot
= *p
++; /* record root of cycle */
848 for (; p
<=FoTOS
[k
]; p
++)
850 /* Mark all dependent rules as incomplete */
851 f
= (CacheEntry
*) hash_get(Fcache
, Fkey(RulePtr
[*p
]->rname
,'o',k
));
854 f
= newCacheEntry( Fkey(RulePtr
[*p
]->rname
,'o',k
) );
855 hash_add(Fcache
, Fkey(RulePtr
[*p
]->rname
,'o',k
), (Entry
*)f
);
857 f
->incomplete
= TRUE
;
859 set_orel(*p
, &(c
->cyclicDep
)); /* mark rule as dependent of croot */
861 list_add(&(Cycles
[k
]), (void *)c
);
864 /* make all rules in cycle complete
866 * while ( some set has changed ) do
868 * if degree of FOLLOW set for croot > old degree then
869 * update all FOLLOW sets for rules in cyclic dependency
877 ResolveFoCycles( int k
)
888 /* int i; */ /* MR10 not useful */
891 unsigned *cursor
; /* MR10 */
892 unsigned *origin
; /* MR10 */
894 /*fprintf(stderr, "Resolving following cycles for %d\n", k);*/
899 for (p
= Cycles
[k
]->next
; p
!=NULL
; p
=p
->next
)
901 c
= (Cycle
*) p
->elem
;
902 /*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/
903 /*s_fprT(stderr, c->cyclicDep);*/
904 /*fprintf(stderr, "\n");*/
906 hash_get(Fcache
, Fkey(RulePtr
[c
->croot
]->rname
,'o',k
));
907 require(f
!=NULL
, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr
[c
->croot
]->rname
) );
908 if ( (d
=set_deg(f
->fset
)) > c
->deg
)
910 /*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/
912 c
->deg
= d
; /* update cycle FOLLOW set degree */
914 /* MR10 */ origin
=set_pdq(c
->cyclicDep
);
915 /* MR10 */ for (cursor
=origin
; *cursor
!= nil
; cursor
++) {
916 /* MR10 */ r
=*cursor
;
918 /******** while ( !set_nil(c->cyclicDep) ) { *****/
919 /******** r = set_int(c->cyclicDep); *****/
920 /******** set_rm(r, c->cyclicDep); *****/
922 /*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/
924 hash_get(Fcache
, Fkey(RulePtr
[r
]->rname
,'o',k
));
925 require(g
!=NULL
, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr
[r
]->rname
) );
926 set_orin(&(g
->fset
), f
->fset
);
927 g
->incomplete
= FALSE
;
929 /* MR10 */ free( (char *) origin
);
930 /* MR10 */ origin
=NULL
;
933 /* MR10 - this if statement appears to be meaningless since i is always 0 */
934 /* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */
936 /* kill Cycle list */
937 for (q
= Cycles
[k
]->next
; q
!= NULL
; q
=p
)
940 set_free( ((Cycle
*)q
->elem
)->cyclicDep
);
943 free( (char *)Cycles
[k
] );
948 /* P r i n t i n g S y n t a x D i a g r a m s */
952 pBlk( Junction
*q
, int btype
)
962 q
->end
->pvisited
= TRUE
;
963 if ( btype
== aLoopBegin
)
965 require(q
->p2
!=NULL
, "pBlk: invalid ()* block");
967 alt
= (Junction
*)q
->p2
;
973 while ( !set_nil(alt
->fset
[k
]) )
975 s_fprT(stdout
, alt
->fset
[k
]);
976 if ( k
++ == CLL_k
) break;
977 if ( !set_nil(alt
->fset
[k
]) ) printf(", ");
983 for (a
=1,alt
=q
; alt
!= NULL
; alt
= (Junction
*) alt
->p2
, a
++)
985 if ( alt
->p1
!= NULL
) PRINT(alt
->p1
);
988 printf( " /* [%d] ", alt
->altnum
);
990 while ( !set_nil(alt
->fset
[k
]) )
992 s_fprT(stdout
, alt
->fset
[k
]);
993 if ( k
++ == CLL_k
) break;
994 if ( !set_nil(alt
->fset
[k
]) ) printf(", ");
996 if ( alt
->p2
== NULL
&& btype
== aOptBlk
)
997 printf( " (optional branch) */\n");
998 else printf( " */\n");
1001 /* ignore implied empty alt of Plus blocks */
1002 if ( alt
->p2
!= NULL
&& ((Junction
*)alt
->p2
)->ignore
) break;
1004 if ( alt
->p2
!= NULL
&& !(((Junction
*)alt
->p2
)->p2
==NULL
&& btype
== aOptBlk
) )
1009 if ( a
+1==pAlt1
|| a
+1==pAlt2
) printf("=>");
1016 p
= (Junction
*) ((Junction
*)alt
->p2
)->p1
;
1019 if ( p
->ntype
==nAction
)
1021 p
=(Junction
*)((ActionNode
*)p
)->next
;
1024 if ( p
->ntype
!=nJunction
)
1028 if ( p
->jtype
==EndBlk
|| p
->jtype
==EndRule
)
1033 p
= (Junction
*)p
->p1
;
1035 if ( p
==NULL
) printf("\n\t"); /* Empty alt? */
1039 q
->end
->pvisited
= FALSE
;
1042 /* How to print out a junction */
1045 pJunc( Junction
*q
)
1053 require(q
!=NULL
, "pJunc: NULL node");
1054 require(q
->ntype
==nJunction
, "pJunc: not junction");
1056 if ( q
->pvisited
== TRUE
) return;
1061 if ( PrintAnnotate
) First(q
, 1, q
->jtype
, &dum_k
);
1062 if ( q
->end
->p1
!= NULL
&& ((Junction
*)q
->end
->p1
)->ntype
==nJunction
&&
1063 ((Junction
*)q
->end
->p1
)->jtype
== EndRule
) doing_rule
= 1;
1064 else doing_rule
= 0;
1068 if ( pAlt1
==1 ) printf("=>");
1074 if ( pLevel
==1 ) printf(" ");
1079 if ( pLevel
==1 ) printf(" ");
1081 if ( pLevel
>1 ) printf(" ");
1084 if ( q
->guess
) printf("?");
1086 if ( PrintAnnotate
) freeBlkFsets(q
);
1087 if ( q
->end
->p1
!= NULL
) PRINT(q
->end
->p1
);
1090 if ( PrintAnnotate
) First(q
, 1, q
->jtype
, &dum_k
);
1094 if ( pAlt1
==1 ) printf("=>");
1099 if ( pLevel
==1 ) printf(" ");
1101 if ( pLevel
>1 ) printf(" ");
1102 else printf("\n\t");
1105 if ( PrintAnnotate
) freeBlkFsets(q
);
1106 if ( q
->end
->p1
!= NULL
) PRINT(q
->end
->p1
);
1109 if ( PrintAnnotate
) First(q
, 1, q
->jtype
, &dum_k
);
1113 if ( pAlt1
==1 ) printf("=>");
1118 if ( pLevel
==1 ) printf(" ");
1120 if ( pLevel
>1 ) printf(" ");
1121 else printf("\n\t");
1124 if ( PrintAnnotate
) freeBlkFsets(q
);
1125 if ( q
->end
->p1
!= NULL
) PRINT(q
->end
->p1
);
1128 if ( PrintAnnotate
) First(q
, 1, q
->jtype
, &dum_k
);
1130 if ( PrintAnnotate
) freeBlkFsets(q
);
1133 if ( PrintAnnotate
) First(q
, 1, q
->jtype
, &dum_k
);
1137 if ( pAlt1
==1 ) printf("=>");
1142 if ( pLevel
==1 ) printf(" ");
1144 if ( pLevel
>1 ) printf(" ");
1147 if ( PrintAnnotate
) freeBlkFsets(q
);
1148 if ( q
->end
->p1
!= NULL
) PRINT(q
->end
->p1
);
1153 printf( "\n%s :\n", q
->rname
);
1155 if ( q
->p2
!= NULL
) PRINT(q
->p2
);
1158 if ( q
->p1
!= NULL
) PRINT(q
->p1
);
1159 q
->pvisited
= FALSE
;
1160 if ( q
->p2
!= NULL
) PRINT(q
->p2
);
1166 q
->pvisited
= FALSE
;
1169 /* How to print out a rule reference node */
1172 pRuleRef( RuleRefNode
*p
)
1178 require(p
!=NULL
, "pRuleRef: NULL node");
1179 require(p
->ntype
==nRuleRef
, "pRuleRef: not rule ref node");
1181 printf( " %s", p
->text
);
1185 /* How to print out a terminal node */
1188 pToken( TokNode
*p
)
1194 require(p
!=NULL
, "pToken: NULL node");
1195 require(p
->ntype
==nToken
, "pToken: not token node");
1197 if ( p
->wild_card
) printf(" .");
1198 printf( " %s", TerminalString(p
->token
));
1202 /* How to print out a terminal node */
1205 pAction( ActionNode
*p
)
1211 require(p
!=NULL
, "pAction: NULL node");
1212 require(p
->ntype
==nAction
, "pAction: not action node");
1217 /* F i l l F o l l o w L i s t s */
1220 * Search all rules for all rule reference nodes, q to rule, r.
1221 * Add q->next to follow list dangling off of rule r.
1224 * r: -o-R-o-->o--> Ptr to node following rule r in another rule
1226 * o--> Ptr to node following another reference to r.
1228 * This is the data structure employed to avoid FOLLOW set computation. We
1229 * simply compute the FIRST (reach) of the EndRule Node which follows the
1230 * list found at the end of all rules which are referenced elsewhere. Rules
1231 * not invoked by other rules have no follow list (r->end->p1==NULL).
1232 * Generally, only start symbols are not invoked by another rule.
1234 * Note that this mechanism also gives a free cross-reference mechanism.
1236 * The entire syntax diagram is layed out like this:
1259 Junction
*j
= (Junction
*) p
;
1260 RuleRefNode
*r
= (RuleRefNode
*) p
;
1262 if ( p
==NULL
) return;
1263 require(p
->ntype
>=1 && p
->ntype
<=NumNodeTypes
,
1264 eMsgd("FoLink: invalid diagram node: ntype==%d",p
->ntype
));
1268 if ( j
->fvisited
) return;
1269 if ( j
->jtype
== EndRule
) return;
1274 /* MR14 */ /* Need to determine whether the guess block is an */
1275 /* MR14 */ /* of the form (alpha)? beta before follow sets are */
1276 /* MR14 */ /* computed. This is necessary to solve problem */
1277 /* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */
1279 /* MR14 */ /* This is performed by analysis_point as a side-effect. */
1282 /* MR14 */ if (j
->jtype
== aSubBlk
&& j
->guess
) {
1283 /* MR14 */ Junction
*ignore
;
1284 /* MR14 */ ignore
=analysis_point(j
);
1289 if ( r
->linked
) return;
1290 q
= (RuleEntry
*) hash_get(Rname
, r
->text
);
1293 warnFL( eMsg1("rule %s not defined",r
->text
), FileStr
[r
->file
], r
->line
);
1297 if ( r
->parms
!=NULL
&& RulePtr
[q
->rulenum
]->pdecl
==NULL
)
1299 warnFL( eMsg1("rule %s accepts no parameter(s)", r
->text
),
1300 FileStr
[r
->file
], r
->line
);
1302 if ( r
->parms
==NULL
&& RulePtr
[q
->rulenum
]->pdecl
!=NULL
)
1304 warnFL( eMsg1("rule %s requires parameter(s)", r
->text
),
1305 FileStr
[r
->file
], r
->line
);
1307 if ( r
->assign
!=NULL
&& RulePtr
[q
->rulenum
]->ret
==NULL
)
1309 warnFL( eMsg1("rule %s yields no return value(s)", r
->text
),
1310 FileStr
[r
->file
], r
->line
);
1312 if ( r
->assign
==NULL
&& RulePtr
[q
->rulenum
]->ret
!=NULL
)
1314 warnFL( eMsg1("rule %s returns a value(s)", r
->text
),
1315 FileStr
[r
->file
], r
->line
);
1319 addFoLink( r
->next
, r
->rname
, RulePtr
[q
->rulenum
] );
1326 FoLink( ((TokNode
*)p
)->next
);
1329 FoLink( ((ActionNode
*)p
)->next
);
1332 fatal_internal("invalid node type");
1337 * Add a reference to the end of a rule.
1339 * 'r' points to the RuleBlk node in a rule. r->end points to the last node
1340 * (EndRule jtype) in a rule.
1346 * r->end --> o-->o--> Ptr to node following rule r in another rule
1348 * o--> Ptr to node following another reference to r.
1350 * Note that the links are added to the head of the list so that r->end->p1
1351 * always points to the most recently added follow-link. At the end, it should
1352 * point to the last reference found in the grammar (starting from the 1st rule).
1356 addFoLink( Node
*p
, char *rname
, Junction
*r
)
1358 addFoLink( p
, rname
, r
)
1365 require(r
!=NULL
, "addFoLink: incorrect rule graph");
1366 require(r
->end
!=NULL
, "addFoLink: incorrect rule graph");
1367 require(r
->end
->jtype
==EndRule
, "addFoLink: incorrect rule graph");
1368 require(p
!=NULL
, "addFoLink: NULL FOLLOW link");
1371 j
->rname
= rname
; /* rname on follow links point to target rule */
1372 j
->p1
= p
; /* link to other rule */
1373 j
->p2
= (Node
*) r
->end
->p1
;/* point to head of list */
1374 r
->end
->p1
= (Node
*) j
; /* reset head to point to new node */
1379 GenCrossRef( Junction
*p
)
1389 require(p
!=NULL
, "GenCrossRef: why are you passing me a null grammar?");
1391 printf("Cross Reference:\n\n");
1393 for (; p
!=NULL
; p
= (Junction
*)p
->p2
)
1395 printf("Rule %20s referenced by {", p
->rname
);
1396 /* make a set of rules for uniqueness */
1397 for (j
= (Junction
*)(p
->end
)->p1
; j
!=NULL
; j
= (Junction
*)j
->p2
)
1399 q
= (RuleEntry
*) hash_get(Rname
, j
->rname
);
1400 require(q
!=NULL
, "GenCrossRef: FoLinks are screwed up");
1401 set_orel(q
->rulenum
, &a
);
1403 for (; !set_nil(a
); set_rm(e
, a
))
1406 printf(" %s", RulePtr
[e
]->rname
);
1414 The single argument is a pointer to the start of an element of a
1415 C++ style function prototypet list. Given a pointer to the start of
1416 an formal we must locate the comma (or the end of the string)
1417 and locate the datatype, formal name, and initial value expression.
1419 The function returns a pointer to the character following the comma
1420 which terminates the formal declaration, or a pointer to the end of
1421 the string if none was found.
1423 I thought we were parsing specialists, how come I'm doing this by
1431 Foo f = &farray[1,2],
1434 TFoo<int,char> f = TFoo<int,char>(1,2),
1436 A non-zero value for nesting indicates a problem matching '(' and ')',
1437 '[' and ']', '<' and '>', '{' and '}', or improperly terminated string
1438 or character literal.
1444 * Don't care if it is a valid string literal or not, just find the end
1445 * Start with pointer to leading "\""
1449 char * skipStringLiteral(char *pCurrent
)
1451 char * skipStringLiteral(pCurrent
)
1456 if (*p
== 0) return p
;
1457 require (*p
== '\"', "skipStringLiteral")
1459 for (p
= p
; *p
!= 0; p
++) {
1474 * Don't care if it is a valid character literal or not, just find the end
1475 * Start with pointer to leading "'"
1479 char * skipCharLiteral(char *pStart
)
1481 char * skipCharLiteral(pStart
)
1486 if (*p
== 0) return p
;
1487 require (*p
== '\'', "skipCharLiteral")
1489 for (p
= p
; *p
!= 0; p
++) {
1504 char * skipSpaces(char *pStart
)
1506 char * skipSpaces(pStart
)
1511 while (*p
!= 0 && isspace(*p
)) p
++;
1516 char * skipToSeparatorOrEqualSign(char *pStart
, int *pNest
)
1518 char * skipToSeparatorOrEqualSign(pStart
, pNest
)
1549 p
= skipStringLiteral(p
);
1553 p
= skipCharLiteral(p
);
1558 if (*p
== 0) goto EXIT
;
1564 if (nest
== 0) goto EXIT
;
1578 char * skipToSeparator(char *pStart
, int *pNest
)
1580 char * skipToSeparator(pStart
, pNest
)
1587 p
= skipToSeparatorOrEqualSign(p
, pNest
);
1588 if (*pNest
!= 0) return p
;
1589 if (*p
== ',') return p
;
1590 if (*p
== 0) return p
;
1595 /* skip to just past the "=" separating the declaration from the initialization value */
1598 char * getInitializer(char *pStart
)
1600 char * getInitializer(pStart
)
1612 require(pStart
!=NULL
, "getInitializer: invalid string");
1614 p
= endFormal(pStart
,
1621 if (nest
!= 0) return NULL
;
1622 if (pEqualSign
== NULL
) return NULL
;
1623 if (pValue
== NULL
) return NULL
;
1624 return strBetween(pValue
, NULL
, pSeparator
);
1628 Examines the string from pStart to pEnd-1.
1629 If the string has 0 length or is entirely white space
1630 returns 1. Otherwise 0.
1634 int isWhiteString(const char *pStart
, const char *pEnd
)
1636 int isWhiteString(pStart
, pEnd
)
1642 for (p
= pStart
; p
< pEnd
; p
++) {
1643 if (! isspace(*p
)) return 0;
1649 This replaces HasComma() which couldn't distinguish
1660 int hasMultipleOperands(char *pStart
)
1662 int hasMultipleOperands(pStart
)
1670 if (*p
== 0) return 0;
1671 p
= skipToSeparator(p
, &nest
);
1672 if (nest
== 0 && *p
== ',') return 1;
1677 #define MAX_STR_BETWEEN_WORK_AREA 1000
1679 static char strBetweenWorkArea
[MAX_STR_BETWEEN_WORK_AREA
];
1683 strBetween(pStart, pNext, pStop)
1685 Creates a null terminated string by copying the text between two pointers
1686 to a work area. The start of the string is pStart. The end of the string
1687 is the character before pNext, or if pNext is null then the character before
1688 pStop. Trailing spaces are not included in the copy operation.
1690 This is used when a string contains several parts. The pNext part may be
1691 optional. The pStop will stop the scan when the optional part is not present
1692 (is a null pointer).
1696 char *strBetween(char *pStart
, char *pNext
, char *pStop
)
1698 char *strBetween(pStart
, pNext
, pStop
)
1705 char *q
= strBetweenWorkArea
;
1708 pEnd
= (pNext
!= NULL
) ? pNext
: pStop
;
1710 require (pEnd
!= NULL
, "pEnd == NULL");
1711 require (pEnd
>= pStart
, "pEnd < pStart");
1712 for (pEnd
--; pEnd
>= pStart
; pEnd
--) { /* MR31 */
1713 if (! isspace(*pEnd
)) break;
1716 p
<= pEnd
&& q
< &strBetweenWorkArea
[MAX_STR_BETWEEN_WORK_AREA
-2];
1721 return strBetweenWorkArea
;
1725 function Returns pointer to character following separator at
1726 value which to continue search for next formal. If at the
1727 end of the string a pointer to the null byte at the
1728 end of the string is returned.
1730 pStart Pointer to the starting position of the formal list
1732 This may be the middle of a longer string, for example
1733 when looking for the end of formal #3 starting from
1734 the middle of the complete formal list.
1736 ppDataType Returns a pointer to the start of the data type in the
1737 formal. Example: pointer to "Foo".
1739 ppSymbol Returns a pointer to the start of the formal symbol.
1740 Example: pointer to "f".
1742 ppEqualSign Returns a pointer to the equal sign separating the
1743 formal symbol from the initial value. If there is
1744 no "=" then this will be NULL.
1746 ppValue Returns a pointer to the initial value part of the
1747 formal declaration. Example: pointer to "&farray[1,2]"
1749 ppSeparator Returns a pointer to the character which terminated the
1750 scan. This should be a pointer to a comma or a null
1751 byte which terminates the string.
1753 pNest Returns the nesting level when a separator was found.
1754 This is non-zero for any kind of error. This is zero
1755 for a successful parse of this portion of the formal
1761 char * endFormal(char *pStart
,
1769 char * endFormal(pStart
,
1791 *ppEqualSign
= NULL
;
1793 *ppSeparator
= NULL
;
1797 /* The first non-blank is the start of the datatype */
1800 if (*p
== 0) goto EXIT
;
1803 /* We are not looking for the symbol, we are looking
1804 for the separator that follows the symbol. Then
1807 Search for the ',' or '=" or null terminator.
1810 p
= skipToSeparatorOrEqualSign(p
, pNest
);
1812 if (*pNest
!= 0) goto EXIT
;
1815 Work backwards to find start of symbol
1816 Skip spaces between the end of symbol and separator
1817 Assume that there are no spaces in the formal, but
1818 there is a space preceding the formal
1821 for (q
= &p
[-1]; q
>= *ppDataType
; q
--) {
1822 if (! isspace(*q
)) break;
1824 if (q
< *ppDataType
) goto EXIT
;
1827 MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)()
1828 Backup until we hit the end of a symbol string, then find the
1829 start of the symbol string. This wont' work for functions
1830 with prototypes, but works for the most common cases. For
1831 others, use typedef names.
1834 /* MR26 */ for (q
= q
; q
>= *ppDataType
; q
--) {
1835 /* MR26 */ if (isalpha(*q
) || isdigit(*q
) || *q
== '_' || *q
== '$') break;
1837 /* MR26 */ if (q
< *ppDataType
) goto EXIT
;
1839 for (q
= q
; q
>= *ppDataType
; q
--) {
1840 if ( ! (isalpha(*q
) || isdigit(*q
) || *q
== '_' || *q
== '$')) break;
1845 if (*p
== ',' || *p
== 0) {
1851 p
= skipSpaces(++p
);
1853 if (*p
== 0) goto EXIT
;
1856 while (*p
!= 0 && *pNest
== 0 && *p
!= ',') {
1857 p
= skipToSeparator(p
, pNest
);
1859 if (*pNest
== 0) *ppSeparator
= p
;