]>
Commit | Line | Data |
---|---|---|
3eb9473e | 1 | /* ANTLRTokenBuffer.C\r |
2 | *\r | |
3 | * SOFTWARE RIGHTS\r | |
4 | *\r | |
5 | * We reserve no LEGAL rights to the Purdue Compiler Construction Tool\r | |
6 | * Set (PCCTS) -- PCCTS is in the public domain. An individual or\r | |
7 | * company may do whatever they wish with source code distributed with\r | |
8 | * PCCTS or the code generated by PCCTS, including the incorporation of\r | |
9 | * PCCTS, or its output, into commerical software.\r | |
10 | *\r | |
11 | * We encourage users to develop software with PCCTS. However, we do ask\r | |
12 | * that credit is given to us for developing PCCTS. By "credit",\r | |
13 | * we mean that if you incorporate our source code into one of your\r | |
14 | * programs (commercial product, research project, or otherwise) that you\r | |
15 | * acknowledge this fact somewhere in the documentation, research report,\r | |
16 | * etc... If you like PCCTS and have developed a nice tool with the\r | |
17 | * output, please mention that you developed it using PCCTS. In\r | |
18 | * addition, we ask that this header remain intact in our source code.\r | |
19 | * As long as these guidelines are kept, we expect to continue enhancing\r | |
20 | * this system and expect to make other tools available as they are\r | |
21 | * completed.\r | |
22 | *\r | |
23 | * ANTLR 1.33\r | |
24 | * Terence Parr\r | |
25 | * Parr Research Corporation\r | |
26 | * with Purdue University and AHPCRC, University of Minnesota\r | |
27 | * 1989-1998\r | |
28 | */\r | |
29 | \r | |
30 | typedef int ANTLRTokenType; // fool AToken.h into compiling\r | |
31 | \r | |
32 | class ANTLRParser; /* MR1 */\r | |
33 | \r | |
34 | #define ANTLR_SUPPORT_CODE\r | |
35 | \r | |
36 | #include "pcctscfg.h"\r | |
37 | \r | |
38 | #include ATOKENBUFFER_H\r | |
39 | typedef ANTLRAbstractToken *_ANTLRTokenPtr;\r | |
40 | \r | |
// Debug-only bookkeeping, compiled in when DBG_TBUF or DBG_TBUF_MARK_REW
// is defined: mark() increments and rewind() decrements the entry for the
// buffer position involved, and both print that entry in their trace line.
#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
static unsigned char test[1000];
#endif

// Definitions of the static ANTLRCommonToken::ctor / ::dtor members, only
// present when DBG_REFCOUNTTOKEN is defined.  Presumably construction and
// destruction counters -- TODO confirm against the class declaration.
#ifdef DBG_REFCOUNTTOKEN
int ANTLRCommonToken::ctor = 0;
int ANTLRCommonToken::dtor = 0;
#endif
49 | \r | |
50 | ANTLRTokenBuffer::\r | |
51 | ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _chunk_size_formal) /* MR14 */\r | |
52 | {\r | |
53 | this->input = _input;\r | |
54 | this->k = _k;\r | |
55 | buffer_size = chunk_size = _chunk_size_formal;\r | |
56 | buffer = (_ANTLRTokenPtr *)\r | |
57 | calloc(chunk_size+1,sizeof(_ANTLRTokenPtr ));\r | |
58 | if ( buffer == NULL ) {\r | |
59 | panic("cannot alloc token buffer");\r | |
60 | }\r | |
61 | buffer++; // leave the first elem empty so tp-1 is valid ptr\r | |
62 | \r | |
63 | tp = &buffer[0];\r | |
64 | last = tp-1;\r | |
65 | next = &buffer[0];\r | |
66 | num_markers = 0;\r | |
67 | end_of_buffer = &buffer[buffer_size-1];\r | |
68 | // BUGBUG -- threshold = &buffer[(int)(buffer_size*(1.0/2.0))];\r | |
69 | threshold = &buffer[(int)(buffer_size / 2)];\r | |
70 | _deleteTokens = 1; // assume we delete tokens\r | |
71 | parser=NULL; // MR5 - uninitialized reference\r | |
72 | }\r | |
73 | \r | |
// Empty helper that the destructor calls before doing anything else.
// Presumably a convenient spot for a debugger breakpoint -- TODO confirm.
static void f()
{
}
75 | ANTLRTokenBuffer::\r | |
76 | ~ANTLRTokenBuffer()\r | |
77 | {\r | |
78 | f();\r | |
79 | // Delete all remaining tokens (from 0..last inclusive)\r | |
80 | if ( _deleteTokens )\r | |
81 | {\r | |
82 | _ANTLRTokenPtr *z;\r | |
83 | for (z=buffer; z<=last; z++)\r | |
84 | {\r | |
85 | (*z)->deref();\r | |
86 | // z->deref();\r | |
87 | #ifdef DBG_REFCOUNTTOKEN\r | |
88 | fprintf(stderr, "##########dtor: deleting token '%s' (ref %d)\n",\r | |
89 | ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());\r | |
90 | #endif\r | |
91 | if ( (*z)->nref()==0 )\r | |
92 | {\r | |
93 | delete (*z);\r | |
94 | }\r | |
95 | }\r | |
96 | }\r | |
97 | \r | |
98 | if ( buffer!=NULL ) free((char *)(buffer-1));\r | |
99 | }\r | |
100 | \r | |
101 | #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)\r | |
102 | #include "pccts_stdio.h"\r | |
103 | PCCTS_NAMESPACE_STD\r | |
104 | #endif\r | |
105 | \r | |
106 | _ANTLRTokenPtr ANTLRTokenBuffer::\r | |
107 | getToken()\r | |
108 | {\r | |
109 | if ( tp <= last ) // is there any buffered lookahead still to be read?\r | |
110 | {\r | |
111 | return *tp++; // read buffered lookahead\r | |
112 | }\r | |
113 | // out of buffered lookahead, get some more "real"\r | |
114 | // input from getANTLRToken()\r | |
115 | if ( num_markers==0 )\r | |
116 | {\r | |
117 | if( next > threshold )\r | |
118 | {\r | |
119 | #ifdef DBG_TBUF\r | |
120 | fprintf(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer);\r | |
121 | #endif\r | |
122 | makeRoom();\r | |
123 | }\r | |
124 | }\r | |
125 | else {\r | |
126 | if ( next > end_of_buffer )\r | |
127 | {\r | |
128 | #ifdef DBG_TBUF\r | |
129 | fprintf(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size);\r | |
130 | #endif\r | |
131 | extendBuffer();\r | |
132 | }\r | |
133 | }\r | |
134 | *next = getANTLRToken();\r | |
135 | (*next)->ref(); // say we have a copy of this pointer in buffer\r | |
136 | last = next;\r | |
137 | next++;\r | |
138 | tp = last;\r | |
139 | return *tp++;\r | |
140 | }\r | |
141 | \r | |
142 | void ANTLRTokenBuffer::\r | |
143 | rewind(int pos)\r | |
144 | {\r | |
145 | #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)\r | |
146 | fprintf(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]);\r | |
147 | test[pos]--;\r | |
148 | #endif\r | |
149 | tp = &buffer[pos];\r | |
150 | num_markers--;\r | |
151 | }\r | |
152 | \r | |
153 | /*\r | |
154 | * This function is used to specify that the token pointers read\r | |
155 | * by the ANTLRTokenBuffer should be buffered up (to be reused later).\r | |
156 | */\r | |
157 | int ANTLRTokenBuffer::\r | |
158 | mark()\r | |
159 | {\r | |
160 | #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)\r | |
161 | test[tp-buffer]++;\r | |
162 | fprintf(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]);\r | |
163 | #endif\r | |
164 | num_markers++;\r | |
165 | return tp - buffer;\r | |
166 | }\r | |
167 | \r | |
168 | /*\r | |
169 | * returns the token pointer n positions ahead.\r | |
170 | * This implies that bufferedToken(1) gets the NEXT symbol of lookahead.\r | |
171 | * This is used in conjunction with the ANTLRParser lookahead buffer.\r | |
172 | *\r | |
173 | * No markers are set or anything. A bunch of input is buffered--that's all.\r | |
174 | * The tp pointer is left alone as the lookahead has not been advanced\r | |
175 | * with getToken(). The next call to getToken() will find a token\r | |
176 | * in the buffer and won't have to call getANTLRToken().\r | |
177 | *\r | |
178 | * If this is called before a consume() is done, how_many_more_i_need is\r | |
179 | * set to 'n'.\r | |
180 | */\r | |
181 | _ANTLRTokenPtr ANTLRTokenBuffer::\r | |
182 | bufferedToken(int n)\r | |
183 | {\r | |
184 | // int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1;\r | |
185 | int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1;\r | |
186 | // Make sure that at least n tokens are available in the buffer\r | |
187 | #ifdef DBG_TBUF\r | |
188 | fprintf(stderr, "bufferedToken(%d)\n", n);\r | |
189 | #endif\r | |
190 | for (int i=1; i<=how_many_more_i_need; i++)\r | |
191 | {\r | |
192 | if ( next > end_of_buffer ) // buffer overflow?\r | |
193 | {\r | |
194 | extendBuffer();\r | |
195 | }\r | |
196 | *next = getANTLRToken();\r | |
197 | (*next)->ref(); // say we have a copy of this pointer in buffer\r | |
198 | last = next;\r | |
199 | next++;\r | |
200 | }\r | |
201 | return tp[n - 1];\r | |
202 | }\r | |
203 | \r | |
204 | /* If no markers are set, the none of the input needs to be saved (except\r | |
205 | * for the lookahead Token pointers). We save only k-1 token pointers as\r | |
206 | * we are guaranteed to do a getANTLRToken() right after this because otherwise\r | |
207 | * we wouldn't have needed to extend the buffer.\r | |
208 | *\r | |
209 | * If there are markers in the buffer, we need to save things and so\r | |
210 | * extendBuffer() is called.\r | |
211 | */\r | |
212 | void ANTLRTokenBuffer::\r | |
213 | makeRoom()\r | |
214 | {\r | |
215 | #ifdef DBG_TBUF\r | |
216 | fprintf(stderr, "in makeRoom.................\n");\r | |
217 | fprintf(stderr, "num_markers==%d\n", num_markers);\r | |
218 | #endif\r | |
219 | /*\r | |
220 | if ( num_markers == 0 )\r | |
221 | {\r | |
222 | */\r | |
223 | #ifdef DBG_TBUF\r | |
224 | fprintf(stderr, "moving lookahead and resetting next\n");\r | |
225 | \r | |
226 | _ANTLRTokenPtr *r;\r | |
227 | fprintf(stderr, "tbuf = [");\r | |
228 | for (r=buffer; r<=last; r++)\r | |
229 | {\r | |
230 | if ( *r==NULL ) fprintf(stderr, " xxx");\r | |
231 | else fprintf(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText());\r | |
232 | }\r | |
233 | fprintf(stderr, " ]\n");\r | |
234 | \r | |
235 | fprintf(stderr,\r | |
236 | "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer);\r | |
237 | #endif\r | |
238 | \r | |
239 | // Delete all tokens from 0..last-(k-1) inclusive\r | |
240 | if ( _deleteTokens )\r | |
241 | {\r | |
242 | _ANTLRTokenPtr *z;\r | |
243 | for (z=buffer; z<=last-(k-1); z++)\r | |
244 | {\r | |
245 | (*z)->deref();\r | |
246 | // z->deref();\r | |
247 | #ifdef DBG_REFCOUNTTOKEN\r | |
248 | fprintf(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n",\r | |
249 | ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());\r | |
250 | #endif\r | |
251 | if ( (*z)->nref()==0 )\r | |
252 | {\r | |
253 | delete (*z);\r | |
254 | }\r | |
255 | }\r | |
256 | }\r | |
257 | \r | |
258 | // reset the buffer to initial conditions, but move k-1 symbols\r | |
259 | // to the beginning of buffer and put new input symbol at k\r | |
260 | _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1;\r | |
261 | // ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1;\r | |
262 | #ifdef DBG_TBUF\r | |
263 | fprintf(stderr, "lookahead buffer = [");\r | |
264 | #endif\r | |
265 | for (int i=1; i<=(k-1); i++)\r | |
266 | {\r | |
267 | *p++ = *q++;\r | |
268 | #ifdef DBG_TBUF\r | |
269 | fprintf(stderr,\r | |
270 | " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText());\r | |
271 | #endif\r | |
272 | }\r | |
273 | #ifdef DBG_TBUF\r | |
274 | fprintf(stderr, " ]\n");\r | |
275 | #endif\r | |
276 | next = &buffer[k-1];\r | |
277 | tp = &buffer[k-1]; // tp points to what will be filled in next\r | |
278 | last = tp-1;\r | |
279 | #ifdef DBG_TBUF\r | |
280 | fprintf(stderr,\r | |
281 | "after: tp=%d, last=%d, next=%d\n",\r | |
282 | tp-buffer, last-buffer, next-buffer);\r | |
283 | #endif\r | |
284 | /*\r | |
285 | }\r | |
286 | else {\r | |
287 | extendBuffer();\r | |
288 | }\r | |
289 | */\r | |
290 | }\r | |
291 | \r | |
292 | /* This function extends 'buffer' by chunk_size and returns with all\r | |
293 | * pointers at the same relative positions in the buffer (the buffer base\r | |
294 | * address could have changed in realloc()) except that 'next' comes\r | |
295 | * back set to where the next token should be stored. All other pointers\r | |
296 | * are untouched.\r | |
297 | */\r | |
298 | void\r | |
299 | ANTLRTokenBuffer::\r | |
300 | extendBuffer()\r | |
301 | {\r | |
302 | int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer;\r | |
303 | #ifdef DBG_TBUF\r | |
304 | fprintf(stderr, "extending physical buffer\n");\r | |
305 | #endif\r | |
306 | buffer_size += chunk_size;\r | |
307 | buffer = (_ANTLRTokenPtr *)\r | |
308 | realloc((char *)(buffer-1),\r | |
309 | (buffer_size+1)*sizeof(_ANTLRTokenPtr ));\r | |
310 | if ( buffer == NULL ) {\r | |
311 | panic("cannot alloc token buffer");\r | |
312 | }\r | |
313 | buffer++; // leave the first elem empty so tp-1 is valid ptr\r | |
314 | \r | |
315 | tp = buffer + save_tp; // put the pointers back to same relative position\r | |
316 | last = buffer + save_last;\r | |
317 | next = buffer + save_next;\r | |
318 | end_of_buffer = &buffer[buffer_size-1];\r | |
319 | // BUGBUG -- threshold = &buffer[(int)(buffer_size*(1.0/2.0))];\r | |
320 | threshold = &buffer[(int)(buffer_size / 2)];\r | |
321 | \r | |
322 | /*\r | |
323 | // zero out new token ptrs so we'll know if something to delete in buffer\r | |
324 | ANTLRAbstractToken **p = end_of_buffer-chunk_size+1;\r | |
325 | for (; p<=end_of_buffer; p++) *p = NULL;\r | |
326 | */\r | |
327 | }\r | |
328 | \r | |
329 | ANTLRParser * ANTLRTokenBuffer:: // MR1\r | |
330 | setParser(ANTLRParser *p) { // MR1\r | |
331 | ANTLRParser *old=parser; // MR1\r | |
332 | parser=p; // MR1\r | |
333 | input->setParser(p); // MR1\r | |
334 | return old; // MR1\r | |
335 | } // MR1\r | |
336 | // MR1\r | |
337 | ANTLRParser * ANTLRTokenBuffer:: // MR1\r | |
338 | getParser() { // MR1\r | |
339 | return parser; // MR1\r | |
340 | } // MR1\r | |
341 | \r | |
342 | /* to avoid having to link in another file just for the smart token ptr\r | |
343 | * stuff, we include it here. Ugh.\r | |
344 | */\r | |
345 | #include ATOKPTR_C\r |