]> git.proxmox.com Git - mirror_edk2.git/blob - Tools/CCode/Source/Pccts/support/rexpr/rexpr.c
More moves for Tool Packages
[mirror_edk2.git] / Tools / CCode / Source / Pccts / support / rexpr / rexpr.c
1 /*
2 * This file contains code for
3 *
4 * int rexpr(char *expr, char *s);
5 *
6 * which answers
7 *
8 * 1 if 's' is in the language described by the regular expression 'expr'
9 * 0 if it is not
10 * -1 if the regular expression is invalid
11 *
12 * Language membership is determined by constructing a non-deterministic
13 * finite automata (NFA) from the regular expression. A depth-
14 * first-search is performed on the NFA (graph) to check for a match of 's'.
15 * Each non-epsilon arc consumes one character from 's'. Backtracking is
16 * performed to check all possible paths through the NFA.
17 *
18 * Regular expressions follow the meta-language:
19 *
20 * <regExpr> ::= <andExpr> ( '|' <andExpr> )*
21 *
22 * <andExpr> ::= <expr> ( <expr> )*
23 *
24 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
25 * | '(' <regExpr> ')' <repeatSymbol>
26 * | '{' <regExpr> '}' <repeatSymbol>
27 * | <atom> <repeatSymbol>
28 *
29 * <repeatSymbol> ::= { '*' | '+' }
30 *
31 * <atomList> ::= <atom> ( <atom> )*
32 * | { <atomList> } <atom> '-' <atom> { <atomList> }
33 *
34 * <atom> ::= Token[Atom]
35 *
36 * Notes:
37 * ~ means complement the set in [..]. i.e. all characters not listed
38 * * means match 0 or more times (can be on expression or atom)
39 * + means match 1 or more times (can be on expression or atom)
40 * {} optional
41 * () grouping
42 * [] set of atoms
43 * x-y all characters from x to y (found only in [..])
44 * \xx the character with value xx
45 *
46 * Examples:
47 * [a-z]+
48 * match 1 or more lower-case letters (e.g. variable)
49 *
50 * 0x[0-9A-Fa-f]+
51 * match a hex number with 0x on front (e.g. 0xA1FF)
52 *
53 * [0-9]+.[0-9]+{e[0-9]+}
54 * match a floating point number (e.g. 3.14e21)
55 *
56 * Code example:
57 * if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword
58 *
59 * Terence Parr
60 * Purdue University
61 * April 1991
62 */
63
64 #include <stdio.h>
65 #include <ctype.h>
66 #ifdef __STDC__
67 #include <stdlib.h>
68 #else
69 #include <malloc.h>
70 #endif
71 #include "rexpr.h"
72
73 #ifdef __USE_PROTOS
74 static int regExpr( GraphPtr g );
75 static int andExpr( GraphPtr g );
76 static int expr( GraphPtr g );
77 static int repeatSymbol( GraphPtr g );
78 static int atomList( char *p, int complement );
79 static void next( void );
80 static ArcPtr newGraphArc( void );
81 static NodePtr newNode( void );
82 static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );
83 static Graph BuildNFA_atom( int label );
84 static Graph BuildNFA_AB( Graph A, Graph B );
85 static Graph BuildNFA_AorB( Graph A, Graph B );
86 static Graph BuildNFA_set( char *s );
87 static Graph BuildNFA_Astar( Graph A );
88 static Graph BuildNFA_Aplus( Graph A );
89 static Graph BuildNFA_Aoptional( Graph A );
90 #else
91 static int regExpr();
92 static int andExpr();
93 static int expr();
94 static int repeatSymbol();
95 static int atomList();
96 static void next();
97 static ArcPtr newGraphArc();
98 static NodePtr newNode();
99 static int ArcBetweenGraphNode();
100 static Graph BuildNFA_atom();
101 static Graph BuildNFA_AB();
102 static Graph BuildNFA_AorB();
103 static Graph BuildNFA_set();
104 static Graph BuildNFA_Astar();
105 static Graph BuildNFA_Aplus();
106 static Graph BuildNFA_Aoptional();
107 #endif
108
109 static char *_c;
110 static int token, tokchar;
111 static NodePtr accept;
112 static NodePtr freelist = NULL;
113
114 /*
115 * return 1 if s in language described by expr
116 * 0 if s is not
117 * -1 if expr is an invalid regular expression
118 */
119 #ifdef __USE_PROTOS
120 static int rexpr(char *expr,char *s)
121 #else
122 static int rexpr(expr, s)
123 char *expr, *s;
124 #endif
125 {
126 NodePtr p,q;
127 Graph nfa;
128 int result;
129
130 fprintf(stderr, "rexpr(%s,%s);\n", expr,s);
131 freelist = NULL;
132 _c = expr;
133 next();
134 if ( regExpr(&nfa) == -1 ) return -1;
135 accept = nfa.right;
136 result = match(nfa.left, s);
137 /* free all your memory */
138 p = q = freelist;
139 while ( p!=NULL ) { q = p->track; free(p); p = q; }
140 return result;
141 }
142
143 /*
144 * do a depth-first-search on the NFA looking for a path from start to
145 * accept state labelled with the characters of 's'.
146 */
147
148 #ifdef __USE_PROTOS
149 static int match(NodePtr automaton,char *s)
150 #else
151 static int match(automaton, s)
152 NodePtr automaton;
153 char *s;
154 #endif
155 {
156 ArcPtr p;
157
158 if ( automaton == accept && *s == '\0' ) return 1; /* match */
159
160 for (p=automaton->arcs; p!=NULL; p=p->next) /* try all arcs */
161 {
162 if ( p->label == Epsilon )
163 {
164 if ( match(p->target, s) ) return 1;
165 }
166 else if ( p->label == *s )
167 if ( match(p->target, s+1) ) return 1;
168 }
169 return 0;
170 }
171
172 /*
173 * <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*
174 *
175 * Return -1 if syntax error
176 * Return 0 if none found
177 * Return 1 if a regExrp was found
178 */
179
180 #ifdef __USE_PROTOS
181 static int regExpr(GraphPtr g)
182 #else
183 static int regExpr(g)
184 GraphPtr g;
185 #endif
186 {
187 Graph g1, g2;
188
189 if ( andExpr(&g1) == -1 )
190 {
191 return -1;
192 }
193
194 while ( token == '|' )
195 {
196 int a;
197 next();
198 a = andExpr(&g2);
199 if ( a == -1 ) return -1; /* syntax error below */
200 else if ( !a ) return 1; /* empty alternative */
201 g1 = BuildNFA_AorB(g1, g2);
202 }
203
204 if ( token!='\0' ) return -1;
205
206 *g = g1;
207 return 1;
208 }
209
210 /*
211 * <andExpr> ::= <expr> ( <expr> )*
212 */
213
214 #ifdef __USE_PROTOS
215 static int andExpr(GraphPtr g)
216 #else
217 static int andExpr(g)
218 GraphPtr g;
219 #endif
220 {
221 Graph g1, g2;
222
223 if ( expr(&g1) == -1 )
224 {
225 return -1;
226 }
227
228 while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )
229 {
230 if (expr(&g2) == -1) return -1;
231 g1 = BuildNFA_AB(g1, g2);
232 }
233
234 *g = g1;
235 return 1;
236 }
237
238 /*
239 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
240 * | '(' <regExpr> ')' <repeatSymbol>
241 * | '{' <regExpr> '}' <repeatSymbol>
242 * | <atom> <repeatSymbol>
243 */
244
245 #ifdef __USE_PROTOS
246 static int expr(GraphPtr g)
247 #else
248 static int expr(g)
249 GraphPtr g;
250 #endif
251 {
252 int complement = 0;
253 char s[257]; /* alloc space for string of char in [] */
254
255 if ( token == '~' || token == '[' )
256 {
257 if ( token == '~' ) {complement = 1; next();}
258 if ( token != '[' ) return -1;
259 next();
260 if ( atomList( s, complement ) == -1 ) return -1;
261 *g = BuildNFA_set( s );
262 if ( token != ']' ) return -1;
263 next();
264 repeatSymbol( g );
265 return 1;
266 }
267 if ( token == '(' )
268 {
269 next();
270 if ( regExpr( g ) == -1 ) return -1;
271 if ( token != ')' ) return -1;
272 next();
273 repeatSymbol( g );
274 return 1;
275 }
276 if ( token == '{' )
277 {
278 next();
279 if ( regExpr( g ) == -1 ) return -1;
280 if ( token != '}' ) return -1;
281 next();
282 /* S p e c i a l C a s e O p t i o n a l { } */
283 if ( token != '*' && token != '+' )
284 {
285 *g = BuildNFA_Aoptional( *g );
286 }
287 repeatSymbol( g );
288 return 1;
289 }
290 if ( token == Atom )
291 {
292 *g = BuildNFA_atom( tokchar );
293 next();
294 repeatSymbol( g );
295 return 1;
296 }
297
298 return -1;
299 }
300
301 /*
302 * <repeatSymbol> ::= { '*' | '+' }
303 */
304 #ifdef __USE_PROTOS
305 static int repeatSymbol(GraphPtr g)
306 #else
307 static int repeatSymbol(g)
308 GraphPtr g;
309 #endif
310 {
311 switch ( token )
312 {
313 case '*' : *g = BuildNFA_Astar( *g ); next(); break;
314 case '+' : *g = BuildNFA_Aplus( *g ); next(); break;
315 }
316 return 1;
317 }
318
319 /*
320 * <atomList> ::= <atom> { <atom> }*
321 * { <atomList> } <atom> '-' <atom> { <atomList> }
322 *
323 * a-b is same as ab
324 * q-a is same as q
325 */
326
327 #ifdef __USE_PROTOS
328 static int atomList(char *p, int complement)
329 #else
330 static int atomList(p, complement)
331 char *p;
332 int complement;
333 #endif
334 {
335 static unsigned char set[256]; /* no duplicates */
336 int first, last, i;
337 char *s = p;
338
339 if ( token != Atom ) return -1;
340
341 for (i=0; i<256; i++) set[i] = 0;
342 while ( token == Atom )
343 {
344 if ( !set[tokchar] ) *s++ = tokchar;
345 set[tokchar] = 1; /* Add atom to set */
346 next();
347 if ( token == '-' ) /* have we found '-' */
348 {
349 first = *(s-1); /* Get last char */
350 next();
351 if ( token != Atom ) return -1;
352 else
353 {
354 last = tokchar;
355 }
356 for (i = first+1; i <= last; i++)
357 {
358 if ( !set[tokchar] ) *s++ = i;
359 set[i] = 1; /* Add atom to set */
360 }
361 next();
362 }
363 }
364 *s = '\0';
365 if ( complement )
366 {
367 for (i=0; i<256; i++) set[i] = !set[i];
368 for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;
369 *s = '\0';
370 }
371 return 1;
372 }
373
374 /* a somewhat stupid lexical analyzer */
375
376 #ifdef __USE_PROTOS
377 static void next(void)
378 #else
379 static void next()
380 #endif
381 {
382 while ( *_c==' ' || *_c=='\t' || *_c=='\n' ) _c++;
383 if ( *_c=='\\' )
384 {
385 _c++;
386 if ( isdigit(*_c) )
387 {
388 int n=0;
389 while ( isdigit(*_c) )
390 {
391 n = n*10 + (*_c++ - '0');
392 }
393 if ( n>255 ) n=255;
394 tokchar = n;
395 }
396 else
397 {
398 switch (*_c)
399 {
400 case 'n' : tokchar = '\n'; break;
401 case 't' : tokchar = '\t'; break;
402 case 'r' : tokchar = '\r'; break;
403 default : tokchar = *_c;
404 }
405 _c++;
406 }
407 token = Atom;
408 }
409 else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&
410 *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&
411 *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )
412 {
413 token = Atom;
414 tokchar = *_c++;
415 }
416 else
417 {
418 token = tokchar = *_c++;
419 }
420 }
421
422 /* N F A B u i l d i n g R o u t i n e s */
423
424 #ifdef __USE_PROTOS
425 static ArcPtr newGraphArc(void)
426 #else
427 static ArcPtr newGraphArc()
428 #endif
429 {
430 ArcPtr p;
431 p = (ArcPtr) calloc(1, sizeof(Arc));
432 if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}
433 if ( freelist != NULL ) p->track = (ArcPtr) freelist;
434 freelist = (NodePtr) p;
435 return p;
436 }
437
438 #ifdef __USE_PROTOS
439 static NodePtr newNode(void)
440 #else
441 static NodePtr newNode()
442 #endif
443 {
444 NodePtr p;
445 p = (NodePtr) calloc(1, sizeof(Node));
446 if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}
447 if ( freelist != NULL ) p->track = freelist;
448 freelist = p;
449 return p;
450 }
451
452 #ifdef __USE_PROTOS
453 static void ArcBetweenGraphNodes(NodePtr i,NodePtr j,int label)
454 #else
455 static void ArcBetweenGraphNodes(i, j, label)
456 NodePtr i, j;
457 int label;
458 #endif
459 {
460 ArcPtr a;
461
462 a = newGraphArc();
463 if ( i->arcs == NULL ) i->arctail = i->arcs = a;
464 else {(i->arctail)->next = a; i->arctail = a;}
465 a->label = label;
466 a->target = j;
467 }
468
469 #ifdef __USE_PROTOS
470 static Graph BuildNFA_atom(int label)
471 #else
472 static Graph BuildNFA_atom(label)
473 int label;
474 #endif
475 {
476 Graph g;
477
478 g.left = newNode();
479 g.right = newNode();
480 ArcBetweenGraphNodes(g.left, g.right, label);
481 return( g );
482 }
483
484 #ifdef __USE_PROTOS
485 static Graph BuildNFA_AB(Graph A,Graph B)
486 #else
487 static Graph BuildNFA_AB(A, B)
488 Graph A, B;
489 #endif
490 {
491 Graph g;
492
493 ArcBetweenGraphNodes(A.right, B.left, Epsilon);
494 g.left = A.left;
495 g.right = B.right;
496 return( g );
497 }
498
499 #ifdef __USE_PROTOS
500 static Graph BuildNFA_AorB(Graph A,Graph B)
501 #else
502 static Graph BuildNFA_AorB(A, B)
503 Graph A, B;
504 #endif
505 {
506 Graph g;
507
508 g.left = newNode();
509 ArcBetweenGraphNodes(g.left, A.left, Epsilon);
510 ArcBetweenGraphNodes(g.left, B.left, Epsilon);
511 g.right = newNode();
512 ArcBetweenGraphNodes(A.right, g.right, Epsilon);
513 ArcBetweenGraphNodes(B.right, g.right, Epsilon);
514 return( g );
515 }
516
517 #ifdef __USE_PROTOS
518 static Graph BuildNFA_set(char *s)
519 #else
520 static Graph BuildNFA_set( s )
521 char *s;
522 #endif
523 {
524 Graph g;
525
526 if ( s == NULL ) return g;
527
528 g.left = newNode();
529 g.right = newNode();
530 while ( *s != '\0' )
531 {
532 ArcBetweenGraphNodes(g.left, g.right, *s++);
533 }
534 return g;
535 }
536
537 #ifdef __USE_PROTOS
538 static Graph BuildNFA_Astar(Graph A)
539 #else
540 static Graph BuildNFA_Astar( A )
541 Graph A;
542 #endif
543 {
544 Graph g;
545
546 g.left = newNode();
547 g.right = newNode();
548
549 ArcBetweenGraphNodes(g.left, A.left, Epsilon);
550 ArcBetweenGraphNodes(g.left, g.right, Epsilon);
551 ArcBetweenGraphNodes(A.right, g.right, Epsilon);
552 ArcBetweenGraphNodes(A.right, A.left, Epsilon);
553
554 return( g );
555 }
556
557 #ifdef __USE_PROTOS
558 static Graph BuildNFA_Aplus(Graph A)
559 #else
560 static Graph BuildNFA_Aplus( A )
561 Graph A;
562 #endif
563 {
564 ArcBetweenGraphNodes(A.right, A.left, Epsilon);
565
566 return( A );
567 }
568
569 #ifdef __USE_PROTOS
570 static Graph BuildNFA_Aoptional(Graph A)
571 #else
572 static Graph BuildNFA_Aoptional( A )
573 Graph A;
574 #endif
575 {
576 Graph g;
577
578 g.left = newNode();
579 g.right = newNode();
580
581 ArcBetweenGraphNodes(g.left, A.left, Epsilon);
582 ArcBetweenGraphNodes(g.left, g.right, Epsilon);
583 ArcBetweenGraphNodes(A.right, g.right, Epsilon);
584
585 return( g );
586 }