]> git.proxmox.com Git - mirror_edk2.git/blame - Tools/CodeTools/Source/Pccts/support/rexpr/rexpr.c
More renames for Tool Packages
[mirror_edk2.git] / Tools / CodeTools / Source / Pccts / support / rexpr / rexpr.c
CommitLineData
878ddf1f 1/*\r
2 * This file contains code for\r
3 *\r
4 * int rexpr(char *expr, char *s);\r
5 *\r
6 * which answers\r
7 *\r
8 * 1 if 's' is in the language described by the regular expression 'expr'\r
9 * 0 if it is not\r
10 * -1 if the regular expression is invalid\r
11 *\r
12 * Language membership is determined by constructing a non-deterministic\r
13 * finite automata (NFA) from the regular expression. A depth-\r
14 * first-search is performed on the NFA (graph) to check for a match of 's'.\r
15 * Each non-epsilon arc consumes one character from 's'. Backtracking is\r
16 * performed to check all possible paths through the NFA.\r
17 *\r
18 * Regular expressions follow the meta-language:\r
19 *\r
20 * <regExpr> ::= <andExpr> ( '|' <andExpr> )*\r
21 *\r
22 * <andExpr> ::= <expr> ( <expr> )*\r
23 *\r
24 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>\r
25 * | '(' <regExpr> ')' <repeatSymbol>\r
26 * | '{' <regExpr> '}' <repeatSymbol>\r
27 * | <atom> <repeatSymbol>\r
28 *\r
29 * <repeatSymbol> ::= { '*' | '+' }\r
30 *\r
31 * <atomList> ::= <atom> ( <atom> )*\r
32 * | { <atomList> } <atom> '-' <atom> { <atomList> }\r
33 *\r
34 * <atom> ::= Token[Atom]\r
35 *\r
36 * Notes:\r
37 * ~ means complement the set in [..]. i.e. all characters not listed\r
38 * * means match 0 or more times (can be on expression or atom)\r
39 * + means match 1 or more times (can be on expression or atom)\r
40 * {} optional\r
41 * () grouping\r
42 * [] set of atoms\r
43 * x-y all characters from x to y (found only in [..])\r
44 * \xx the character with value xx\r
45 *\r
46 * Examples:\r
47 * [a-z]+\r
48 * match 1 or more lower-case letters (e.g. variable)\r
49 *\r
50 * 0x[0-9A-Fa-f]+\r
51 * match a hex number with 0x on front (e.g. 0xA1FF)\r
52 *\r
53 * [0-9]+.[0-9]+{e[0-9]+}\r
54 * match a floating point number (e.g. 3.14e21)\r
55 *\r
56 * Code example:\r
57 * if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword\r
58 *\r
59 * Terence Parr\r
60 * Purdue University\r
61 * April 1991\r
62 */\r
63\r
64#include <stdio.h>\r
65#include <ctype.h>\r
66#ifdef __STDC__\r
67#include <stdlib.h>\r
68#else\r
69#include <malloc.h>\r
70#endif\r
71#include "rexpr.h"\r
72\r
73#ifdef __USE_PROTOS\r
74static int regExpr( GraphPtr g );\r
75static int andExpr( GraphPtr g );\r
76static int expr( GraphPtr g );\r
77static int repeatSymbol( GraphPtr g );\r
78static int atomList( char *p, int complement );\r
79static void next( void );\r
80static ArcPtr newGraphArc( void );\r
81static NodePtr newNode( void );\r
82static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );\r
83static Graph BuildNFA_atom( int label );\r
84static Graph BuildNFA_AB( Graph A, Graph B );\r
85static Graph BuildNFA_AorB( Graph A, Graph B );\r
86static Graph BuildNFA_set( char *s );\r
87static Graph BuildNFA_Astar( Graph A );\r
88static Graph BuildNFA_Aplus( Graph A );\r
89static Graph BuildNFA_Aoptional( Graph A );\r
90#else\r
91static int regExpr();\r
92static int andExpr();\r
93static int expr();\r
94static int repeatSymbol();\r
95static int atomList();\r
96static void next();\r
97static ArcPtr newGraphArc();\r
98static NodePtr newNode();\r
99static int ArcBetweenGraphNode();\r
100static Graph BuildNFA_atom();\r
101static Graph BuildNFA_AB();\r
102static Graph BuildNFA_AorB();\r
103static Graph BuildNFA_set();\r
104static Graph BuildNFA_Astar();\r
105static Graph BuildNFA_Aplus();\r
106static Graph BuildNFA_Aoptional();\r
107#endif\r
108\r
109static char *_c;\r
110static int token, tokchar;\r
111static NodePtr accept;\r
112static NodePtr freelist = NULL;\r
113\r
114/*\r
115 * return 1 if s in language described by expr\r
116 * 0 if s is not\r
117 * -1 if expr is an invalid regular expression\r
118 */\r
119#ifdef __USE_PROTOS\r
120static int rexpr(char *expr,char *s)\r
121#else\r
122static int rexpr(expr, s)\r
123char *expr, *s;\r
124#endif\r
125{\r
126 NodePtr p,q;\r
127 Graph nfa;\r
128 int result;\r
129\r
130 fprintf(stderr, "rexpr(%s,%s);\n", expr,s);\r
131 freelist = NULL;\r
132 _c = expr;\r
133 next();\r
134 if ( regExpr(&nfa) == -1 ) return -1;\r
135 accept = nfa.right;\r
136 result = match(nfa.left, s);\r
137 /* free all your memory */\r
138 p = q = freelist;\r
139 while ( p!=NULL ) { q = p->track; free(p); p = q; }\r
140 return result;\r
141}\r
142\r
143/*\r
144 * do a depth-first-search on the NFA looking for a path from start to\r
145 * accept state labelled with the characters of 's'.\r
146 */\r
147\r
148#ifdef __USE_PROTOS\r
149static int match(NodePtr automaton,char *s)\r
150#else\r
151static int match(automaton, s)\r
152NodePtr automaton;\r
153char *s;\r
154#endif\r
155{\r
156 ArcPtr p;\r
157 \r
158 if ( automaton == accept && *s == '\0' ) return 1; /* match */\r
159\r
160 for (p=automaton->arcs; p!=NULL; p=p->next) /* try all arcs */\r
161 {\r
162 if ( p->label == Epsilon )\r
163 {\r
164 if ( match(p->target, s) ) return 1;\r
165 }\r
166 else if ( p->label == *s )\r
167 if ( match(p->target, s+1) ) return 1;\r
168 }\r
169 return 0;\r
170}\r
171\r
172/*\r
173 * <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*\r
174 *\r
175 * Return -1 if syntax error\r
176 * Return 0 if none found\r
177 * Return 1 if a regExrp was found\r
178 */\r
179\r
180#ifdef __USE_PROTOS\r
181static int regExpr(GraphPtr g)\r
182#else\r
183static int regExpr(g)\r
184GraphPtr g;\r
185#endif\r
186{\r
187 Graph g1, g2;\r
188 \r
189 if ( andExpr(&g1) == -1 )\r
190 {\r
191 return -1;\r
192 }\r
193 \r
194 while ( token == '|' )\r
195 {\r
196 int a;\r
197 next();\r
198 a = andExpr(&g2);\r
199 if ( a == -1 ) return -1; /* syntax error below */\r
200 else if ( !a ) return 1; /* empty alternative */\r
201 g1 = BuildNFA_AorB(g1, g2);\r
202 }\r
203 \r
204 if ( token!='\0' ) return -1;\r
205\r
206 *g = g1;\r
207 return 1;\r
208}\r
209\r
210/*\r
211 * <andExpr> ::= <expr> ( <expr> )*\r
212 */\r
213\r
214#ifdef __USE_PROTOS\r
215static int andExpr(GraphPtr g)\r
216#else\r
217static int andExpr(g)\r
218GraphPtr g;\r
219#endif\r
220{\r
221 Graph g1, g2;\r
222 \r
223 if ( expr(&g1) == -1 )\r
224 {\r
225 return -1;\r
226 }\r
227 \r
228 while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )\r
229 {\r
230 if (expr(&g2) == -1) return -1;\r
231 g1 = BuildNFA_AB(g1, g2);\r
232 }\r
233 \r
234 *g = g1;\r
235 return 1;\r
236}\r
237\r
238/*\r
239 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>\r
240 * | '(' <regExpr> ')' <repeatSymbol>\r
241 * | '{' <regExpr> '}' <repeatSymbol>\r
242 * | <atom> <repeatSymbol>\r
243 */\r
244\r
245#ifdef __USE_PROTOS\r
246static int expr(GraphPtr g)\r
247#else\r
248static int expr(g)\r
249GraphPtr g;\r
250#endif\r
251{\r
252 int complement = 0;\r
253 char s[257]; /* alloc space for string of char in [] */\r
254 \r
255 if ( token == '~' || token == '[' )\r
256 {\r
257 if ( token == '~' ) {complement = 1; next();}\r
258 if ( token != '[' ) return -1;\r
259 next();\r
260 if ( atomList( s, complement ) == -1 ) return -1;\r
261 *g = BuildNFA_set( s );\r
262 if ( token != ']' ) return -1;\r
263 next();\r
264 repeatSymbol( g );\r
265 return 1;\r
266 }\r
267 if ( token == '(' )\r
268 {\r
269 next();\r
270 if ( regExpr( g ) == -1 ) return -1;\r
271 if ( token != ')' ) return -1;\r
272 next();\r
273 repeatSymbol( g );\r
274 return 1;\r
275 }\r
276 if ( token == '{' )\r
277 {\r
278 next();\r
279 if ( regExpr( g ) == -1 ) return -1;\r
280 if ( token != '}' ) return -1;\r
281 next();\r
282 /* S p e c i a l C a s e O p t i o n a l { } */\r
283 if ( token != '*' && token != '+' )\r
284 {\r
285 *g = BuildNFA_Aoptional( *g );\r
286 }\r
287 repeatSymbol( g );\r
288 return 1;\r
289 }\r
290 if ( token == Atom )\r
291 {\r
292 *g = BuildNFA_atom( tokchar );\r
293 next();\r
294 repeatSymbol( g );\r
295 return 1;\r
296 }\r
297 \r
298 return -1;\r
299}\r
300\r
301/*\r
302 * <repeatSymbol> ::= { '*' | '+' }\r
303 */\r
304#ifdef __USE_PROTOS\r
305static int repeatSymbol(GraphPtr g)\r
306#else\r
307static int repeatSymbol(g)\r
308GraphPtr g;\r
309#endif\r
310{\r
311 switch ( token )\r
312 {\r
313 case '*' : *g = BuildNFA_Astar( *g ); next(); break;\r
314 case '+' : *g = BuildNFA_Aplus( *g ); next(); break;\r
315 }\r
316 return 1;\r
317}\r
318\r
319/*\r
320 * <atomList> ::= <atom> { <atom> }*\r
321 * { <atomList> } <atom> '-' <atom> { <atomList> }\r
322 *\r
323 * a-b is same as ab\r
324 * q-a is same as q\r
325 */\r
326\r
327#ifdef __USE_PROTOS\r
328static int atomList(char *p, int complement)\r
329#else\r
330static int atomList(p, complement)\r
331char *p;\r
332int complement;\r
333#endif\r
334{\r
335 static unsigned char set[256]; /* no duplicates */\r
336 int first, last, i;\r
337 char *s = p;\r
338 \r
339 if ( token != Atom ) return -1;\r
340 \r
341 for (i=0; i<256; i++) set[i] = 0;\r
342 while ( token == Atom )\r
343 {\r
344 if ( !set[tokchar] ) *s++ = tokchar;\r
345 set[tokchar] = 1; /* Add atom to set */\r
346 next();\r
347 if ( token == '-' ) /* have we found '-' */\r
348 {\r
349 first = *(s-1); /* Get last char */\r
350 next();\r
351 if ( token != Atom ) return -1;\r
352 else\r
353 {\r
354 last = tokchar;\r
355 }\r
356 for (i = first+1; i <= last; i++)\r
357 {\r
358 if ( !set[tokchar] ) *s++ = i;\r
359 set[i] = 1; /* Add atom to set */\r
360 }\r
361 next();\r
362 }\r
363 }\r
364 *s = '\0';\r
365 if ( complement )\r
366 {\r
367 for (i=0; i<256; i++) set[i] = !set[i];\r
368 for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;\r
369 *s = '\0';\r
370 }\r
371 return 1;\r
372}\r
373\r
374/* a somewhat stupid lexical analyzer */\r
375\r
376#ifdef __USE_PROTOS\r
377static void next(void)\r
378#else\r
379static void next()\r
380#endif\r
381{\r
382 while ( *_c==' ' || *_c=='\t' || *_c=='\n' ) _c++;\r
383 if ( *_c=='\\' )\r
384 {\r
385 _c++;\r
386 if ( isdigit(*_c) )\r
387 {\r
388 int n=0;\r
389 while ( isdigit(*_c) )\r
390 {\r
391 n = n*10 + (*_c++ - '0');\r
392 }\r
393 if ( n>255 ) n=255;\r
394 tokchar = n;\r
395 }\r
396 else\r
397 {\r
398 switch (*_c)\r
399 {\r
400 case 'n' : tokchar = '\n'; break;\r
401 case 't' : tokchar = '\t'; break;\r
402 case 'r' : tokchar = '\r'; break;\r
403 default : tokchar = *_c;\r
404 }\r
405 _c++;\r
406 }\r
407 token = Atom;\r
408 }\r
409 else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&\r
410 *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&\r
411 *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )\r
412 {\r
413 token = Atom;\r
414 tokchar = *_c++;\r
415 }\r
416 else\r
417 {\r
418 token = tokchar = *_c++;\r
419 }\r
420}\r
421\r
422/* N F A B u i l d i n g R o u t i n e s */\r
423\r
424#ifdef __USE_PROTOS\r
425static ArcPtr newGraphArc(void)\r
426#else\r
427static ArcPtr newGraphArc()\r
428#endif\r
429{\r
430 ArcPtr p;\r
431 p = (ArcPtr) calloc(1, sizeof(Arc));\r
432 if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}\r
433 if ( freelist != NULL ) p->track = (ArcPtr) freelist;\r
434 freelist = (NodePtr) p;\r
435 return p;\r
436}\r
437\r
438#ifdef __USE_PROTOS\r
439static NodePtr newNode(void)\r
440#else\r
441static NodePtr newNode()\r
442#endif\r
443{\r
444 NodePtr p;\r
445 p = (NodePtr) calloc(1, sizeof(Node));\r
446 if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}\r
447 if ( freelist != NULL ) p->track = freelist;\r
448 freelist = p;\r
449 return p;\r
450}\r
451\r
452#ifdef __USE_PROTOS\r
453static void ArcBetweenGraphNodes(NodePtr i,NodePtr j,int label)\r
454#else\r
455static void ArcBetweenGraphNodes(i, j, label)\r
456NodePtr i, j;\r
457int label;\r
458#endif\r
459{\r
460 ArcPtr a;\r
461 \r
462 a = newGraphArc();\r
463 if ( i->arcs == NULL ) i->arctail = i->arcs = a;\r
464 else {(i->arctail)->next = a; i->arctail = a;}\r
465 a->label = label;\r
466 a->target = j;\r
467}\r
468\r
469#ifdef __USE_PROTOS\r
470static Graph BuildNFA_atom(int label)\r
471#else\r
472static Graph BuildNFA_atom(label)\r
473int label;\r
474#endif\r
475{\r
476 Graph g;\r
477 \r
478 g.left = newNode();\r
479 g.right = newNode();\r
480 ArcBetweenGraphNodes(g.left, g.right, label);\r
481 return( g );\r
482}\r
483\r
484#ifdef __USE_PROTOS\r
485static Graph BuildNFA_AB(Graph A,Graph B)\r
486#else\r
487static Graph BuildNFA_AB(A, B)\r
488Graph A, B;\r
489#endif\r
490{\r
491 Graph g;\r
492 \r
493 ArcBetweenGraphNodes(A.right, B.left, Epsilon);\r
494 g.left = A.left;\r
495 g.right = B.right;\r
496 return( g );\r
497}\r
498\r
499#ifdef __USE_PROTOS\r
500static Graph BuildNFA_AorB(Graph A,Graph B)\r
501#else\r
502static Graph BuildNFA_AorB(A, B)\r
503Graph A, B;\r
504#endif\r
505{\r
506 Graph g;\r
507 \r
508 g.left = newNode();\r
509 ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
510 ArcBetweenGraphNodes(g.left, B.left, Epsilon);\r
511 g.right = newNode();\r
512 ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
513 ArcBetweenGraphNodes(B.right, g.right, Epsilon);\r
514 return( g );\r
515}\r
516\r
517#ifdef __USE_PROTOS\r
518static Graph BuildNFA_set(char *s)\r
519#else\r
520static Graph BuildNFA_set( s )\r
521char *s;\r
522#endif\r
523{\r
524 Graph g;\r
525 \r
526 if ( s == NULL ) return g;\r
527 \r
528 g.left = newNode();\r
529 g.right = newNode();\r
530 while ( *s != '\0' )\r
531 {\r
532 ArcBetweenGraphNodes(g.left, g.right, *s++);\r
533 }\r
534 return g;\r
535}\r
536\r
537#ifdef __USE_PROTOS\r
538static Graph BuildNFA_Astar(Graph A)\r
539#else\r
540static Graph BuildNFA_Astar( A )\r
541Graph A;\r
542#endif\r
543{\r
544 Graph g;\r
545\r
546 g.left = newNode();\r
547 g.right = newNode();\r
548 \r
549 ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
550 ArcBetweenGraphNodes(g.left, g.right, Epsilon);\r
551 ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
552 ArcBetweenGraphNodes(A.right, A.left, Epsilon);\r
553 \r
554 return( g );\r
555}\r
556\r
557#ifdef __USE_PROTOS\r
558static Graph BuildNFA_Aplus(Graph A)\r
559#else\r
560static Graph BuildNFA_Aplus( A )\r
561Graph A;\r
562#endif\r
563{\r
564 ArcBetweenGraphNodes(A.right, A.left, Epsilon);\r
565 \r
566 return( A );\r
567}\r
568\r
569#ifdef __USE_PROTOS\r
570static Graph BuildNFA_Aoptional(Graph A)\r
571#else\r
572static Graph BuildNFA_Aoptional( A )\r
573Graph A;\r
574#endif\r
575{\r
576 Graph g;\r
577 \r
578 g.left = newNode();\r
579 g.right = newNode();\r
580 \r
581 ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
582 ArcBetweenGraphNodes(g.left, g.right, Epsilon);\r
583 ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
584 \r
585 return( g );\r
586}\r