]>
git.proxmox.com Git - mirror_edk2.git/blob - Tools/CCode/Source/Pccts/support/rexpr/rexpr.c
2 * This file contains code for
4 * int rexpr(char *expr, char *s);
8 * 1 if 's' is in the language described by the regular expression 'expr'
10 * -1 if the regular expression is invalid
12 * Language membership is determined by constructing a non-deterministic
13 * finite automata (NFA) from the regular expression. A depth-
14 * first-search is performed on the NFA (graph) to check for a match of 's'.
15 * Each non-epsilon arc consumes one character from 's'. Backtracking is
16 * performed to check all possible paths through the NFA.
18 * Regular expressions follow the meta-language:
20 * <regExpr> ::= <andExpr> ( '|' <andExpr> )*
22 * <andExpr> ::= <expr> ( <expr> )*
24 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
25 * | '(' <regExpr> ')' <repeatSymbol>
26 * | '{' <regExpr> '}' <repeatSymbol>
27 * | <atom> <repeatSymbol>
29 * <repeatSymbol> ::= { '*' | '+' }
31 * <atomList> ::= <atom> ( <atom> )*
32 * | { <atomList> } <atom> '-' <atom> { <atomList> }
34 * <atom> ::= Token[Atom]
37 * ~ means complement the set in [..]. i.e. all characters not listed
38 * * means match 0 or more times (can be on expression or atom)
39 * + means match 1 or more times (can be on expression or atom)
43 * x-y all characters from x to y (found only in [..])
44 * \xx the character with value xx
48 * match 1 or more lower-case letters (e.g. variable)
51 * match a hex number with 0x on front (e.g. 0xA1FF)
53 * [0-9]+.[0-9]+{e[0-9]+}
54 * match a floating point number (e.g. 3.14e21)
57 * if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword
74 static int regExpr( GraphPtr g
);
75 static int andExpr( GraphPtr g
);
76 static int expr( GraphPtr g
);
77 static int repeatSymbol( GraphPtr g
);
78 static int atomList( char *p
, int complement
);
79 static void next( void );
80 static ArcPtr
newGraphArc( void );
81 static NodePtr
newNode( void );
82 static int ArcBetweenGraphNode( NodePtr i
, NodePtr j
, int label
);
83 static Graph
BuildNFA_atom( int label
);
84 static Graph
BuildNFA_AB( Graph A
, Graph B
);
85 static Graph
BuildNFA_AorB( Graph A
, Graph B
);
86 static Graph
BuildNFA_set( char *s
);
87 static Graph
BuildNFA_Astar( Graph A
);
88 static Graph
BuildNFA_Aplus( Graph A
);
89 static Graph
BuildNFA_Aoptional( Graph A
);
94 static int repeatSymbol();
95 static int atomList();
97 static ArcPtr
newGraphArc();
98 static NodePtr
newNode();
99 static int ArcBetweenGraphNode();
100 static Graph
BuildNFA_atom();
101 static Graph
BuildNFA_AB();
102 static Graph
BuildNFA_AorB();
103 static Graph
BuildNFA_set();
104 static Graph
BuildNFA_Astar();
105 static Graph
BuildNFA_Aplus();
106 static Graph
BuildNFA_Aoptional();
110 static int token
, tokchar
;
111 static NodePtr accept
;
112 static NodePtr freelist
= NULL
;
115 * return 1 if s in language described by expr
117 * -1 if expr is an invalid regular expression
120 static int rexpr(char *expr
,char *s
)
122 static int rexpr(expr
, s
)
130 fprintf(stderr
, "rexpr(%s,%s);\n", expr
,s
);
134 if ( regExpr(&nfa
) == -1 ) return -1;
136 result
= match(nfa
.left
, s
);
137 /* free all your memory */
139 while ( p
!=NULL
) { q
= p
->track
; free(p
); p
= q
; }
144 * do a depth-first-search on the NFA looking for a path from start to
145 * accept state labelled with the characters of 's'.
149 static int match(NodePtr automaton
,char *s
)
151 static int match(automaton
, s
)
158 if ( automaton
== accept
&& *s
== '\0' ) return 1; /* match */
160 for (p
=automaton
->arcs
; p
!=NULL
; p
=p
->next
) /* try all arcs */
162 if ( p
->label
== Epsilon
)
164 if ( match(p
->target
, s
) ) return 1;
166 else if ( p
->label
== *s
)
167 if ( match(p
->target
, s
+1) ) return 1;
173 * <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*
175 * Return -1 if syntax error
176 * Return 0 if none found
177 * Return 1 if a regExrp was found
181 static int regExpr(GraphPtr g
)
183 static int regExpr(g
)
189 if ( andExpr(&g1
) == -1 )
194 while ( token
== '|' )
199 if ( a
== -1 ) return -1; /* syntax error below */
200 else if ( !a
) return 1; /* empty alternative */
201 g1
= BuildNFA_AorB(g1
, g2
);
204 if ( token
!='\0' ) return -1;
211 * <andExpr> ::= <expr> ( <expr> )*
215 static int andExpr(GraphPtr g
)
217 static int andExpr(g
)
223 if ( expr(&g1
) == -1 )
228 while ( token
==Atom
|| token
=='{' || token
=='(' || token
=='~' || token
=='[' )
230 if (expr(&g2
) == -1) return -1;
231 g1
= BuildNFA_AB(g1
, g2
);
239 * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
240 * | '(' <regExpr> ')' <repeatSymbol>
241 * | '{' <regExpr> '}' <repeatSymbol>
242 * | <atom> <repeatSymbol>
246 static int expr(GraphPtr g
)
253 char s
[257]; /* alloc space for string of char in [] */
255 if ( token
== '~' || token
== '[' )
257 if ( token
== '~' ) {complement
= 1; next();}
258 if ( token
!= '[' ) return -1;
260 if ( atomList( s
, complement
) == -1 ) return -1;
261 *g
= BuildNFA_set( s
);
262 if ( token
!= ']' ) return -1;
270 if ( regExpr( g
) == -1 ) return -1;
271 if ( token
!= ')' ) return -1;
279 if ( regExpr( g
) == -1 ) return -1;
280 if ( token
!= '}' ) return -1;
282 /* S p e c i a l C a s e O p t i o n a l { } */
283 if ( token
!= '*' && token
!= '+' )
285 *g
= BuildNFA_Aoptional( *g
);
292 *g
= BuildNFA_atom( tokchar
);
302 * <repeatSymbol> ::= { '*' | '+' }
305 static int repeatSymbol(GraphPtr g
)
307 static int repeatSymbol(g
)
313 case '*' : *g
= BuildNFA_Astar( *g
); next(); break;
314 case '+' : *g
= BuildNFA_Aplus( *g
); next(); break;
320 * <atomList> ::= <atom> { <atom> }*
321 * { <atomList> } <atom> '-' <atom> { <atomList> }
328 static int atomList(char *p
, int complement
)
330 static int atomList(p
, complement
)
335 static unsigned char set
[256]; /* no duplicates */
339 if ( token
!= Atom
) return -1;
341 for (i
=0; i
<256; i
++) set
[i
] = 0;
342 while ( token
== Atom
)
344 if ( !set
[tokchar
] ) *s
++ = tokchar
;
345 set
[tokchar
] = 1; /* Add atom to set */
347 if ( token
== '-' ) /* have we found '-' */
349 first
= *(s
-1); /* Get last char */
351 if ( token
!= Atom
) return -1;
356 for (i
= first
+1; i
<= last
; i
++)
358 if ( !set
[tokchar
] ) *s
++ = i
;
359 set
[i
] = 1; /* Add atom to set */
367 for (i
=0; i
<256; i
++) set
[i
] = !set
[i
];
368 for (i
=1,s
=p
; i
<256; i
++) if ( set
[i
] ) *s
++ = i
;
374 /* a somewhat stupid lexical analyzer */
377 static void next(void)
382 while ( *_c
==' ' || *_c
=='\t' || *_c
=='\n' ) _c
++;
389 while ( isdigit(*_c
) )
391 n
= n
*10 + (*_c
++ - '0');
400 case 'n' : tokchar
= '\n'; break;
401 case 't' : tokchar
= '\t'; break;
402 case 'r' : tokchar
= '\r'; break;
403 default : tokchar
= *_c
;
409 else if ( isgraph(*_c
) && *_c
!='[' && *_c
!='(' && *_c
!='{' &&
410 *_c
!='-' && *_c
!='}' && *_c
!=')' && *_c
!=']' &&
411 *_c
!='+' && *_c
!='*' && *_c
!='~' && *_c
!='|' )
418 token
= tokchar
= *_c
++;
422 /* N F A B u i l d i n g R o u t i n e s */
425 static ArcPtr
newGraphArc(void)
427 static ArcPtr
newGraphArc()
431 p
= (ArcPtr
) calloc(1, sizeof(Arc
));
432 if ( p
==NULL
) {fprintf(stderr
,"rexpr: out of memory\n"); exit(-1);}
433 if ( freelist
!= NULL
) p
->track
= (ArcPtr
) freelist
;
434 freelist
= (NodePtr
) p
;
439 static NodePtr
newNode(void)
441 static NodePtr
newNode()
445 p
= (NodePtr
) calloc(1, sizeof(Node
));
446 if ( p
==NULL
) {fprintf(stderr
,"rexpr: out of memory\n"); exit(-1);}
447 if ( freelist
!= NULL
) p
->track
= freelist
;
453 static void ArcBetweenGraphNodes(NodePtr i
,NodePtr j
,int label
)
455 static void ArcBetweenGraphNodes(i
, j
, label
)
463 if ( i
->arcs
== NULL
) i
->arctail
= i
->arcs
= a
;
464 else {(i
->arctail
)->next
= a
; i
->arctail
= a
;}
470 static Graph
BuildNFA_atom(int label
)
472 static Graph
BuildNFA_atom(label
)
480 ArcBetweenGraphNodes(g
.left
, g
.right
, label
);
485 static Graph
BuildNFA_AB(Graph A
,Graph B
)
487 static Graph
BuildNFA_AB(A
, B
)
493 ArcBetweenGraphNodes(A
.right
, B
.left
, Epsilon
);
500 static Graph
BuildNFA_AorB(Graph A
,Graph B
)
502 static Graph
BuildNFA_AorB(A
, B
)
509 ArcBetweenGraphNodes(g
.left
, A
.left
, Epsilon
);
510 ArcBetweenGraphNodes(g
.left
, B
.left
, Epsilon
);
512 ArcBetweenGraphNodes(A
.right
, g
.right
, Epsilon
);
513 ArcBetweenGraphNodes(B
.right
, g
.right
, Epsilon
);
518 static Graph
BuildNFA_set(char *s
)
520 static Graph
BuildNFA_set( s
)
526 if ( s
== NULL
) return g
;
532 ArcBetweenGraphNodes(g
.left
, g
.right
, *s
++);
538 static Graph
BuildNFA_Astar(Graph A
)
540 static Graph
BuildNFA_Astar( A
)
549 ArcBetweenGraphNodes(g
.left
, A
.left
, Epsilon
);
550 ArcBetweenGraphNodes(g
.left
, g
.right
, Epsilon
);
551 ArcBetweenGraphNodes(A
.right
, g
.right
, Epsilon
);
552 ArcBetweenGraphNodes(A
.right
, A
.left
, Epsilon
);
558 static Graph
BuildNFA_Aplus(Graph A
)
560 static Graph
BuildNFA_Aplus( A
)
564 ArcBetweenGraphNodes(A
.right
, A
.left
, Epsilon
);
570 static Graph
BuildNFA_Aoptional(Graph A
)
572 static Graph
BuildNFA_Aoptional( A
)
581 ArcBetweenGraphNodes(g
.left
, A
.left
, Epsilon
);
582 ArcBetweenGraphNodes(g
.left
, g
.right
, Epsilon
);
583 ArcBetweenGraphNodes(A
.right
, g
.right
, Epsilon
);