+++ /dev/null
-/*\r
- * This file contains code for\r
- *\r
- * int rexpr(char *expr, char *s);\r
- *\r
- * which answers\r
- *\r
- * 1 if 's' is in the language described by the regular expression 'expr'\r
- * 0 if it is not\r
- * -1 if the regular expression is invalid\r
- *\r
- * Language membership is determined by constructing a non-deterministic\r
- * finite automata (NFA) from the regular expression. A depth-\r
- * first-search is performed on the NFA (graph) to check for a match of 's'.\r
- * Each non-epsilon arc consumes one character from 's'. Backtracking is\r
- * performed to check all possible paths through the NFA.\r
- *\r
- * Regular expressions follow the meta-language:\r
- *\r
- * <regExpr> ::= <andExpr> ( '|' <andExpr> )*\r
- *\r
- * <andExpr> ::= <expr> ( <expr> )*\r
- *\r
- * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>\r
- * | '(' <regExpr> ')' <repeatSymbol>\r
- * | '{' <regExpr> '}' <repeatSymbol>\r
- * | <atom> <repeatSymbol>\r
- *\r
- * <repeatSymbol> ::= { '*' | '+' }\r
- *\r
- * <atomList> ::= <atom> ( <atom> )*\r
- * | { <atomList> } <atom> '-' <atom> { <atomList> }\r
- *\r
- * <atom> ::= Token[Atom]\r
- *\r
- * Notes:\r
- * ~ means complement the set in [..]. i.e. all characters not listed\r
- * * means match 0 or more times (can be on expression or atom)\r
- * + means match 1 or more times (can be on expression or atom)\r
- * {} optional\r
- * () grouping\r
- * [] set of atoms\r
- * x-y all characters from x to y (found only in [..])\r
- * \xx the character with value xx\r
- *\r
- * Examples:\r
- * [a-z]+\r
- * match 1 or more lower-case letters (e.g. variable)\r
- *\r
- * 0x[0-9A-Fa-f]+\r
- * match a hex number with 0x on front (e.g. 0xA1FF)\r
- *\r
- * [0-9]+.[0-9]+{e[0-9]+}\r
- * match a floating point number (e.g. 3.14e21)\r
- *\r
- * Code example:\r
- * if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword\r
- *\r
- * Terence Parr\r
- * Purdue University\r
- * April 1991\r
- */\r
-\r
-#include <stdio.h>\r
-#include <ctype.h>\r
-#ifdef __STDC__\r
-#include <stdlib.h>\r
-#else\r
-#include <malloc.h>\r
-#endif\r
-#include "rexpr.h"\r
-\r
-#ifdef __USE_PROTOS\r
-static int regExpr( GraphPtr g );\r
-static int andExpr( GraphPtr g );\r
-static int expr( GraphPtr g );\r
-static int repeatSymbol( GraphPtr g );\r
-static int atomList( char *p, int complement );\r
-static void next( void );\r
-static ArcPtr newGraphArc( void );\r
-static NodePtr newNode( void );\r
-static int ArcBetweenGraphNode( NodePtr i, NodePtr j, int label );\r
-static Graph BuildNFA_atom( int label );\r
-static Graph BuildNFA_AB( Graph A, Graph B );\r
-static Graph BuildNFA_AorB( Graph A, Graph B );\r
-static Graph BuildNFA_set( char *s );\r
-static Graph BuildNFA_Astar( Graph A );\r
-static Graph BuildNFA_Aplus( Graph A );\r
-static Graph BuildNFA_Aoptional( Graph A );\r
-#else\r
-static int regExpr();\r
-static int andExpr();\r
-static int expr();\r
-static int repeatSymbol();\r
-static int atomList();\r
-static void next();\r
-static ArcPtr newGraphArc();\r
-static NodePtr newNode();\r
-static int ArcBetweenGraphNode();\r
-static Graph BuildNFA_atom();\r
-static Graph BuildNFA_AB();\r
-static Graph BuildNFA_AorB();\r
-static Graph BuildNFA_set();\r
-static Graph BuildNFA_Astar();\r
-static Graph BuildNFA_Aplus();\r
-static Graph BuildNFA_Aoptional();\r
-#endif\r
-\r
-static char *_c;\r
-static int token, tokchar;\r
-static NodePtr accept;\r
-static NodePtr freelist = NULL;\r
-\r
-/*\r
- * return 1 if s in language described by expr\r
- * 0 if s is not\r
- * -1 if expr is an invalid regular expression\r
- */\r
-#ifdef __USE_PROTOS\r
-static int rexpr(char *expr,char *s)\r
-#else\r
-static int rexpr(expr, s)\r
-char *expr, *s;\r
-#endif\r
-{\r
- NodePtr p,q;\r
- Graph nfa;\r
- int result;\r
-\r
- fprintf(stderr, "rexpr(%s,%s);\n", expr,s);\r
- freelist = NULL;\r
- _c = expr;\r
- next();\r
- if ( regExpr(&nfa) == -1 ) return -1;\r
- accept = nfa.right;\r
- result = match(nfa.left, s);\r
- /* free all your memory */\r
- p = q = freelist;\r
- while ( p!=NULL ) { q = p->track; free(p); p = q; }\r
- return result;\r
-}\r
-\r
-/*\r
- * do a depth-first-search on the NFA looking for a path from start to\r
- * accept state labelled with the characters of 's'.\r
- */\r
-\r
-#ifdef __USE_PROTOS\r
-static int match(NodePtr automaton,char *s)\r
-#else\r
-static int match(automaton, s)\r
-NodePtr automaton;\r
-char *s;\r
-#endif\r
-{\r
- ArcPtr p;\r
- \r
- if ( automaton == accept && *s == '\0' ) return 1; /* match */\r
-\r
- for (p=automaton->arcs; p!=NULL; p=p->next) /* try all arcs */\r
- {\r
- if ( p->label == Epsilon )\r
- {\r
- if ( match(p->target, s) ) return 1;\r
- }\r
- else if ( p->label == *s )\r
- if ( match(p->target, s+1) ) return 1;\r
- }\r
- return 0;\r
-}\r
-\r
-/*\r
- * <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*\r
- *\r
- * Return -1 if syntax error\r
- * Return 0 if none found\r
- * Return 1 if a regExrp was found\r
- */\r
-\r
-#ifdef __USE_PROTOS\r
-static int regExpr(GraphPtr g)\r
-#else\r
-static int regExpr(g)\r
-GraphPtr g;\r
-#endif\r
-{\r
- Graph g1, g2;\r
- \r
- if ( andExpr(&g1) == -1 )\r
- {\r
- return -1;\r
- }\r
- \r
- while ( token == '|' )\r
- {\r
- int a;\r
- next();\r
- a = andExpr(&g2);\r
- if ( a == -1 ) return -1; /* syntax error below */\r
- else if ( !a ) return 1; /* empty alternative */\r
- g1 = BuildNFA_AorB(g1, g2);\r
- }\r
- \r
- if ( token!='\0' ) return -1;\r
-\r
- *g = g1;\r
- return 1;\r
-}\r
-\r
-/*\r
- * <andExpr> ::= <expr> ( <expr> )*\r
- */\r
-\r
-#ifdef __USE_PROTOS\r
-static int andExpr(GraphPtr g)\r
-#else\r
-static int andExpr(g)\r
-GraphPtr g;\r
-#endif\r
-{\r
- Graph g1, g2;\r
- \r
- if ( expr(&g1) == -1 )\r
- {\r
- return -1;\r
- }\r
- \r
- while ( token==Atom || token=='{' || token=='(' || token=='~' || token=='[' )\r
- {\r
- if (expr(&g2) == -1) return -1;\r
- g1 = BuildNFA_AB(g1, g2);\r
- }\r
- \r
- *g = g1;\r
- return 1;\r
-}\r
-\r
-/*\r
- * <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>\r
- * | '(' <regExpr> ')' <repeatSymbol>\r
- * | '{' <regExpr> '}' <repeatSymbol>\r
- * | <atom> <repeatSymbol>\r
- */\r
-\r
-#ifdef __USE_PROTOS\r
-static int expr(GraphPtr g)\r
-#else\r
-static int expr(g)\r
-GraphPtr g;\r
-#endif\r
-{\r
- int complement = 0;\r
- char s[257]; /* alloc space for string of char in [] */\r
- \r
- if ( token == '~' || token == '[' )\r
- {\r
- if ( token == '~' ) {complement = 1; next();}\r
- if ( token != '[' ) return -1;\r
- next();\r
- if ( atomList( s, complement ) == -1 ) return -1;\r
- *g = BuildNFA_set( s );\r
- if ( token != ']' ) return -1;\r
- next();\r
- repeatSymbol( g );\r
- return 1;\r
- }\r
- if ( token == '(' )\r
- {\r
- next();\r
- if ( regExpr( g ) == -1 ) return -1;\r
- if ( token != ')' ) return -1;\r
- next();\r
- repeatSymbol( g );\r
- return 1;\r
- }\r
- if ( token == '{' )\r
- {\r
- next();\r
- if ( regExpr( g ) == -1 ) return -1;\r
- if ( token != '}' ) return -1;\r
- next();\r
- /* S p e c i a l C a s e O p t i o n a l { } */\r
- if ( token != '*' && token != '+' )\r
- {\r
- *g = BuildNFA_Aoptional( *g );\r
- }\r
- repeatSymbol( g );\r
- return 1;\r
- }\r
- if ( token == Atom )\r
- {\r
- *g = BuildNFA_atom( tokchar );\r
- next();\r
- repeatSymbol( g );\r
- return 1;\r
- }\r
- \r
- return -1;\r
-}\r
-\r
-/*\r
- * <repeatSymbol> ::= { '*' | '+' }\r
- */\r
-#ifdef __USE_PROTOS\r
-static int repeatSymbol(GraphPtr g)\r
-#else\r
-static int repeatSymbol(g)\r
-GraphPtr g;\r
-#endif\r
-{\r
- switch ( token )\r
- {\r
- case '*' : *g = BuildNFA_Astar( *g ); next(); break;\r
- case '+' : *g = BuildNFA_Aplus( *g ); next(); break;\r
- }\r
- return 1;\r
-}\r
-\r
-/*\r
- * <atomList> ::= <atom> { <atom> }*\r
- * { <atomList> } <atom> '-' <atom> { <atomList> }\r
- *\r
- * a-b is same as ab\r
- * q-a is same as q\r
- */\r
-\r
-#ifdef __USE_PROTOS\r
-static int atomList(char *p, int complement)\r
-#else\r
-static int atomList(p, complement)\r
-char *p;\r
-int complement;\r
-#endif\r
-{\r
- static unsigned char set[256]; /* no duplicates */\r
- int first, last, i;\r
- char *s = p;\r
- \r
- if ( token != Atom ) return -1;\r
- \r
- for (i=0; i<256; i++) set[i] = 0;\r
- while ( token == Atom )\r
- {\r
- if ( !set[tokchar] ) *s++ = tokchar;\r
- set[tokchar] = 1; /* Add atom to set */\r
- next();\r
- if ( token == '-' ) /* have we found '-' */\r
- {\r
- first = *(s-1); /* Get last char */\r
- next();\r
- if ( token != Atom ) return -1;\r
- else\r
- {\r
- last = tokchar;\r
- }\r
- for (i = first+1; i <= last; i++)\r
- {\r
- if ( !set[tokchar] ) *s++ = i;\r
- set[i] = 1; /* Add atom to set */\r
- }\r
- next();\r
- }\r
- }\r
- *s = '\0';\r
- if ( complement )\r
- {\r
- for (i=0; i<256; i++) set[i] = !set[i];\r
- for (i=1,s=p; i<256; i++) if ( set[i] ) *s++ = i;\r
- *s = '\0';\r
- }\r
- return 1;\r
-}\r
-\r
-/* a somewhat stupid lexical analyzer */\r
-\r
-#ifdef __USE_PROTOS\r
-static void next(void)\r
-#else\r
-static void next()\r
-#endif\r
-{\r
- while ( *_c==' ' || *_c=='\t' || *_c=='\n' ) _c++;\r
- if ( *_c=='\\' )\r
- {\r
- _c++;\r
- if ( isdigit(*_c) )\r
- {\r
- int n=0;\r
- while ( isdigit(*_c) )\r
- {\r
- n = n*10 + (*_c++ - '0');\r
- }\r
- if ( n>255 ) n=255;\r
- tokchar = n;\r
- }\r
- else\r
- {\r
- switch (*_c)\r
- {\r
- case 'n' : tokchar = '\n'; break;\r
- case 't' : tokchar = '\t'; break;\r
- case 'r' : tokchar = '\r'; break;\r
- default : tokchar = *_c;\r
- }\r
- _c++;\r
- }\r
- token = Atom;\r
- }\r
- else if ( isgraph(*_c) && *_c!='[' && *_c!='(' && *_c!='{' &&\r
- *_c!='-' && *_c!='}' && *_c!=')' && *_c!=']' &&\r
- *_c!='+' && *_c!='*' && *_c!='~' && *_c!='|' )\r
- {\r
- token = Atom;\r
- tokchar = *_c++;\r
- }\r
- else\r
- {\r
- token = tokchar = *_c++;\r
- }\r
-}\r
-\r
-/* N F A B u i l d i n g R o u t i n e s */\r
-\r
-#ifdef __USE_PROTOS\r
-static ArcPtr newGraphArc(void)\r
-#else\r
-static ArcPtr newGraphArc()\r
-#endif\r
-{\r
- ArcPtr p;\r
- p = (ArcPtr) calloc(1, sizeof(Arc));\r
- if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}\r
- if ( freelist != NULL ) p->track = (ArcPtr) freelist;\r
- freelist = (NodePtr) p;\r
- return p;\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static NodePtr newNode(void)\r
-#else\r
-static NodePtr newNode()\r
-#endif\r
-{\r
- NodePtr p;\r
- p = (NodePtr) calloc(1, sizeof(Node));\r
- if ( p==NULL ) {fprintf(stderr,"rexpr: out of memory\n"); exit(-1);}\r
- if ( freelist != NULL ) p->track = freelist;\r
- freelist = p;\r
- return p;\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static void ArcBetweenGraphNodes(NodePtr i,NodePtr j,int label)\r
-#else\r
-static void ArcBetweenGraphNodes(i, j, label)\r
-NodePtr i, j;\r
-int label;\r
-#endif\r
-{\r
- ArcPtr a;\r
- \r
- a = newGraphArc();\r
- if ( i->arcs == NULL ) i->arctail = i->arcs = a;\r
- else {(i->arctail)->next = a; i->arctail = a;}\r
- a->label = label;\r
- a->target = j;\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_atom(int label)\r
-#else\r
-static Graph BuildNFA_atom(label)\r
-int label;\r
-#endif\r
-{\r
- Graph g;\r
- \r
- g.left = newNode();\r
- g.right = newNode();\r
- ArcBetweenGraphNodes(g.left, g.right, label);\r
- return( g );\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_AB(Graph A,Graph B)\r
-#else\r
-static Graph BuildNFA_AB(A, B)\r
-Graph A, B;\r
-#endif\r
-{\r
- Graph g;\r
- \r
- ArcBetweenGraphNodes(A.right, B.left, Epsilon);\r
- g.left = A.left;\r
- g.right = B.right;\r
- return( g );\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_AorB(Graph A,Graph B)\r
-#else\r
-static Graph BuildNFA_AorB(A, B)\r
-Graph A, B;\r
-#endif\r
-{\r
- Graph g;\r
- \r
- g.left = newNode();\r
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
- ArcBetweenGraphNodes(g.left, B.left, Epsilon);\r
- g.right = newNode();\r
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
- ArcBetweenGraphNodes(B.right, g.right, Epsilon);\r
- return( g );\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_set(char *s)\r
-#else\r
-static Graph BuildNFA_set( s )\r
-char *s;\r
-#endif\r
-{\r
- Graph g;\r
- \r
- if ( s == NULL ) return g;\r
- \r
- g.left = newNode();\r
- g.right = newNode();\r
- while ( *s != '\0' )\r
- {\r
- ArcBetweenGraphNodes(g.left, g.right, *s++);\r
- }\r
- return g;\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_Astar(Graph A)\r
-#else\r
-static Graph BuildNFA_Astar( A )\r
-Graph A;\r
-#endif\r
-{\r
- Graph g;\r
-\r
- g.left = newNode();\r
- g.right = newNode();\r
- \r
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
- ArcBetweenGraphNodes(g.left, g.right, Epsilon);\r
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
- ArcBetweenGraphNodes(A.right, A.left, Epsilon);\r
- \r
- return( g );\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_Aplus(Graph A)\r
-#else\r
-static Graph BuildNFA_Aplus( A )\r
-Graph A;\r
-#endif\r
-{\r
- ArcBetweenGraphNodes(A.right, A.left, Epsilon);\r
- \r
- return( A );\r
-}\r
-\r
-#ifdef __USE_PROTOS\r
-static Graph BuildNFA_Aoptional(Graph A)\r
-#else\r
-static Graph BuildNFA_Aoptional( A )\r
-Graph A;\r
-#endif\r
-{\r
- Graph g;\r
- \r
- g.left = newNode();\r
- g.right = newNode();\r
- \r
- ArcBetweenGraphNodes(g.left, A.left, Epsilon);\r
- ArcBetweenGraphNodes(g.left, g.right, Epsilon);\r
- ArcBetweenGraphNodes(A.right, g.right, Epsilon);\r
- \r
- return( g );\r
-}\r