3 Tools for scanning header files in search of function prototypes.
5 Often, the function prototypes in header files contain enough information
6 to automatically generate (or reverse-engineer) interface specifications
7 from them. The conventions used are very vendor specific, but once you've
8 figured out what they are they are often a great help, and it sure beats
9 manually entering the interface specifications. (These are needed to generate
10 the glue used to access the functions from Python.)
12 In order to make this class useful, almost every component can be overridden.
13 The defaults are (currently) tuned to scanning Apple Macintosh header files,
14 although most Mac specific details are contained in header-specific subclasses.
28 from bgenlocations
import CREATOR
, INCLUDEDIR
31 INCLUDEDIR
= os
.curdir
class Error(Exception):
    """Exception raised by the scanner, e.g. when no input file has been set.

    This used to be the plain string "scantools.Error".  String exceptions
    can no longer be raised, so it is a real exception class now; existing
    ``raise Error, "message"`` and ``except Error`` code keeps working.
    """
35 BEGINHTMLREPORT
="""<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
38 <style type="text/css">
40 .commentstripping { color: grey; text-decoration: line-through }
41 .comment { text-decoration: line-through }
42 .notcomment { color: black }
43 .incomplete { color: maroon }
44 .constant { color: green }
45 .pyconstant { background-color: yellow }
46 .blconstant { background-color: yellow; color: red }
47 .declaration { color: blue }
48 .pydeclaration { background-color: yellow }
49 .type { font-style: italic }
50 .name { font-weight: bold }
51 .value { font-style: italic }
52 .arglist { text-decoration: underline }
53 .blacklisted { background-color: yellow; color: red }
55 <title>Bgen scan report</title>
58 <h1>Bgen scan report</h1>
60 <p>This scan report is intended to help you debug the regular expressions
61 used by the bgen scanner. It consists of the original ".h" header file(s)
62 marked up to show you what the regular expressions in the bgen parser matched
63 for each line. NOTE: comments in the original source files may or may not be
65 <p>The typographic conventions of this file are as follows:</p>
67 <dt>comment stripping</dt>
68 <dd><pre><span class="commentstripping"><span class="notcomment">comment stripping is </span><span class="comment">/* marked up */</span><span class="notcomment"> and the line is repeated if needed</span></span></pre>
69 <p>If anything here does not appear to happen correctly look at
70 <tt>comment1_pat</tt> and <tt>comment2_pat</tt>.</p>
72 <dt>constant definitions</dt>
73 <dd><pre><span class="constant">#define <span class="name">name</span> <span class="value">value</span></pre>
74 <p>Highlights name and value of the constant. Governed by <tt>sym_pat</tt>.</p>
76 <dt>function declaration</dt>
77 <dd><pre><span class="declaration"><span class="type">char *</span><span class="name">rindex</span><span class="arglist">(<span class="type">const char *</span><span class="name">s</span>, <span class="type">int </span><span class="name">c</span>)</span>;</span></pre>
78 <p>Highlights type, name and argument list. <tt>type_pat</tt>,
79 <tt>name_pat</tt> and <tt>args_pat</tt> are combined into <tt>whole_pat</tt>, which
80 is what is used here.</p></dd>
82 <dt>incomplete match for function declaration</dt>
83 <dd><pre><span class="incomplete"><span class="type">char *</span>foo;</span></pre>
84 <p>The beginning of this looked promising, but it did not match a function declaration.
85 In other words, it matched <tt>head_pat</tt> but not <tt>whole_pat</tt>. If the next
86 declaration has also been gobbled up you need to look at <tt>end_pat</tt>.</p>
88 <dt>unrecognized input</dt>
89 <dd><pre><span class="unmatched">#include "type.h"</span></pre>
90 <p>If there are function declarations the scanner has missed (i.e. things
91 are in this class but you want them to be declarations) you need to adapt
97 <span class="unmatched">
99 ENDHTMLREPORT
="""</span>
107 # Set to 1 in subclass to debug your scanner patterns.
110 def __init__(self
, input = None, output
= None, defsoutput
= None):
112 self
.initblacklists()
113 self
.initrepairinstructions()
117 self
.compilepatterns()
118 self
.initosspecifics()
121 self
.setoutput(output
, defsoutput
)
125 def initusedtypes(self
):
def typeused(self, type, mode):
    """Record that *type* was seen with the given *mode*.

    ``self.usedtypes`` maps each type name to a dictionary whose keys are
    the modes the type was used with (the values are always None).
    """
    # setdefault() replaces the has_key() test-then-insert sequence, which
    # is not available on dictionaries in newer Python versions.
    self.usedtypes.setdefault(type, {})[mode] = None
133 def reportusedtypes(self
):
134 types
= self
.usedtypes
.keys()
137 modes
= self
.usedtypes
[type].keys()
139 self
.report("%s %s", type, " ".join(modes
))
141 def gentypetest(self
, file):
143 fp
.write("types=[\n")
144 types
= self
.usedtypes
.keys()
147 fp
.write("\t'%s',\n"%type)
149 fp
.write("""missing=0
154 print "** Missing type:", t
156 if missing: raise "Missing Types"
160 def initsilent(self
):
163 def error(self
, format
, *args
):
167 def report(self
, format
, *args
):
171 def writeinitialdefs(self
):
def initblacklists(self):
    """Build the name/type blacklists and the greylist lookup table.

    The three make*() hooks supply the raw data; the type blacklist always
    starts with the entries "unknown" and "-".
    """
    self.blacklistnames = self.makeblacklistnames()

    extra_types = self.makeblacklisttypes()
    self.blacklisttypes = ["unknown", "-"] + extra_types

    greylist = self.makegreylist()
    self.greydictnames = self.greylist2dict(greylist)
179 def greylist2dict(self
, list):
181 for define
, namelist
in list:
182 for name
in namelist
:
186 def makeblacklistnames(self
):
189 def makeblacklisttypes(self
):
192 def makegreylist(self
):
def initrepairinstructions(self):
    """Store the repair instructions and the inherent pointer types.

    Both values come from the corresponding make*() hook methods.
    """
    instructions = self.makerepairinstructions()
    self.repairinstructions = instructions

    pointertypes = self.makeinherentpointertypes()
    self.inherentpointertypes = pointertypes
199 def makerepairinstructions(self
):
200 """Parse the repair file into repair instructions.
202 The file format is simple:
203 1) use \ to split a long logical line in multiple physical lines
204 2) everything after the first # on a line is ignored (as comment)
205 3) empty lines are ignored
206 4) remaining lines must have exactly 3 colon-separated fields:
207 functionpattern : argumentspattern : argumentsreplacement
208 5) all patterns use shell style pattern matching
209 6) an empty functionpattern means the same as *
210 7) the other two fields are each comma-separated lists of triples
211 8) a triple is a space-separated list of 1-3 words
212 9) a triple with less than 3 words is padded at the end with "*" words
213 10) when used as a pattern, a triple matches the type, name, and mode
214 of an argument, respectively
215 11) when used as a replacement, the words of a triple specify
216 replacements for the corresponding words of the argument,
217 with "*" as a word by itself meaning leave the original word
218 (no other uses of "*" is allowed)
219 12) the replacement need not have the same number of triples
222 f
= self
.openrepairfile()
224 print "Reading repair file", repr(f
.name
), "..."
232 while line
[-2:] == '\\\n':
233 line
= line
[:-2] + ' ' + f
.readline()
236 if i
>= 0: line
= line
[:i
]
237 words
= [s
.strip() for s
in line
.split(':')]
238 if words
== ['']: continue
240 print "Line", startlineno
,
241 print ": bad line (not 3 colon-separated fields)"
244 [fpat
, pat
, rep
] = words
245 if not fpat
: fpat
= "*"
247 print "Line", startlineno
,
248 print "Empty pattern"
251 patparts
= [s
.strip() for s
in pat
.split(',')]
252 repparts
= [s
.strip() for s
in rep
.split(',')]
256 print "Line", startlineno
,
257 print "Empty pattern part"
262 print "Line", startlineno
,
263 print "Pattern part has > 3 words"
265 pattern
= pattern
[:3]
267 while len(pattern
) < 3:
269 patterns
.append(pattern
)
273 print "Line", startlineno
,
274 print "Empty replacement part"
277 replacement
= p
.split()
278 if len(replacement
) > 3:
279 print "Line", startlineno
,
280 print "Pattern part has > 3 words"
282 replacement
= replacement
[:3]
284 while len(replacement
) < 3:
285 replacement
.append("*")
286 replacements
.append(replacement
)
287 list.append((fpat
, patterns
, replacements
))
290 def makeinherentpointertypes(self
):
293 def openrepairfile(self
, filename
= "REPAIR"):
295 return open(filename
, "rU")
297 print repr(filename
), ":", msg
298 print "Cannot open repair file -- assume no repair needed"
306 self
.specfile
= sys
.stdout
308 self
.scanfile
= sys
.stdin
314 self
.includepath
= [os
.curdir
, INCLUDEDIR
]
def initpatterns(self):
    """Set the default regular-expression source strings (the *_pat attributes).

    These are plain strings; compilepatterns() compiles every attribute
    whose name ends in "_pat".
    """
    # Start and end of a declaration.
    self.head_pat = r"^EXTERN_API[^_]"
    self.tail_pat = r"[;={}]"

    # Pieces of a function declaration: return type, name, argument list.
    self.type_pat = (
        r"EXTERN_API"
        r"[ \t\n]*\([ \t\n]*"
        r"(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])"
        r"[ \t\n]*\)[ \t\n]*"
    )
    self.name_pat = r"(?P<name>[a-zA-Z0-9_]+)[ \t\n]*"
    self.args_pat = r"\((?P<args>([^\(;=\)]+|\([^\(;=\)]*\))*)\)"
    self.whole_pat = self.type_pat + self.name_pat + self.args_pat

    # Symbol definition of the form "name = value".
    self.sym_pat = (
        r"^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*="
        r"[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
    )

    # Splits one argument into type, name and an optional trailing [].
    self.asplit_pat = r"^(?P<type>.*[^a-zA-Z0-9_])(?P<name>[a-zA-Z0-9_]+)(?P<array>\[\])?$"

    # Comment stripping.
    self.comment1_pat = r"(?P<rest>.*)//.*"
    # note that the next pattern only removes comments that are wholly within one line
    self.comment2_pat = r"(?P<rest1>.*)/\*.*\*/(?P<rest2>.*)"
def compilepatterns(self):
    """Compile every "*_pat" attribute into a regex stored without the suffix.

    For example the source string in self.head_pat is compiled and stored
    as self.head.
    """
    suffix = "_pat"
    for attrname in dir(self):
        if not attrname.endswith(suffix):
            continue
        source = getattr(self, attrname)
        setattr(self, attrname[:-len(suffix)], re.compile(source))
340 def initosspecifics(self):
341 if MacOS and CREATOR:
342 self.filetype = 'TEXT'
343 self.filecreator = CREATOR
345 self.filetype = self.filecreator = None
def setfiletype(self, filename):
    """Apply the configured creator and/or file type to *filename*.

    Does nothing when the MacOS module is not available or when neither
    self.filecreator nor self.filetype is set.  Whichever of the two is
    not set keeps the value the file currently has.
    """
    if not MacOS:
        return
    if not (self.filecreator or self.filetype):
        return

    # Start from the file's current values and override only what is set.
    current_creator, current_type = MacOS.GetCreatorAndType(filename)
    if self.filecreator:
        current_creator = self.filecreator
    if self.filetype:
        current_type = self.filetype
    MacOS.SetCreatorAndType(filename, current_creator, current_type)
357 def closefiles(self):
364 tmp = self.specmine and self.specfile
369 tmp = self.defsmine and self.defsfile
374 tmp = self.scanmine and self.scanfile
379 if self.htmlfile: self.htmlfile.write(ENDHTMLREPORT)
380 tmp = self.htmlmine and self.htmlfile
384 def setoutput(self, spec, defs = None):
388 if type(spec) == StringType:
389 file = self.openoutput(spec)
397 if type(defs) == StringType:
398 file = self.openoutput(defs)
406 def sethtmloutput(self, htmlfile):
409 if type(htmlfile) == StringType:
410 file = self.openoutput(htmlfile)
417 self.htmlfile.write(BEGINHTMLREPORT)
419 def openoutput(self, filename):
421 file = open(filename, 'w')
423 raise IOError, (filename, arg)
424 self.setfiletype(filename)
427 def setinput(self, scan = sys.stdin):
428 if not type(scan) in (TupleType, ListType):
430 self.allscaninputs = scan
433 def _nextinput(self):
434 if not self.allscaninputs:
436 scan = self.allscaninputs[0]
437 self.allscaninputs = self.allscaninputs[1:]
440 if type(scan) == StringType:
441 file = self.openinput(scan)
451 def openinput(self, filename):
452 if not os.path.isabs(filename):
453 for dir in self.includepath:
454 fullname = os.path.join(dir, filename)
455 #self.report("trying full name
%r", fullname)
457 return open(fullname, 'rU')
460 # If not on the path, or absolute, try default open()
462 return open(filename, 'rU')
464 raise IOError, (arg, filename)
467 if not self.scanfile:
468 raise Error, "input file not set"
469 self.line = self.scanfile.readline()
471 if self._nextinput():
472 return self.getline()
474 self.lineno = self.lineno + 1
478 if not self.scanfile:
479 self.error("No
input file has been specified
")
481 inputname = self.scanfile.name
482 self.report("scanfile
= %r", inputname)
483 if not self.specfile:
484 self.report("(No interface specifications will be written
)")
486 self.report("specfile
= %r", self.specfile.name)
487 self.specfile.write("# Generated from %r\n\n" % (inputname,))
488 if not self
.defsfile
:
489 self
.report("(No symbol definitions will be written)")
491 self
.report("defsfile = %r", (self
.defsfile
.name
,))
492 self
.defsfile
.write("# Generated from %r\n\n" % (os
.path
.split(inputname
)[1],))
493 self
.writeinitialdefs()
494 self
.alreadydone
= []
497 try: line
= self
.getline()
498 except EOFError: break
500 self
.report("LINE: %r" % (line
,))
501 match
= self
.comment1
.match(line
)
503 self
.htmlreport(line
, klass
='commentstripping', ranges
=[(
504 match
.start('rest'), match
.end('rest'), 'notcomment')])
505 line
= match
.group('rest')
507 self
.report("\tafter comment1: %r" % (line
,))
508 match
= self
.comment2
.match(line
)
511 self
.htmlreport(line
, klass
='commentstripping', ranges
=[
512 (match
.start('rest1'), match
.end('rest1'), 'notcomment'),
513 (match
.start('rest2'), match
.end('rest2'), 'notcomment')])
514 line
= match
.group('rest1')+match
.group('rest2')
516 self
.report("\tafter comment2: %r" % (line
,))
517 match
= self
.comment2
.match(line
)
519 match
= self
.sym
.match(line
)
522 self
.report("\tmatches sym.")
523 self
.dosymdef(match
, line
)
525 match
= self
.head
.match(line
)
528 self
.report("\tmatches head.")
531 self
.htmlreport(line
, klass
='unmatched')
533 self
.error("Uncaught EOF error")
534 self
.reportusedtypes()
536 def dosymdef(self
, match
, line
):
537 name
, defn
= match
.group('name', 'defn')
538 self
.htmlreport(line
, klass
='constant', ranges
=[
539 (match
.start('name'), match
.end('name'), 'name'),
540 (match
.start('defn'), match
.end('defn'), 'value')])
541 defn
= escape8bit(defn
)
543 self
.report("\tsym: name=%r, defn=%r" % (name
, defn
))
544 if not name
in self
.blacklistnames
:
545 oline
= "%s = %s\n" % (name
, defn
)
546 self
.defsfile
.write(oline
)
547 self
.htmlreport(oline
, klass
="pyconstant")
549 self
.defsfile
.write("# %s = %s\n" % (name
, defn
))
550 self
.htmlreport("** no output: name is blacklisted", klass
="blconstant")
551 # XXXX No way to handle greylisted names
553 def dofuncspec(self
):
555 while not self
.tail
.search(raw
):
556 line
= self
.getline()
558 self
.report("* CONTINUATION LINE: %r" % (line
,))
559 match
= self
.comment1
.match(line
)
561 line
= match
.group('rest')
563 self
.report("\tafter comment1: %r" % (line
,))
564 match
= self
.comment2
.match(line
)
566 line
= match
.group('rest1')+match
.group('rest2')
568 self
.report("\tafter comment1: %r" % (line
,))
569 match
= self
.comment2
.match(line
)
572 self
.report("* WHOLE LINE: %r" % (raw
,))
573 self
.processrawspec(raw
)
576 def processrawspec(self
, raw
):
577 match
= self
.whole
.search(raw
)
579 self
.report("Bad raw spec: %r", raw
)
581 match
= self
.type.search(raw
)
583 self
.report("(Type already doesn't match)")
584 self
.htmlreport(raw
, klass
='incomplete', ranges
=[(
585 match
.start('type'), match
.end('type'), 'type')])
587 self
.report("(but type matched)")
588 self
.htmlreport(raw
, klass
='incomplete')
590 type, name
, args
= match
.group('type', 'name', 'args')
592 (match
.start('type'), match
.end('type'), 'type'),
593 (match
.start('name'), match
.end('name'), 'name'),
594 (match
.start('args'), match
.end('args'), 'arglist')]
595 self
.htmlreport(raw
, klass
='declaration', ranges
=ranges
)
596 modifiers
= self
.getmodifiers(match
)
597 type = self
.pythonizename(type)
598 name
= self
.pythonizename(name
)
599 if self
.checkduplicate(name
):
600 self
.htmlreport("*** no output generated: duplicate name", klass
="blacklisted")
602 self
.report("==> %s %s <==", type, name
)
603 if self
.blacklisted(type, name
):
604 self
.htmlreport("*** no output generated: function name or return type blacklisted", klass
="blacklisted")
605 self
.report("*** %s %s blacklisted", type, name
)
607 returnlist
= [(type, name
, 'ReturnMode')]
608 returnlist
= self
.repairarglist(name
, returnlist
)
609 [(type, name
, returnmode
)] = returnlist
610 arglist
= self
.extractarglist(args
)
611 arglist
= self
.repairarglist(name
, arglist
)
612 if self
.unmanageable(type, name
, arglist
):
613 self
.htmlreport("*** no output generated: some argument blacklisted", klass
="blacklisted")
614 ##for arg in arglist:
615 ## self.report(" %r", arg)
616 self
.report("*** %s %s unmanageable", type, name
)
619 self
.generate(type, name
, arglist
, modifiers
)
621 self
.generate(type, name
, arglist
)
623 def getmodifiers(self
, match
):
626 def checkduplicate(self
, name
):
627 if name
in self
.alreadydone
:
628 self
.report("Name has already been defined: %r", name
)
630 self
.alreadydone
.append(name
)
633 def pythonizename(self
, name
):
634 name
= re
.sub("\*", " ptr", name
)
636 name
= re
.sub("[ \t]+", "_", name
)
639 def extractarglist(self
, args
):
641 if not args
or args
== "void":
643 parts
= [s
.strip() for s
in args
.split(",")]
646 arg
= self
.extractarg(part
)
650 def extractarg(self
, part
):
653 match
= self
.asplit
.match(part
)
655 self
.error("Indecipherable argument: %r", part
)
656 return ("unknown", part
, mode
)
657 type, name
, array
= match
.group('type', 'name', 'array')
659 # array matches an optional [] after the argument name
660 type = type + " ptr "
661 type = self
.pythonizename(type)
662 return self
.modifyarg(type, name
, mode
)
664 def modifyarg(self
, type, name
, mode
):
665 if type[:6] == "const_":
667 elif type[-4:] == "_ptr":
670 elif type in self
.inherentpointertypes
:
672 if type[-4:] == "_far":
674 return type, name
, mode
676 def repairarglist(self
, functionname
, arglist
):
679 while i
< len(arglist
):
680 for item
in self
.repairinstructions
:
682 pattern
, replacement
= item
685 functionpat
, pattern
, replacement
= item
686 if not fnmatch
.fnmatchcase(functionname
, functionpat
):
689 if i
+n
> len(arglist
): continue
690 current
= arglist
[i
:i
+n
]
692 if not self
.matcharg(pattern
[j
], current
[j
]):
694 else: # All items of the pattern match
695 new
= self
.substituteargs(
696 pattern
, replacement
, current
)
699 i
= i
+len(new
) # No recursive substitutions
701 else: # No patterns match
def matcharg(self, patarg, arg):
    """Return True if the argument triple *arg* matches the pattern *patarg*.

    Both are 3-item sequences; each word of *arg* is compared with the
    corresponding shell-style pattern word of *patarg* using the
    case-sensitive fnmatch.fnmatchcase().  All three words must match.
    """
    # Count the matches explicitly instead of taking len() of filter(),
    # which only works where filter() returns a list.
    matched = [1 for word, pat in zip(arg, patarg)
               if fnmatch.fnmatchcase(word, pat)]
    return len(matched) == 3
708 def substituteargs(self
, pattern
, replacement
, old
):
710 for k
in range(len(replacement
)):
711 item
= replacement
[k
]
712 newitem
= [item
[0], item
[1], item
[2]]
715 newitem
[i
] = old
[k
][i
]
716 elif item
[i
][:1] == '$':
717 index
= int(item
[i
][1:]) - 1
718 newitem
[i
] = old
[index
][i
]
719 new
.append(tuple(newitem
))
720 ##self.report("old: %r", old)
721 ##self.report("new: %r", new)
724 def generate(self
, tp
, name
, arglist
, modifiers
=[]):
726 self
.typeused(tp
, 'return')
728 classname
, listname
= self
.destination(tp
, name
, arglist
, modifiers
)
730 classname
, listname
= self
.destination(tp
, name
, arglist
)
731 if not classname
or not listname
:
732 self
.htmlreport("*** no output generated: self.destination() returned None", klass
="blacklisted")
734 if not self
.specfile
:
735 self
.htmlreport("*** no output generated: no output file specified", klass
="blacklisted")
737 self
.specfile
.write("f = %s(%s, %r,\n" % (classname
, tp
, name
))
738 for atype
, aname
, amode
in arglist
:
739 self
.typeused(atype
, amode
)
740 self
.specfile
.write(" (%s, %r, %s),\n" %
741 (atype
, aname
, amode
))
742 if self
.greydictnames
.has_key(name
):
743 self
.specfile
.write(" condition=%r,\n"%(self
.greydictnames
[name
],))
744 self
.generatemodifiers(classname
, name
, modifiers
)
745 self
.specfile
.write(")\n")
746 self
.specfile
.write("%s.append(f)\n\n" % listname
)
748 oline
= "Adding to %s:\n%s(returntype=%s, name=%r" % (listname
, classname
, tp
, name
)
749 for atype
, aname
, amode
in arglist
:
750 oline
+= ",\n (%s, %r, %s)" % (atype
, aname
, amode
)
752 self
.htmlreport(oline
, klass
="pydeclaration")
def destination(self, type, name, arglist, modifiers=None):
    """Return the (generator class name, list name) pair for a function.

    The default sends everything to "FunctionGenerator" / "functions" and
    ignores all of its arguments.

    *modifiers* is optional: generate() passes it as a fourth argument
    whenever the modifier list is non-empty, so the default implementation
    has to accept it.
    """
    return "FunctionGenerator", "functions"
757 def generatemodifiers(self
, classname
, name
, modifiers
):
760 def blacklisted(self
, type, name
):
761 if type in self
.blacklisttypes
:
762 ##self.report("return type %s is blacklisted", type)
764 if name
in self
.blacklistnames
:
765 ##self.report("function name %s is blacklisted", name)
769 def unmanageable(self
, type, name
, arglist
):
770 for atype
, aname
, amode
in arglist
:
771 if atype
in self
.blacklisttypes
:
772 self
.report("argument type %s is blacklisted", atype
)
776 def htmlreport(self
, line
, klass
=None, ranges
=None):
777 if not self
.htmlfile
: return
781 ranges
.insert(0, (0, len(line
), klass
))
785 for b
, e
, name
in ranges
:
787 oline
+= '<span class="%s">' % name
792 if c
== '<': oline
+= '<'
793 elif c
== '>': oline
+= '>'
795 for b
, e
, name
in ranges
:
797 oline
+= '<span class="%s">' % name
800 if not line
or line
[-1] != '\n':
802 self
.htmlfile
.write(oline
)
class Scanner_PreUH3(Scanner):
    """Scanner for Universal Headers before release 3"""

    def initpatterns(self):
        """Install the base patterns, then override the "pascal"-style ones."""
        Scanner.initpatterns(self)

        # XXX Mac specific!
        self.head_pat = "^extern pascal[ \t]+"
        self.type_pat = "pascal[ \t\n]+(?P<type>[a-zA-Z0-9_ \t]*[a-zA-Z0-9_])[ \t\n]+"

        # whole_pat must be rebuilt because type_pat changed.
        self.whole_pat = "".join((self.type_pat, self.name_pat, self.args_pat))

        self.sym_pat = (
            "^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*="
            "[ \t]*(?P<defn>[-0-9'\"][^\t\n,;}]*),?"
        )
814 class Scanner_OSX(Scanner
):
815 """Scanner for modern (post UH3.3) Universal Headers """
816 def initpatterns(self
):
817 Scanner
.initpatterns(self
)
818 self
.head_pat
= "^EXTERN_API(_C)?"
819 self
.type_pat
= "EXTERN_API(_C)?" + \
820 "[ \t\n]*\([ \t\n]*" + \
821 "(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])" + \
823 self
.whole_pat
= self
.type_pat
+ self
.name_pat
+ self
.args_pat
824 self
.sym_pat
= "^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
825 "[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
827 _8bit
= re
.compile(r
"[\200-\377]")
830 if _8bit
.search(s
) is not None:
835 out
.append("\\" + hex(o
)[1:])
842 input = "D:Development:THINK C:Mac #includes:Apple #includes:AppleEvents.h"
843 output
= "@aespecs.py"
844 defsoutput
= "@aedefs.py"
845 s
= Scanner(input, output
, defsoutput
)
848 if __name__
== '__main__':