+++ /dev/null
-"""Parse a Python module and describe its classes and methods.\r
-\r
-Parse enough of a Python file to recognize imports and class and\r
-method definitions, and to find out the superclasses of a class.\r
-\r
-The interface consists of a single function:\r
- readmodule_ex(module [, path])\r
-where module is the name of a Python module, and path is an optional\r
-list of directories where the module is to be searched. If present,\r
-path is prepended to the system search path sys.path. The return\r
-value is a dictionary. The keys of the dictionary are the names of\r
-the classes defined in the module (including classes that are defined\r
-via the from XXX import YYY construct). The values are class\r
-instances of the class Class defined here. One special key/value pair\r
-is present for packages: the key '__path__' has a list as its value\r
-which contains the package search path.\r
-\r
-A class is described by the class Class in this module. Instances\r
-of this class have the following instance variables:\r
- module -- the module name\r
- name -- the name of the class\r
- super -- a list of super classes (Class instances)\r
- methods -- a dictionary of methods\r
- file -- the file in which the class was defined\r
- lineno -- the line in the file on which the class statement occurred\r
-The dictionary of methods uses the method names as keys and the line\r
-numbers on which the method was defined as values.\r
-If the name of a super class is not recognized, the corresponding\r
-entry in the list of super classes is not a class instance but a\r
-string giving the name of the super class. Since import statements\r
-are recognized and imported modules are scanned as well, this\r
-shouldn't happen often.\r
-\r
-A function is described by the class Function in this module.
-Instances of this class have the following instance variables:
- module -- the module name
- name -- the name of the function
- file -- the file in which the function was defined
- lineno -- the line in the file on which the def statement occurred
-"""\r
-\r
-import sys\r
-import imp\r
-import tokenize\r
-from token import NAME, DEDENT, OP\r
-from operator import itemgetter\r
-\r
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a fully qualified module name to the dictionary _readmodule() built
# for it.  _readmodule() consults this before scanning and registers the
# dictionary before it starts parsing, so a module that is imported again
# (including through a circular import) is served from here.
_modules = {} # cache of modules we've seen
-\r
-# each Python class is represented by an instance of this class\r
class Class:
    '''Record describing one Python class found in a scanned module.

    Instance variables set by the constructor:
     module -- the module name
     name -- the name of the class
     super -- the list of super classes that was passed in, or a new
              empty list when None was passed
     methods -- a dictionary mapping method name to line number,
                initially empty
     file -- the file in which the class was defined
     lineno -- the line on which the class statement occurred
    '''

    def __init__(self, module, name, super, file, lineno):
        self.module, self.name = module, name
        # None stands for "no bases"; every such instance gets its own list.
        self.super = [] if super is None else super
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition with the same name replaces the earlier one.'''
        self.methods[name] = lineno
-\r
class Function:
    '''Record describing one top-level Python function.

    Instance variables set by the constructor:
     module -- the module name
     name -- the name of the function
     file -- the file in which the function was defined
     lineno -- the line on which the def statement occurred
    '''

    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
-\r
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE exactly as readmodule_ex() does (PATH, when given, is
    searched before sys.path) and return a new dictionary holding only
    the entries whose value is a Class instance.'''
    scanned = _readmodule(module, path or [])
    return dict((name, entry) for name, entry in scanned.items()
                if isinstance(entry, Class))
-\r
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    MODULE is looked up in PATH followed by sys.path; a missing or empty
    PATH means sys.path only.  The result is whatever _readmodule()
    produces for the module: the dictionary it fills with the Class and
    Function objects it finds (plus a '__path__' entry for packages).
    '''
    search_path = path or []
    return _readmodule(module, search_path)
-\r
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return the dictionary describing the module.  It maps names to Class
    and Function objects; for a package it also has a '__path__' entry
    holding the package search path.  The same dictionary object is kept
    in the _modules cache and returned again on later calls.

    Raise ImportError when a dotted name goes through something that is
    not a package; errors raised by imp.find_module() while locating the
    module itself are propagated unchanged.  Failures while reading
    modules imported *by* the scanned module are ignored.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    contents = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = contents
        return contents

    # Check for a dotted module name: read the parent package first, then
    # look for the last component on the parent's package path.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            # The parent is a plain module (or built-in), not a package.
            # Report it as an import failure instead of leaking a KeyError.
            raise ImportError('No package named %s' % package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    f = None
    if inpackage is not None:
        f, fname, (_s, _m, ty) = imp.find_module(module, path)
    else:
        f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    if ty == imp.PKG_DIRECTORY:
        # For a package, scan its __init__ and remember the package path.
        contents['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Register before parsing so that recursive reads of this module
    # triggered by its own imports hit the cache.
    _modules[fullmodule] = contents
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return contents

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                thisindent = start[1]
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    contents[meth_name] = Function(fullmodule, meth_name,
                                                   fname, lineno)
                stack.append((None, thisindent)) # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    bases = [] # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    base_tokens = [] # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(base_tokens)
                            if n in contents:
                                # we know this super class
                                n = contents[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            bases.append(n)
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = bases
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    # only top-level classes go into the result
                    contents[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit are deliberately let through.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        contents[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                contents[n] = d[n]
    except StopIteration:
        # The helpers pull tokens with next(); running off the end of the
        # file in the middle of a statement just ends the scan.
        pass
    finally:
        # Close the source file even when tokenizing fails.
        f.close()
    return contents
-\r
def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    #
    # G is the token iterator; it is advanced past the names consumed.
    # Tokens are pulled with next(), so any iterator of token tuples
    # works.  StopIteration from an exhausted iterator propagates to
    # the caller.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip anything unexpected up to the next comma or end of line.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names
-\r
-def _getname(g):\r
- # Helper to get a dotted name, return a pair (name, token) where\r
- # name is the dotted name, or None if there was no dotted name,\r
- # and token is the next input token.\r
- parts = []\r
- tokentype, token = g.next()[0:2]\r
- if tokentype != NAME and token != '*':\r
- return (None, token)\r
- parts.append(token)\r
- while True:\r
- tokentype, token = g.next()[0:2]\r
- if token != '.':\r
- break\r
- tokentype, token = g.next()[0:2]\r
- if tokentype != NAME:\r
- break\r
- parts.append(token)\r
- return (".".join(parts), token)\r
-\r
def _main():
    # Main program for testing.
    #
    # sys.argv[1] is either a module name or the path of a file; for an
    # existing path the directory becomes the search path and a trailing
    # ".py" is stripped from the file name.  The module's classes (with
    # their methods) and functions are printed in line-number order.
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Entries without a lineno attribute (e.g. the '__path__' list) sort
    # as line 0.  The sort is stable, as the cmp-based sort was.
    objs = sorted(tree.values(), key=lambda o: getattr(o, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            # Each line is a single pre-formatted string, so the output is
            # the same under the print statement and the print function.
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))
-\r
# Run the test driver only when executed as a script, not on import.
if __name__ == "__main__":
    _main()