+++ /dev/null
-import xml.sax\r
-import xml.sax.handler\r
-import types\r
-\r
-try:\r
- _StringTypes = [types.StringType, types.UnicodeType]\r
-except AttributeError:\r
- _StringTypes = [types.StringType]\r
-\r
-START_ELEMENT = "START_ELEMENT"\r
-END_ELEMENT = "END_ELEMENT"\r
-COMMENT = "COMMENT"\r
-START_DOCUMENT = "START_DOCUMENT"\r
-END_DOCUMENT = "END_DOCUMENT"\r
-PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"\r
-IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"\r
-CHARACTERS = "CHARACTERS"\r
-\r
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues DOM nodes as ``(event_type, node)`` events.

    Events are stored in a singly linked list of two-item cells
    ``[event, next_cell]``.  ``firstEvent`` is a dummy head cell whose
    second slot points at the oldest unconsumed event; ``lastEvent`` is the
    cell most recently linked in.  Consumers (see DOMEventStream) unlink
    events from the head.

    ``elementStack`` holds the document node followed by the chain of
    currently open element nodes.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up empty event, element and namespace bookkeeping.

        documentFactory -- object whose ``createDocument(uri, tagname, None)``
            builds the document; when None, startDocument() installs the
            minidom implementation.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        # Bind the list methods directly on the instance; the class-level
        # pop() below stays available to code calling PullDOM.pop(self).
        self.push = self.elementStack.append
        self.pop = self.elementStack.pop
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # Comments / processing instructions seen before a document exists.
        self.pending_events = []

    def _append_event(self, event):
        """Link *event* in as a new tail cell of the event list."""
        cell = [event, None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def pop(self):
        """Remove and return the top of the element stack."""
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def setDocumentLocator(self, locator):
        """Remember the locator handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the element about to start.

        The declaration is remembered both as an ``xmlns`` attribute to be
        attached by startElementNS() and as a uri -> prefix entry in the
        current context.  A snapshot of the context taken before the change
        is pushed so endPrefixMapping() can restore it.
        """
        if not hasattr(self, '_xmlns_attrs'):
            self._xmlns_attrs = []
        self._xmlns_attrs.append((prefix or 'xmlns', uri))
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create a namespace-aware element node and queue START_ELEMENT.

        name    -- ``(uri, localname)`` pair.
        tagName -- qualified name, or None if the reader did not supply it.
        attrs   -- attribute collection keyed by ``(uri, localname)``.

        The first element triggers buildDocument(); the new node is pushed
        on the element stack.
        """
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        xmlns_attrs = getattr(self, '_xmlns_attrs', None)
        if xmlns_attrs is not None:
            # NOTE(review): relies on attrs exposing a private ``_attrs``
            # dict -- confirm for readers other than the stdlib ones.
            for aname, value in xmlns_attrs:
                attrs._attrs[(xmlns_uri, aname)] = value
            self._xmlns_attrs = []
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri == xmlns_uri:
                # Namespace declaration: "xmlns" or "xmlns:<prefix>".
                if a_localname == 'xmlns':
                    qname = a_localname
                else:
                    qname = 'xmlns:' + a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            elif a_uri:
                prefix = self._current_context[a_uri]
                if prefix:
                    qname = prefix + ":" + a_localname
                else:
                    qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            else:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            attr.value = value

        self._append_event((START_ELEMENT, node))
        self.push(node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the element popped off the stack."""
        self._append_event((END_ELEMENT, self.pop()))

    def startElement(self, name, attrs):
        """Create a plain (non-namespace) element node and queue START_ELEMENT."""
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._append_event((START_ELEMENT, node))
        self.push(node)

    def endElement(self, name):
        """Queue END_ELEMENT for the element popped off the stack."""
        self._append_event((END_ELEMENT, self.pop()))

    def comment(self, s):
        """Queue a COMMENT event, or hold the text until a document exists."""
        if self.document:
            node = self.document.createComment(s)
            self._append_event((COMMENT, node))
        else:
            # No document to create nodes with yet; buildDocument() turns
            # this into a real node later.
            event = [(COMMENT, s), None]
            self.pending_events.append(event)

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, or hold it until a document exists."""
        if self.document:
            node = self.document.createProcessingInstruction(target, data)
            self._append_event((PROCESSING_INSTRUCTION, node))
        else:
            event = [(PROCESSING_INSTRUCTION, target, data), None]
            self.pending_events.append(event)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._append_event((IGNORABLE_WHITESPACE, node))

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._append_event((CHARACTERS, node))

    def startDocument(self):
        """Install the minidom implementation if no factory was supplied."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document for the root element and flush pending events.

        Queues START_DOCUMENT, pushes the document on the element stack,
        converts every pending comment / processing instruction into a node
        event, and returns the document's first child.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._append_event((START_DOCUMENT, node))
        self.push(node)
        # Put everything we have seen so far into the document
        for e in self.pending_events:
            if e[0][0] == PROCESSING_INSTRUCTION:
                _, target, data = e[0]
                n = self.document.createProcessingInstruction(target, data)
                e[0] = (PROCESSING_INSTRUCTION, n)
            elif e[0][0] == COMMENT:
                n = self.document.createComment(e[0][1])
                e[0] = (COMMENT, n)
            else:
                raise AssertionError("Unknown pending event ", e[0][0])
            # ``e`` already is a list cell, so link it in directly.
            self.lastEvent[1] = e
            self.lastEvent = e
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the document off the element stack."""
        # The tail pointer is deliberately left where it was, as before.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
-\r
class ErrorHandler:
    """Error callbacks: warnings are printed, errors and fatal errors raised.

    The method names mirror those of the SAX error handler interface.
    """

    def warning(self, exception):
        """Print *exception* to standard output and continue."""
        # Parenthesised form is valid both as a print statement and as a
        # print() call, so the module parses on either language version.
        print(exception)

    def error(self, exception):
        """Re-raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Re-raise *exception*."""
        raise exception
-\r
class DOMEventStream:
    """Iterator of ``(event_type, node)`` events produced by a PullDOM handler.

    stream  -- object with ``read(size)`` supplying the document text.
    parser  -- SAX parser; if it has no ``feed`` method the whole stream is
               parsed up front (see _slurp) instead of incrementally.
    bufsize -- size passed to ``stream.read`` for each incremental feed.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # Not an incremental parser: replace getEvent on this instance.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Sequence-protocol iteration: return the next event, ignoring *pos*."""
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def next(self):
        """Return the next event or raise StopIteration when none is left."""
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    # Iterator protocol spelling used by newer interpreters.
    __next__ = next

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to the end of *node*, building its subtree.

        Every node delivered before the event that carries *node* itself
        is appended to the innermost open element below *node*.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next queued event, feeding the parser as needed.

        Returns None once the stream is exhausted (the parser is closed at
        that point).
        """
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            # Queue drained: new events must be linked after the head cell.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None when every event has been emitted, so that
            iteration ends the same way it does with getEvent().
        """
        cell = self.pulldom.firstEvent[1]
        if not cell:
            return None
        self.pulldom.firstEvent[1] = cell[1]
        return cell[0]

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
-\r
class SAX2DOM(PullDOM):
    """PullDOM variant that also links each new node into the DOM tree.

    After the inherited handler has created a node and queued its event,
    the node is appended to its parent taken from the element stack.
    """

    def _attach_top_element(self):
        """Append the element just pushed to the node below it on the stack."""
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def _attach_last_event_node(self):
        """Append the node of the most recent event to the innermost open node."""
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def startElementNS(self, name, tagName, attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        self._attach_top_element()

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        self._attach_top_element()

    def processingInstruction(self, target, data):
        # Without a document the inherited handler only parks the
        # instruction in pending_events: there is no node and no open
        # parent yet, so there is nothing to attach.  It is still delivered
        # as an event once the document is built.
        has_document = bool(self.document)
        PullDOM.processingInstruction(self, target, data)
        if has_document:
            self._attach_last_event_node()

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        self._attach_last_event_node()

    def characters(self, chars):
        PullDOM.characters(self, chars)
        self._attach_last_event_node()
-\r
-\r
# Read size used by parse() when the caller does not pass a bufsize.
default_bufsize = 2 ** 14 - 20
-\r
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string (taken as a file name and opened here) or
        any object with a ``read(size)`` method.
    parser  -- SAX parser to use; a default one is created when omitted.
    bufsize -- read size per feed; ``default_bufsize`` when None.

    A file opened here is owned by the returned stream; this function does
    not close it.
    """
    if bufsize is None:
        bufsize = default_bufsize
    # isinstance() also accepts subclasses of the string types.
    if isinstance(stream_or_string, tuple(_StringTypes)):
        # Binary mode: leave decoding and newline handling to the parser.
        stream = open(stream_or_string, 'rb')
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
-\r
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in *string*.

    string -- the document text; its length is used as the read size, so
        the whole text is fed to the parser in one read.
    parser -- SAX parser to use; a default one is created when omitted.
    """
    # Prefer the legacy string buffers where they exist and fall back to
    # io.StringIO on interpreters that no longer ship them.
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)