"""An XML Reader is the SAX 2 name for an XML parser. XML parsers
should be based on this code."""
-\r
-import handler\r
-\r
-from _exceptions import SAXNotSupportedException, SAXNotRecognizedException\r
-\r
-\r
# ===== XMLREADER =====
-\r
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event.

    This base class only stores the four handler objects; parse() and
    the feature/property/locale methods raise unconditionally and are
    meant to be overridden by concrete drivers.
    """

    def __init__(self):
        """Install a default instance of each handler kind."""
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        Raises NotImplementedError: the base class provides no parser.
        """
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the current ContentHandler."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object to receive document content events."""
        # The parameter shadows the module-level `handler` import inside
        # this method only; the name is kept because it is part of the
        # public signature.
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the current DTD handler."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the current EntityResolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the current ErrorHandler."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        Raises SAXNotSupportedException: always, in this base class.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        Raises SAXNotRecognizedException: always, in this base class.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        Raises SAXNotRecognizedException: always, in this base class.
        """
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        Raises SAXNotRecognizedException: always, in this base class.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        Raises SAXNotRecognizedException: always, in this base class.
        """
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
-\r
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Remember the read chunk size used by parse() and set up handlers.

        bufsize is the size passed to each read() call on the byte
        stream in parse().
        """
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse source by feeding its byte stream to feed() in chunks.

        source is normalised with saxutils.prepare_input_source, passed
        to prepareParser(), then its byte stream is read self._bufsize
        at a time until read() returns an empty result, after which
        close() is called.
        """
        # Function-scope import kept from the original (presumably to
        # avoid a circular import with saxutils -- confirm before moving).
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Truthiness instead of `!= ""`: an empty bytes object never
        # compares equal to "", which would make the loop spin forever
        # on streams that return bytes.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException. This base implementation always
        raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException. This base implementation always
        raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined.

        This base implementation always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")
-\r
# ===== LOCATOR =====
-\r
class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to ContentHandler methods; at any other time, the
    results are unpredictable.

    This base class knows no location: the numeric getters return -1
    and the identifier getters return None."""

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        The base implementation returns -1."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends.

        The base implementation returns -1."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event.

        The base implementation returns None."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        The base implementation returns None."""
        return None
-\r
# ===== INPUTSOURCE =====
-\r
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        """Create an input source; every field except system_id starts as None."""
        # Double-underscore names are class-private (name-mangled), so
        # all access goes through the getters and setters below.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).
        No validation is performed here; the value is stored as given.

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 2.0 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
-\r
# ===== ATTRIBUTESIMPL =====
-\r
class AttributesImpl:
    """Read-only, mapping-like view of an element's attributes.

    In this non-namespace-aware implementation a name and a qualified
    name are the same thing, so the *ByQName methods mirror their
    plain counterparts.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is
        stored by reference, not copied."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here.

        name is not looked up, so unknown names also yield "CDATA"."""
        return "CDATA"

    def getValue(self, name):
        """Return the value stored for name (a missing name fails the lookup)."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value stored for the qualified name."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return name unchanged; raise KeyError(name) if it is unknown."""
        if name not in self._attrs:
            # Call form of raise: valid on both Python 2 and Python 3,
            # unlike the Python-2-only `raise KeyError, name`.
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return name unchanged; raise KeyError(name) if it is unknown."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        # list() guarantees a real list even where keys() returns a view.
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified names (same as getNames here)."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def has_key(self, name):
        """Return True if name is an attribute (legacy spelling of `in`)."""
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value for name, or alternative if name is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class over the same mapping.

        The underlying mapping object is shared, not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
-\r
# ===== ATTRIBUTESNSIMPL =====
-\r
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Values are keyed by (ns_uri, lname) pairs; a second mapping gives
    the qualified name for each pair.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.

        Both mappings are stored by reference, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Scans the qname mapping linearly and returns on the first match.
        Raises KeyError(name) if no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is name.

        Raises KeyError(name) if no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname) pair."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of all qualified names."""
        # list() guarantees a real list even where values() returns a view.
        return list(self._qnames.values())

    def copy(self):
        """Return a new instance of the same class over the same two mappings.

        The underlying mapping objects are shared, not duplicated."""
        return self.__class__(self._attrs, self._qnames)
-\r
-\r
def _test():
    """Smoke test: check that the argument-free classes can be instantiated."""
    XMLReader()
    IncrementalParser()
    Locator()
-\r
# Run the smoke test only when this module is executed as a script.
if __name__ == "__main__":
    _test()