]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | """An XML Reader is the SAX 2 name for an XML parser. XML Parsers\r |
2 | should be based on this code. """\r | |
3 | \r | |
4 | import handler\r | |
5 | \r | |
6 | from _exceptions import SAXNotSupportedException, SAXNotRecognizedException\r | |
7 | \r | |
8 | \r | |
9 | # ===== XMLREADER =====\r | |
10 | \r | |
class XMLReader:
    """Callback-based interface for reading an XML document.

    A SAX2 driver for an XML parser implements this interface. Through
    it an application can query and change parser features and
    properties, install the handlers that receive parse events, and
    start parsing a document.

    SAX interfaces are synchronous: a parse method only returns once
    parsing has finished, and the reader waits for each event-handler
    callback to return before it reports the next event."""

    def __init__(self):
        # Every handler slot starts out holding a default object built
        # from the handler module.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    # --- parsing ---

    def parse(self, source):
        """Parse an XML document given as a system identifier or an
        InputSource. This base implementation always raises
        NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    # --- handler registration ---

    def getContentHandler(self):
        "Return the ContentHandler currently registered."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Install the object that will receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Return the DTD handler currently registered."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Install the object that will receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Return the EntityResolver currently registered."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Install the object used to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Return the ErrorHandler currently registered."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Install the object that will receive error-message events."
        self._err_handler = handler

    # --- localization ---

    def setLocale(self, locale):
        """Request a locale for error and warning messages.

        Localization is optional for SAX parsers, but a parser that
        cannot honour the requested locale must raise a SAX exception.
        Applications may ask for a locale change while a parse is in
        progress. This base implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    # --- features and properties ---
    # The base class recognizes no features or properties, so all four
    # accessors below raise SAXNotRecognizedException.

    def getFeature(self, name):
        "Look up and return the state of the SAX2 feature called name."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        "Set the state of the SAX2 feature called name."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        "Look up and return the value of the SAX2 property called name."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        "Set the value of the SAX2 property called name."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90 | \r | |
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the number of bytes parse() requests from the
        # stream per read() call.
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse source by pushing it through feed in bufsize chunks.

        source is passed through saxutils.prepare_input_source, then
        prepareParser is called with the result. The byte stream of
        the prepared source is read in chunks of at most bufsize; each
        non-empty chunk is handed to feed, and close is called once
        the stream is exhausted."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # End of input is signalled by an empty read. Test truthiness
        # instead of comparing with "": an empty bytes object is not
        # equal to "" on Python 3, which would make the loop spin forever.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
160 | \r | |
161 | # ===== LOCATOR =====\r | |
162 | \r | |
class Locator:
    """Interface that ties a SAX event to a position in the document.

    Results from a locator are only valid while a DocumentHandler
    method is being called; at any other time they are unpredictable.

    The methods of this base class return placeholder values: -1 for
    the numeric positions and None for the identifiers."""

    # --- position within the document ---

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    # --- identity of the document ---

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None
184 | \r | |
185 | # ===== INPUTSOURCE =====\r | |
186 | \r | |
class InputSource:
    """Bundle of everything an XMLReader needs in order to read an entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with its character encoding) and/or a
    character stream for the entity.

    Applications build these objects to hand to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    The InputSource is owned by the application: an XMLReader must not
    modify an InputSource it was given, although it may copy one and
    modify the copy."""

    def __init__(self, system_id = None):
        # Only the system identifier can be supplied at construction
        # time; everything else starts unset and is filled in through
        # the setter methods.
        self.__public_id = None
        self.__system_id = system_id
        self.__encoding = None
        self.__charfile = None
        self.__bytefile = None

    # --- identifiers ---

    def setPublicId(self, public_id):
        "Store public_id as the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Return the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Store system_id as the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Return the system identifier of this InputSource."
        return self.__system_id

    # --- encoding ---

    def setEncoding(self, encoding):
        """Store the character encoding of this InputSource.

        encoding must be a string that would be acceptable in an XML
        encoding declaration (section 4.3.3 of the XML recommendation).

        When the InputSource also holds a character stream, the
        encoding attribute is ignored."""
        self.__encoding = encoding

    def getEncoding(self):
        "Return the character encoding of this InputSource."
        return self.__encoding

    # --- streams ---

    def setByteStream(self, bytefile):
        """Store the byte stream for this input source: a Python
        file-like object that performs no byte-to-character conversion.

        A SAX parser ignores the byte stream when a character stream
        is also present, but prefers it over opening a URI connection
        itself.

        An application that knows the encoding of the byte stream
        should record it with setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding gives the character encoding of this byte stream,
        or None when it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream for this input source. (It must
        be a Python 2.0 Unicode-wrapped file-like object that converts
        its content to Unicode strings.)

        When a character stream is present the SAX parser ignores any
        byte stream and does not try to open a URI connection to the
        system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Return the character stream for this input source."
        return self.__charfile
273 | \r | |
274 | # ===== ATTRIBUTESIMPL =====\r | |
275 | \r | |
class AttributesImpl:
    """Non-namespace-aware attribute collection backed by a mapping.

    Attribute names double as qualified names, so the *ByQName and
    *ByName lookups simply validate the name and hand it back."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is
        stored by reference, not copied."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raise KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; raise KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return the name for qualified name name; raise KeyError if absent."
        if name not in self._attrs:
            # Call syntax works on Python 2 and 3; the old
            # "raise KeyError, name" form is a syntax error on Python 3.
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return the qualified name for name; raise KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    # The listing methods wrap the mapping views in list() so callers
    # receive a real list on every Python version.

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified names (same as the names here)."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return whether an attribute called name exists."
        return name in self._attrs

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class. The new instance
        wraps the same underlying mapping object; it is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
338 | \r | |
339 | # ===== ATTRIBUTESNSIMPL =====\r | |
340 | \r | |
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name that belongs to each pair."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored by reference, not copied."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for qualified name name; raise KeyError if absent."
        # qnames maps (ns_uri, lname) -> qname, so finding a qname means
        # scanning the entries for a matching value.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Call syntax works on Python 2 and 3; the old
        # "raise KeyError, name" form is a syntax error on Python 3.
        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) pair for qualified name name."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for the (ns_uri, lname) pair name."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new instance of the same class. The new instance
        wraps the same two mapping objects; they are not duplicated."""
        return self.__class__(self._attrs, self._qnames)
373 | \r | |
374 | \r | |
def _test():
    """Smoke test: check that the interface classes can be instantiated
    with their default arguments."""
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()


# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    _test()