]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | """\r |
2 | SAX driver for the pyexpat C module. This driver works with\r | |
3 | pyexpat.__version__ == '2.22'.\r | |
4 | """\r | |
5 | \r | |
6 | version = "0.20"\r | |
7 | \r | |
8 | from xml.sax._exceptions import *\r | |
9 | from xml.sax.handler import feature_validation, feature_namespaces\r | |
10 | from xml.sax.handler import feature_namespace_prefixes\r | |
11 | from xml.sax.handler import feature_external_ges, feature_external_pes\r | |
12 | from xml.sax.handler import feature_string_interning\r | |
13 | from xml.sax.handler import property_xml_string, property_interning_dict\r | |
14 | \r | |
# Jython's xml.parsers.expat import does not fail with ImportError, so the
# platform is checked explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
# A module that imports but lacks ParserCreate is treated as unavailable too.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler
29 | \r | |
# Short module-level names for the attribute containers used by the
# element callbacks.
AttributesImpl, AttributesNSImpl = (xmlreader.AttributesImpl,
                                    xmlreader.AttributesNSImpl)

# When the interpreter provides weak references, the locator holds the
# parser through a weak proxy so that parser and content handler do not
# form a reference cycle.  Without them, the "proxy" is the object itself.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    from weakref import proxy as _mkproxy
    del _weakref
45 | \r | |
class _ClosedParser:
    """Placeholder stored in ExpatParser._parser after close().

    close() copies ErrorColumnNumber and ErrorLineNumber from the real
    expat parser onto an instance of this class so the last position
    stays readable.
    """
48 | \r | |
49 | # --- ExpatLocator\r | |
50 | \r | |
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    The parser is held through _mkproxy() (a weak proxy when weak
    references are available) so that the locator does not create a
    circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        # parser is the ExpatParser whose position is reported.
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the expat ErrorColumnNumber, or None without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return the expat ErrorLineNumber, or 1 without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        owner = self._ref
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
83 | \r | |
84 | \r | |
85 | # --- ExpatParser\r | |
86 | \r | |
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The driver owns an expat parser object (rebuilt by reset()) and
    forwards expat callbacks to the registered SAX handlers.  It also
    implements the Locator interface on top of the current expat parser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver.

        namespaceHandling -- initial state of the namespaces feature; when
            true, reset() creates a namespace-aware expat parser.
        bufsize -- passed unchanged to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        # None until reset(); a _ClosedParser placeholder after a close()
        # that did not complete normally.
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []
        self._external_ges = 1
        # Dictionary handed to expat as intern=, or None when disabled.
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Tell expat the base URI taken from the source's system id."""
        if source.getSystemId() is not None:
            base = source.getSystemId()
            # Python 2: unicode system ids are handed to expat as UTF-8.
            if isinstance(base, unicode):
                base = base.encode('utf-8')
            self._parser.SetBase(base)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register the content handler, re-binding expat if parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of a SAX feature.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX feature.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for unknown names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an interning dictionary that is already installed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of a SAX property.

        Raises SAXNotSupportedException when the XML string is requested
        without a live expat parser, and SAXNotRecognizedException for
        unknown properties or when expat lacks GetInputContext.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            parser = self._parser
            # The _ClosedParser placeholder left by close() is not a live
            # parser; report it the same way as "no parser at all".
            if parser is None or isinstance(parser, _ClosedParser):
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
            if hasattr(parser, "GetInputContext"):
                return parser.GetInputContext()
            raise SAXNotRecognizedException(
                "This version of expat does not support getting"
                " the XML string")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX property.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for unknown names.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand a chunk of data to expat.

        The first chunk resets the parser and fires startDocument().
        expat errors are wrapped in SAXParseException and passed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document and release the expat parser.

        Does nothing while an external entity is being parsed, or when
        there is no live parser.
        """
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser

    def _reset_cont_handler(self):
        # Bind the content handler's methods directly as expat callbacks.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        # Install (or clear) the expat callbacks that serve the lexical
        # handler property.
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Goes through start_doctype_decl to reorder the arguments.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and wire up all callbacks."""
        if self._namespaces:
            # " " separates namespace URI, local name and prefix in the
            # names expat reports; the *_ns callbacks split on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        # Compare against None, as setProperty()/_reset_lex_handler_prop()
        # do, so a handler object that happens to be falsy is still used.
        if self._lex_handler_prop is not None:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Translate a namespace-mode expat start tag into startElementNS.

        The reported name splits into one part (no namespace), two parts
        (uri, localname) or three parts (uri, localname, prefix).
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                # uri, localname, prefix -> "prefix:localname"
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        # The element's own qname is passed as None.
        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Translate a namespace-mode expat end tag into endElementNS."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # startDTD is called with (name, pubid, sysid); the incoming
        # callback arguments arrive as sysid, pubid.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 without reading anything when external general entities
        are disabled, 1 after the entity has been parsed, and 0 if parsing
        the entity raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Swap in a sub-parser and the entity's source for the duration.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): on this path the saved parser/source pair is
            # left on _entity_stack and the sub-parser stays installed.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
417 | \r | |
418 | # ---\r | |
419 | \r | |
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    parser = ExpatParser(*args, **kwargs)
    return parser
422 | \r | |
423 | # ---\r | |
424 | \r | |
if __name__ == "__main__":
    # Demo: parse a remote document and echo it through XMLGenerator.
    import xml.sax.saxutils
    demo_url = "http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml"
    reader = create_parser()
    reader.setContentHandler(xml.sax.saxutils.XMLGenerator())
    reader.setErrorHandler(xml.sax.ErrorHandler())
    reader.parse(demo_url)