]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/xml/etree/ElementTree.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / xml / etree / ElementTree.py
CommitLineData
4710c53d 1#\r
2# ElementTree\r
3# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $\r
4#\r
5# light-weight XML support for Python 2.3 and later.\r
6#\r
7# history (since 1.2.6):\r
8# 2005-11-12 fl added tostringlist/fromstringlist helpers\r
9# 2006-07-05 fl merged in selected changes from the 1.3 sandbox\r
10# 2006-07-05 fl removed support for 2.1 and earlier\r
11# 2007-06-21 fl added deprecation/future warnings\r
12# 2007-08-25 fl added doctype hook, added parser version attribute etc\r
13# 2007-08-26 fl added new serializer code (better namespace handling, etc)\r
14# 2007-08-27 fl warn for broken /tag searches on tree level\r
15# 2007-09-02 fl added html/text methods to serializer (experimental)\r
16# 2007-09-05 fl added method argument to tostring/tostringlist\r
17# 2007-09-06 fl improved error handling\r
18# 2007-09-13 fl added itertext, iterfind; assorted cleanups\r
19# 2007-12-15 fl added C14N hooks, copy method (experimental)\r
20#\r
21# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.\r
22#\r
23# fredrik@pythonware.com\r
24# http://www.pythonware.com\r
25#\r
26# --------------------------------------------------------------------\r
27# The ElementTree toolkit is\r
28#\r
29# Copyright (c) 1999-2008 by Fredrik Lundh\r
30#\r
31# By obtaining, using, and/or copying this software and/or its\r
32# associated documentation, you agree that you have read, understood,\r
33# and will comply with the following terms and conditions:\r
34#\r
35# Permission to use, copy, modify, and distribute this software and\r
36# its associated documentation for any purpose and without fee is\r
37# hereby granted, provided that the above copyright notice appears in\r
38# all copies, and that both that copyright notice and this permission\r
39# notice appear in supporting documentation, and that the name of\r
40# Secret Labs AB or the author not be used in advertising or publicity\r
41# pertaining to distribution of the software without specific, written\r
42# prior permission.\r
43#\r
44# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD\r
45# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-\r
46# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR\r
47# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY\r
48# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\r
49# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\r
50# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\r
51# OF THIS SOFTWARE.\r
52# --------------------------------------------------------------------\r
53\r
54# Licensed to PSF under a Contributor Agreement.\r
55# See http://www.python.org/psf/license for licensing details.\r
56\r
# Names exported by a star-import of this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
]

# Version identifier, exported as part of the public API.
VERSION = "1.3.0"
76\r
77##\r
78# The <b>Element</b> type is a flexible container object, designed to\r
79# store hierarchical data structures in memory. The type can be\r
80# described as a cross between a list and a dictionary.\r
81# <p>\r
82# Each element has a number of properties associated with it:\r
83# <ul>\r
84# <li>a <i>tag</i>. This is a string identifying what kind of data\r
85# this element represents (the element type, in other words).</li>\r
86# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>\r
87# <li>a <i>text</i> string.</li>\r
88# <li>an optional <i>tail</i> string.</li>\r
89# <li>a number of <i>child elements</i>, stored in a Python sequence</li>\r
90# </ul>\r
91#\r
92# To create an element instance, use the {@link #Element} constructor\r
93# or the {@link #SubElement} factory function.\r
94# <p>\r
95# The {@link #ElementTree} class can be used to wrap an element\r
96# structure, and convert it from and to XML.\r
97##\r
98\r
99import sys\r
100import re\r
101import warnings\r
102\r
103\r
104class _SimpleElementPath(object):\r
105 # emulate pre-1.2 find/findtext/findall behaviour\r
106 def find(self, element, tag, namespaces=None):\r
107 for elem in element:\r
108 if elem.tag == tag:\r
109 return elem\r
110 return None\r
111 def findtext(self, element, tag, default=None, namespaces=None):\r
112 elem = self.find(element, tag)\r
113 if elem is None:\r
114 return default\r
115 return elem.text or ""\r
116 def iterfind(self, element, tag, namespaces=None):\r
117 if tag[:3] == ".//":\r
118 for elem in element.iter(tag[3:]):\r
119 yield elem\r
120 for elem in element:\r
121 if elem.tag == tag:\r
122 yield elem\r
123 def findall(self, element, tag, namespaces=None):\r
124 return list(self.iterfind(element, tag, namespaces))\r
125\r
126try:\r
127 from . import ElementPath\r
128except ImportError:\r
129 ElementPath = _SimpleElementPath()\r
130\r
131##\r
132# Parser error. This is a subclass of <b>SyntaxError</b>.\r
133# <p>\r
134# In addition to the exception value, an exception instance contains a\r
135# specific exception code in the <b>code</b> attribute, and the line and\r
136# column of the error in the <b>position</b> attribute.\r
137\r
class ParseError(SyntaxError):
    """Parser error; a plain subclass of SyntaxError.

    The class itself adds nothing.  According to the module notes, code
    that raises it is expected to attach the specific error code as
    ``code`` and the (line, column) of the error as ``position``.
    """
140\r
141# --------------------------------------------------------------------\r
142\r
143##\r
144# Checks if an object appears to be a valid element object.\r
145#\r
# @param element An element instance.
147# @return A true value if this is an element object.\r
148# @defreturn flag\r
149\r
def iselement(element):
    # Duck-typed check: real Element instances qualify, and so does any
    # object that merely exposes a "tag" attribute.
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154\r
155##\r
156# Element class. This class defines the Element interface, and\r
157# provides a reference implementation of this interface.\r
158# <p>\r
159# The element name, attribute names, and attribute values can be\r
160# either ASCII strings (ordinary Python strings containing only 7-bit\r
161# ASCII characters) or Unicode strings.\r
162#\r
163# @param tag The element name.\r
164# @param attrib An optional dictionary, containing element attributes.\r
165# @param **extra Additional attributes, given as keyword arguments.\r
166# @see Element\r
167# @see SubElement\r
168# @see Comment\r
169# @see ProcessingInstruction\r
170\r
171class Element(object):\r
172 # <tag attrib>text<child/>...</tag>tail\r
173\r
174 ##\r
175 # (Attribute) Element tag.\r
176\r
177 tag = None\r
178\r
179 ##\r
180 # (Attribute) Element attribute dictionary. Where possible, use\r
181 # {@link #Element.get},\r
182 # {@link #Element.set},\r
183 # {@link #Element.keys}, and\r
184 # {@link #Element.items} to access\r
185 # element attributes.\r
186\r
187 attrib = None\r
188\r
189 ##\r
190 # (Attribute) Text before first subelement. This is either a\r
191 # string or the value None. Note that if there was no text, this\r
192 # attribute may be either None or an empty string, depending on\r
193 # the parser.\r
194\r
195 text = None\r
196\r
197 ##\r
198 # (Attribute) Text after this element's end tag, but before the\r
199 # next sibling element's start tag. This is either a string or\r
200 # the value None. Note that if there was no text, this attribute\r
201 # may be either None or an empty string, depending on the parser.\r
202\r
203 tail = None # text after end tag, if any\r
204\r
205 # constructor\r
206\r
207 def __init__(self, tag, attrib={}, **extra):\r
208 attrib = attrib.copy()\r
209 attrib.update(extra)\r
210 self.tag = tag\r
211 self.attrib = attrib\r
212 self._children = []\r
213\r
214 def __repr__(self):\r
215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self))\r
216\r
217 ##\r
218 # Creates a new element object of the same type as this element.\r
219 #\r
220 # @param tag Element tag.\r
221 # @param attrib Element attributes, given as a dictionary.\r
222 # @return A new element instance.\r
223\r
224 def makeelement(self, tag, attrib):\r
225 return self.__class__(tag, attrib)\r
226\r
227 ##\r
228 # (Experimental) Copies the current element. This creates a\r
229 # shallow copy; subelements will be shared with the original tree.\r
230 #\r
231 # @return A new element instance.\r
232\r
233 def copy(self):\r
234 elem = self.makeelement(self.tag, self.attrib)\r
235 elem.text = self.text\r
236 elem.tail = self.tail\r
237 elem[:] = self\r
238 return elem\r
239\r
240 ##\r
241 # Returns the number of subelements. Note that this only counts\r
242 # full elements; to check if there's any content in an element, you\r
243 # have to check both the length and the <b>text</b> attribute.\r
244 #\r
245 # @return The number of subelements.\r
246\r
247 def __len__(self):\r
248 return len(self._children)\r
249\r
250 def __nonzero__(self):\r
251 warnings.warn(\r
252 "The behavior of this method will change in future versions. "\r
253 "Use specific 'len(elem)' or 'elem is not None' test instead.",\r
254 FutureWarning, stacklevel=2\r
255 )\r
256 return len(self._children) != 0 # emulate old behaviour, for now\r
257\r
258 ##\r
259 # Returns the given subelement, by index.\r
260 #\r
261 # @param index What subelement to return.\r
262 # @return The given subelement.\r
263 # @exception IndexError If the given element does not exist.\r
264\r
265 def __getitem__(self, index):\r
266 return self._children[index]\r
267\r
268 ##\r
269 # Replaces the given subelement, by index.\r
270 #\r
271 # @param index What subelement to replace.\r
272 # @param element The new element value.\r
273 # @exception IndexError If the given element does not exist.\r
274\r
275 def __setitem__(self, index, element):\r
276 # if isinstance(index, slice):\r
277 # for elt in element:\r
278 # assert iselement(elt)\r
279 # else:\r
280 # assert iselement(element)\r
281 self._children[index] = element\r
282\r
283 ##\r
284 # Deletes the given subelement, by index.\r
285 #\r
286 # @param index What subelement to delete.\r
287 # @exception IndexError If the given element does not exist.\r
288\r
289 def __delitem__(self, index):\r
290 del self._children[index]\r
291\r
292 ##\r
293 # Adds a subelement to the end of this element. In document order,\r
294 # the new element will appear after the last existing subelement (or\r
295 # directly after the text, if it's the first subelement), but before\r
296 # the end tag for this element.\r
297 #\r
298 # @param element The element to add.\r
299\r
300 def append(self, element):\r
301 # assert iselement(element)\r
302 self._children.append(element)\r
303\r
304 ##\r
305 # Appends subelements from a sequence.\r
306 #\r
307 # @param elements A sequence object with zero or more elements.\r
308 # @since 1.3\r
309\r
310 def extend(self, elements):\r
311 # for element in elements:\r
312 # assert iselement(element)\r
313 self._children.extend(elements)\r
314\r
315 ##\r
316 # Inserts a subelement at the given position in this element.\r
317 #\r
318 # @param index Where to insert the new subelement.\r
319\r
320 def insert(self, index, element):\r
321 # assert iselement(element)\r
322 self._children.insert(index, element)\r
323\r
324 ##\r
325 # Removes a matching subelement. Unlike the <b>find</b> methods,\r
326 # this method compares elements based on identity, not on tag\r
327 # value or contents. To remove subelements by other means, the\r
328 # easiest way is often to use a list comprehension to select what\r
329 # elements to keep, and use slice assignment to update the parent\r
330 # element.\r
331 #\r
332 # @param element What element to remove.\r
333 # @exception ValueError If a matching element could not be found.\r
334\r
335 def remove(self, element):\r
336 # assert iselement(element)\r
337 self._children.remove(element)\r
338\r
339 ##\r
340 # (Deprecated) Returns all subelements. The elements are returned\r
341 # in document order.\r
342 #\r
343 # @return A list of subelements.\r
344 # @defreturn list of Element instances\r
345\r
346 def getchildren(self):\r
347 warnings.warn(\r
348 "This method will be removed in future versions. "\r
349 "Use 'list(elem)' or iteration over elem instead.",\r
350 DeprecationWarning, stacklevel=2\r
351 )\r
352 return self._children\r
353\r
354 ##\r
355 # Finds the first matching subelement, by tag name or path.\r
356 #\r
357 # @param path What element to look for.\r
358 # @keyparam namespaces Optional namespace prefix map.\r
359 # @return The first matching element, or None if no element was found.\r
360 # @defreturn Element or None\r
361\r
362 def find(self, path, namespaces=None):\r
363 return ElementPath.find(self, path, namespaces)\r
364\r
365 ##\r
366 # Finds text for the first matching subelement, by tag name or path.\r
367 #\r
368 # @param path What element to look for.\r
369 # @param default What to return if the element was not found.\r
370 # @keyparam namespaces Optional namespace prefix map.\r
371 # @return The text content of the first matching element, or the\r
372 # default value no element was found. Note that if the element\r
373 # is found, but has no text content, this method returns an\r
374 # empty string.\r
375 # @defreturn string\r
376\r
377 def findtext(self, path, default=None, namespaces=None):\r
378 return ElementPath.findtext(self, path, default, namespaces)\r
379\r
380 ##\r
381 # Finds all matching subelements, by tag name or path.\r
382 #\r
383 # @param path What element to look for.\r
384 # @keyparam namespaces Optional namespace prefix map.\r
385 # @return A list or other sequence containing all matching elements,\r
386 # in document order.\r
387 # @defreturn list of Element instances\r
388\r
389 def findall(self, path, namespaces=None):\r
390 return ElementPath.findall(self, path, namespaces)\r
391\r
392 ##\r
393 # Finds all matching subelements, by tag name or path.\r
394 #\r
395 # @param path What element to look for.\r
396 # @keyparam namespaces Optional namespace prefix map.\r
397 # @return An iterator or sequence containing all matching elements,\r
398 # in document order.\r
399 # @defreturn a generated sequence of Element instances\r
400\r
401 def iterfind(self, path, namespaces=None):\r
402 return ElementPath.iterfind(self, path, namespaces)\r
403\r
404 ##\r
405 # Resets an element. This function removes all subelements, clears\r
406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes\r
407 # to None.\r
408\r
409 def clear(self):\r
410 self.attrib.clear()\r
411 self._children = []\r
412 self.text = self.tail = None\r
413\r
414 ##\r
415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but\r
416 # some implementations may handle this a bit more efficiently.\r
417 #\r
418 # @param key What attribute to look for.\r
419 # @param default What to return if the attribute was not found.\r
420 # @return The attribute value, or the default value, if the\r
421 # attribute was not found.\r
422 # @defreturn string or None\r
423\r
424 def get(self, key, default=None):\r
425 return self.attrib.get(key, default)\r
426\r
427 ##\r
428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,\r
429 # but some implementations may handle this a bit more efficiently.\r
430 #\r
431 # @param key What attribute to set.\r
432 # @param value The attribute value.\r
433\r
434 def set(self, key, value):\r
435 self.attrib[key] = value\r
436\r
437 ##\r
438 # Gets a list of attribute names. The names are returned in an\r
439 # arbitrary order (just like for an ordinary Python dictionary).\r
440 # Equivalent to <b>attrib.keys()</b>.\r
441 #\r
442 # @return A list of element attribute names.\r
443 # @defreturn list of strings\r
444\r
445 def keys(self):\r
446 return self.attrib.keys()\r
447\r
448 ##\r
449 # Gets element attributes, as a sequence. The attributes are\r
450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.\r
451 #\r
452 # @return A list of (name, value) tuples for all attributes.\r
453 # @defreturn list of (string, string) tuples\r
454\r
455 def items(self):\r
456 return self.attrib.items()\r
457\r
458 ##\r
459 # Creates a tree iterator. The iterator loops over this element\r
460 # and all subelements, in document order, and returns all elements\r
461 # with a matching tag.\r
462 # <p>\r
463 # If the tree structure is modified during iteration, new or removed\r
464 # elements may or may not be included. To get a stable set, use the\r
465 # list() function on the iterator, and loop over the resulting list.\r
466 #\r
467 # @param tag What tags to look for (default is to return all elements).\r
468 # @return An iterator containing all the matching elements.\r
469 # @defreturn iterator\r
470\r
471 def iter(self, tag=None):\r
472 if tag == "*":\r
473 tag = None\r
474 if tag is None or self.tag == tag:\r
475 yield self\r
476 for e in self._children:\r
477 for e in e.iter(tag):\r
478 yield e\r
479\r
480 # compatibility\r
481 def getiterator(self, tag=None):\r
482 # Change for a DeprecationWarning in 1.4\r
483 warnings.warn(\r
484 "This method will be removed in future versions. "\r
485 "Use 'elem.iter()' or 'list(elem.iter())' instead.",\r
486 PendingDeprecationWarning, stacklevel=2\r
487 )\r
488 return list(self.iter(tag))\r
489\r
490 ##\r
491 # Creates a text iterator. The iterator loops over this element\r
492 # and all subelements, in document order, and returns all inner\r
493 # text.\r
494 #\r
495 # @return An iterator containing all inner text.\r
496 # @defreturn iterator\r
497\r
498 def itertext(self):\r
499 tag = self.tag\r
500 if not isinstance(tag, basestring) and tag is not None:\r
501 return\r
502 if self.text:\r
503 yield self.text\r
504 for e in self:\r
505 for s in e.itertext():\r
506 yield s\r
507 if e.tail:\r
508 yield e.tail\r
509\r
510# compatibility\r
511_Element = _ElementInterface = Element\r
512\r
513##\r
514# Subelement factory. This function creates an element instance, and\r
515# appends it to an existing element.\r
516# <p>\r
517# The element name, attribute names, and attribute values can be\r
518# either 8-bit ASCII strings or Unicode strings.\r
519#\r
520# @param parent The parent element.\r
521# @param tag The subelement name.\r
522# @param attrib An optional dictionary, containing element attributes.\r
523# @param **extra Additional attributes, given as keyword arguments.\r
524# @return An element instance.\r
525# @defreturn Element\r
526\r
def SubElement(parent, tag, attrib={}, **extra):
    # Build the child through parent.makeelement and attach it with
    # parent.append.  The attribute dictionary is copied before the
    # keyword arguments are merged in, so neither the caller's
    # dictionary nor the shared default is ever modified.
    attributes = attrib.copy()
    attributes.update(extra)
    child = parent.makeelement(tag, attributes)
    parent.append(child)
    return child
533\r
534##\r
535# Comment element factory. This factory function creates a special\r
536# element that will be serialized as an XML comment by the standard\r
537# serializer.\r
538# <p>\r
539# The comment string can be either an 8-bit ASCII string or a Unicode\r
540# string.\r
541#\r
542# @param text A string containing the comment string.\r
543# @return An element instance, representing a comment.\r
544# @defreturn Element\r
545\r
def Comment(text=None):
    # The Comment function object itself is used as the element's tag;
    # that is the marker the serializer checks to emit "<!--...-->".
    node = Element(Comment)
    node.text = text
    return node
550\r
551##\r
552# PI element factory. This factory function creates a special element\r
553# that will be serialized as an XML processing instruction by the standard\r
554# serializer.\r
555#\r
556# @param target A string containing the PI target.\r
557# @param text A string containing the PI contents, if any.\r
558# @return An element instance, representing a PI.\r
559# @defreturn Element\r
560\r
def ProcessingInstruction(target, text=None):
    # The ProcessingInstruction function object itself is used as the
    # element's tag.  The element text is the target, followed by a
    # single space and the contents when contents are given.
    node = Element(ProcessingInstruction)
    content = target
    if text:
        content = content + " " + text
    node.text = content
    return node

# Short alias for the factory above.
PI = ProcessingInstruction
569\r
570##\r
571# QName wrapper. This can be used to wrap a QName attribute value, in\r
572# order to get proper namespace handling on output.\r
573#\r
574# @param text A string containing the QName value, in the form {uri}local,\r
575# or, if the tag argument is given, the URI part of a QName.\r
576# @param tag Optional tag. If given, the first argument is interpreted as\r
577# an URI, and this argument is interpreted as a local name.\r
578# @return An opaque object, representing the QName.\r
579\r
class QName(object):
    # Wrapper around a qualified name.  The canonical "{uri}local"
    # string is kept in the `text` attribute.

    def __init__(self, text_or_uri, tag=None):
        # With a (non-empty) tag the first argument is the namespace
        # URI; otherwise it already is the complete name.
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # Hashes like the underlying string.
        return hash(self.text)

    def __cmp__(self, other):
        # Compare by text; a non-QName operand is compared as-is.
        rhs = other.text if isinstance(other, QName) else other
        return cmp(self.text, rhs)
593\r
594# --------------------------------------------------------------------\r
595\r
596##\r
597# ElementTree wrapper class. This class represents an entire element\r
598# hierarchy, and adds some extra support for serialization to and from\r
599# standard XML.\r
600#\r
601# @param element Optional root element.\r
602# @keyparam file Optional file handle or file name. If given, the\r
603# tree is initialized with the contents of this XML file.\r
604\r
class ElementTree(object):
    # Wraps a root element and adds parsing from, and serialization to,
    # files.

    ##
    # Constructor.
    #
    # @param element Optional root element.
    # @keyparam file Optional file handle or file name.  If given, the
    #     tree is initialized by parsing this XML file.

    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance (None for an empty tree).
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.  A file object only has
    #     to implement a <b>read(n)</b> method.  A file opened here from
    #     a file name is closed again before returning, also on errors;
    #     a file object passed in by the caller is left open.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element (the parser's close() result).
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        opened_here = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            opened_here = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            # Feed the document to the parser in 64 KiB chunks.
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if opened_here:
                source.close()

    ##
    # Creates a tree iterator for the root element.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    # Shared by find/findtext/findall/iterfind: a path starting with
    # "/" is rewritten to start with "./" and a FutureWarning is issued.
    # stacklevel=3 skips this helper and the public method, so the
    # warning is attributed to the code that called the public method.
    def _relative_path(self, path):
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Same as getroot().find(path), after rewriting a leading "/".
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return self._root.find(self._relative_path(path), namespaces)

    ##
    # Same as getroot().findtext(path), after rewriting a leading "/".
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return self._root.findtext(
            self._relative_path(path), default, namespaces
            )

    ##
    # Same as getroot().findall(path), after rewriting a leading "/".
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return self._root.findall(self._relative_path(path), namespaces)

    ##
    # Same as getroot().iterfind(path), after rewriting a leading "/".
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return self._root.iterfind(self._relative_path(path), namespaces)

    ##
    # Writes the element tree to a file.
    #
    # @param file_or_filename A file name, or a file object opened for
    #     writing.  A file opened here from a file name is always closed
    #     again, also when serialization fails; a file object passed in
    #     by the caller is left open.
    # @keyparam encoding Optional output encoding (default is US-ASCII,
    #     or UTF-8 for the "c14n" method).
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file (only applies to the "xml" method).  Use
    #     False for never, True for always, None for only if not
    #     US-ASCII or UTF-8.  None is default.
    # @keyparam default_namespace Optional namespace URI to serialize
    #     without a prefix.
    # @keyparam method Optional output method; a key of the module's
    #     serializer table, default is "xml".
    # @exception ValueError If the method is unknown.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        opened_here = not hasattr(file_or_filename, "write")
        if opened_here:
            out = open(file_or_filename, "wb")
        else:
            out = file_or_filename
        try:
            write = out.write
            # The declaration decision is made after the encoding
            # default has been filled in, so xml_declaration=True is
            # honoured even when no encoding was passed.
            if method == "xml" and (
                    xml_declaration or (
                        xml_declaration is None and
                        encoding not in ("utf-8", "us-ascii"))):
                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            if opened_here:
                out.close()

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
822\r
823# --------------------------------------------------------------------\r
824# serialization support\r
825\r
def _namespaces(elem, encoding, default_namespace=None):
    # Scan the tree below `elem` and work out which namespaces it uses.
    # Returns (qnames, namespaces): `qnames` maps each qualified name
    # seen to its *encoded* "prefix:local" form, `namespaces` maps each
    # namespace URI to the prefix chosen for it.

    qnames = {None: None}
    namespaces = {}
    if default_namespace:
        # the default namespace is serialized without a prefix
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # Compute and record the serialized form of one name.  Any
        # TypeError raised along the way is reported through the
        # module's serialization-error helper.
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
                return
            uri, tag = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # Not seen in this tree yet: try the module-level
                # prefix table, else invent "ns<N>".
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                qnames[qname] = encode("%s:%s" % (prefix, tag))
            else:
                qnames[qname] = encode(tag) # default element
        except TypeError:
            _raise_serialization_error(qname)

    def register(name):
        # Record a name unless it has already been handled.
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    try:
        walk = elem.iter
    except AttributeError:
        walk = elem.getiterator # cET compatibility
    for node in walk():
        tag = node.tag
        if isinstance(tag, QName):
            register(tag.text)
        elif isinstance(tag, basestring):
            register(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            register(key)
            if isinstance(value, QName):
                register(value.text)
        text = node.text
        if isinstance(text, QName):
            register(text.text)
    return qnames, namespaces
893\r
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # Emit `elem` and everything below it as XML through `write`.
    # `qnames` maps names to their serialized form.  `namespaces` (uri
    # to prefix) is only passed for the outermost call; the recursive
    # calls pass None, so xmlns declarations are written once.
    name = elem.tag
    body = elem.text
    if name is Comment:
        write("<!--%s-->" % _encode(body, encoding))
    elif name is ProcessingInstruction:
        write("<?%s?>" % _encode(body, encoding))
    else:
        name = qnames[name]
        if name is None:
            # No tag of its own: only the text and children are written.
            if body:
                write(_escape_cdata(body, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations: # sorted on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(' xmlns%s="%s"' % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes): # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(' %s="%s"' % (qnames[key], value))
            if body or len(elem):
                write(">")
                if body:
                    write(_escape_cdata(body, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                # no text and no children: self-closing form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
940\r
# Lower-case tag names of the HTML elements that never get an end tag
# when serialized with the "html" method.
# NOTE: every name needs its own comma; adjacent string literals are
# silently concatenated ("meta" "param" would become "metaparam").
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# use a set for membership tests where the builtin is available
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
948\r
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Write elem and its subtree as HTML through the write callable.

    Differs from the XML serializer in that elements listed in
    HTML_EMPTY get no end tag, the text of script/style elements is
    written without escaping, and attribute values use the HTML
    escaping rules.  namespaces (uri -> prefix) is only passed for the
    outermost element.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # element without a tag: only its content is emitted
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            # The lower-cased name is used for lookups only; the end tag
            # must repeat the start tag exactly as it was written.
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
998\r
999def _serialize_text(write, elem, encoding):\r
1000 for part in elem.itertext():\r
1001 write(part.encode(encoding))\r
1002 if elem.tail:\r
1003 write(elem.tail.encode(encoding))\r
1004\r
# Dispatch table: output method name -> serializer function.
# The optional "c14n" entry is registered at the end of the module,
# when its implementation can be imported.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
)
1012\r
1013##\r
1014# Registers a namespace prefix. The registry is global, and any\r
1015# existing mapping for either the given prefix or the namespace URI\r
1016# will be removed.\r
1017#\r
1018# @param prefix Namespace prefix.\r
1019# @param uri Namespace uri. Tags and attributes in this namespace\r
1020# will be serialized with the given prefix, if at all possible.\r
1021# @exception ValueError If the prefix is reserved, or is otherwise\r
1022# invalid.\r
1023\r
1024def register_namespace(prefix, uri):\r
1025 if re.match("ns\d+$", prefix):\r
1026 raise ValueError("Prefix format reserved for internal use")\r
1027 for k, v in _namespace_map.items():\r
1028 if k == uri or v == prefix:\r
1029 del _namespace_map[k]\r
1030 _namespace_map[uri] = prefix\r
1031\r
1032_namespace_map = {\r
1033 # "well-known" namespace prefixes\r
1034 "http://www.w3.org/XML/1998/namespace": "xml",\r
1035 "http://www.w3.org/1999/xhtml": "html",\r
1036 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",\r
1037 "http://schemas.xmlsoap.org/wsdl/": "wsdl",\r
1038 # xml schema\r
1039 "http://www.w3.org/2001/XMLSchema": "xs",\r
1040 "http://www.w3.org/2001/XMLSchema-instance": "xsi",\r
1041 # dublin core\r
1042 "http://purl.org/dc/elements/1.1/": "dc",\r
1043}\r
1044\r
1045def _raise_serialization_error(text):\r
1046 raise TypeError(\r
1047 "cannot serialize %r (type %s)" % (text, type(text).__name__)\r
1048 )\r
1049\r
1050def _encode(text, encoding):\r
1051 try:\r
1052 return text.encode(encoding, "xmlcharrefreplace")\r
1053 except (TypeError, AttributeError):\r
1054 _raise_serialization_error(text)\r
1055\r
1056def _escape_cdata(text, encoding):\r
1057 # escape character data\r
1058 try:\r
1059 # it's worth avoiding do-nothing calls for strings that are\r
1060 # shorter than 500 character, or so. assume that's, by far,\r
1061 # the most common case in most applications.\r
1062 if "&" in text:\r
1063 text = text.replace("&", "&amp;")\r
1064 if "<" in text:\r
1065 text = text.replace("<", "&lt;")\r
1066 if ">" in text:\r
1067 text = text.replace(">", "&gt;")\r
1068 return text.encode(encoding, "xmlcharrefreplace")\r
1069 except (TypeError, AttributeError):\r
1070 _raise_serialization_error(text)\r
1071\r
1072def _escape_attrib(text, encoding):\r
1073 # escape attribute value\r
1074 try:\r
1075 if "&" in text:\r
1076 text = text.replace("&", "&amp;")\r
1077 if "<" in text:\r
1078 text = text.replace("<", "&lt;")\r
1079 if ">" in text:\r
1080 text = text.replace(">", "&gt;")\r
1081 if "\"" in text:\r
1082 text = text.replace("\"", "&quot;")\r
1083 if "\n" in text:\r
1084 text = text.replace("\n", "&#10;")\r
1085 return text.encode(encoding, "xmlcharrefreplace")\r
1086 except (TypeError, AttributeError):\r
1087 _raise_serialization_error(text)\r
1088\r
1089def _escape_attrib_html(text, encoding):\r
1090 # escape attribute value\r
1091 try:\r
1092 if "&" in text:\r
1093 text = text.replace("&", "&amp;")\r
1094 if ">" in text:\r
1095 text = text.replace(">", "&gt;")\r
1096 if "\"" in text:\r
1097 text = text.replace("\"", "&quot;")\r
1098 return text.encode(encoding, "xmlcharrefreplace")\r
1099 except (TypeError, AttributeError):\r
1100 _raise_serialization_error(text)\r
1101\r
1102# --------------------------------------------------------------------\r
1103\r
1104##\r
1105# Generates a string representation of an XML element, including all\r
1106# subelements.\r
1107#\r
1108# @param element An Element instance.\r
1109# @keyparam encoding Optional output encoding (default is US-ASCII).\r
1110# @keyparam method Optional output method ("xml", "html", "text" or\r
1111# "c14n"; default is "xml").\r
1112# @return An encoded string containing the XML data.\r
1113# @defreturn string\r
1114\r
def tostring(element, encoding=None, method=None):
    """Serialize element (with its subtree) and return it as one string."""
    # minimal file-like object: write() just collects the fragments
    class _Collector:
        pass
    fragments = []
    sink = _Collector()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
1123\r
1124##\r
1125# Generates a string representation of an XML element, including all\r
1126# subelements. The string is returned as a sequence of string fragments.\r
1127#\r
1128# @param element An Element instance.\r
1129# @keyparam encoding Optional output encoding (default is US-ASCII).\r
1130# @keyparam method Optional output method ("xml", "html", "text" or\r
1131# "c14n"; default is "xml").\r
1132# @return A sequence object containing the XML data.\r
1133# @defreturn sequence\r
1134# @since 1.3\r
1135\r
def tostringlist(element, encoding=None, method=None):
    """Serialize element (with its subtree) into a list of fragments."""
    # minimal file-like object: write() just collects the fragments
    class _Collector:
        pass
    fragments = []
    sink = _Collector()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1145\r
1146##\r
1147# Writes an element tree or element structure to sys.stdout. This\r
1148# function should be used for debugging only.\r
1149# <p>\r
1150# The exact output format is implementation dependent. In this\r
1151# version, it's written as an ordinary XML file.\r
1152#\r
1153# @param elem An element tree or an individual element.\r
1154\r
def dump(elem):
    """Write a tree or element to sys.stdout (debugging aid)."""
    if isinstance(elem, ElementTree):
        tree = elem
    else:
        tree = ElementTree(elem)
    tree.write(sys.stdout)
    # make sure the output ends with a newline
    tail = tree.getroot().tail
    ends_with_newline = tail and tail[-1] == "\n"
    if not ends_with_newline:
        sys.stdout.write("\n")
1163\r
1164# --------------------------------------------------------------------\r
1165# parsing\r
1166\r
1167##\r
1168# Parses an XML document into an element tree.\r
1169#\r
1170# @param source A filename or file object containing XML data.\r
1171# @param parser An optional parser instance. If not given, the\r
1172# standard {@link XMLParser} parser is used.\r
1173# @return An ElementTree instance\r
1174\r
def parse(source, parser=None):
    """Load the XML document in source into a new ElementTree."""
    document = ElementTree()
    document.parse(source, parser)
    return document
1179\r
1180##\r
1181# Parses an XML document into an element tree incrementally, and reports\r
1182# what's going on to the user.\r
1183#\r
1184# @param source A filename or file object containing XML data.\r
1185# @param events A list of events to report back. If omitted, only "end"\r
1186# events are reported.\r
1187# @param parser An optional parser instance. If not given, the\r
1188# standard {@link XMLParser} parser is used.\r
1189# @return A (event, elem) iterator.\r
1190\r
1191def iterparse(source, events=None, parser=None):\r
1192 if not hasattr(source, "read"):\r
1193 source = open(source, "rb")\r
1194 if not parser:\r
1195 parser = XMLParser(target=TreeBuilder())\r
1196 return _IterParseIterator(source, events, parser)\r
1197\r
1198class _IterParseIterator(object):\r
1199\r
1200 def __init__(self, source, events, parser):\r
1201 self._file = source\r
1202 self._events = []\r
1203 self._index = 0\r
1204 self.root = self._root = None\r
1205 self._parser = parser\r
1206 # wire up the parser for event reporting\r
1207 parser = self._parser._parser\r
1208 append = self._events.append\r
1209 if events is None:\r
1210 events = ["end"]\r
1211 for event in events:\r
1212 if event == "start":\r
1213 try:\r
1214 parser.ordered_attributes = 1\r
1215 parser.specified_attributes = 1\r
1216 def handler(tag, attrib_in, event=event, append=append,\r
1217 start=self._parser._start_list):\r
1218 append((event, start(tag, attrib_in)))\r
1219 parser.StartElementHandler = handler\r
1220 except AttributeError:\r
1221 def handler(tag, attrib_in, event=event, append=append,\r
1222 start=self._parser._start):\r
1223 append((event, start(tag, attrib_in)))\r
1224 parser.StartElementHandler = handler\r
1225 elif event == "end":\r
1226 def handler(tag, event=event, append=append,\r
1227 end=self._parser._end):\r
1228 append((event, end(tag)))\r
1229 parser.EndElementHandler = handler\r
1230 elif event == "start-ns":\r
1231 def handler(prefix, uri, event=event, append=append):\r
1232 try:\r
1233 uri = (uri or "").encode("ascii")\r
1234 except UnicodeError:\r
1235 pass\r
1236 append((event, (prefix or "", uri or "")))\r
1237 parser.StartNamespaceDeclHandler = handler\r
1238 elif event == "end-ns":\r
1239 def handler(prefix, event=event, append=append):\r
1240 append((event, None))\r
1241 parser.EndNamespaceDeclHandler = handler\r
1242 else:\r
1243 raise ValueError("unknown event %r" % event)\r
1244\r
1245 def next(self):\r
1246 while 1:\r
1247 try:\r
1248 item = self._events[self._index]\r
1249 except IndexError:\r
1250 if self._parser is None:\r
1251 self.root = self._root\r
1252 raise StopIteration\r
1253 # load event buffer\r
1254 del self._events[:]\r
1255 self._index = 0\r
1256 data = self._file.read(16384)\r
1257 if data:\r
1258 self._parser.feed(data)\r
1259 else:\r
1260 self._root = self._parser.close()\r
1261 self._parser = None\r
1262 else:\r
1263 self._index = self._index + 1\r
1264 return item\r
1265\r
1266 def __iter__(self):\r
1267 return self\r
1268\r
1269##\r
1270# Parses an XML document from a string constant. This function can\r
1271# be used to embed "XML literals" in Python code.\r
1272#\r
1273# @param source A string containing XML data.\r
1274# @param parser An optional parser instance. If not given, the\r
1275# standard {@link XMLParser} parser is used.\r
1276# @return An Element instance.\r
1277# @defreturn Element\r
1278\r
def XML(text, parser=None):
    """Parse the XML document in the string text; return its root."""
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    return root
1284\r
1285##\r
1286# Parses an XML document from a string constant, and also returns\r
1287# a dictionary which maps from element id:s to elements.\r
1288#\r
1289# @param source A string containing XML data.\r
1290# @param parser An optional parser instance. If not given, the\r
1291# standard {@link XMLParser} parser is used.\r
1292# @return A tuple containing an Element instance and a dictionary.\r
1293# @defreturn (Element, dictionary)\r
1294\r
def XMLID(text, parser=None):
    """Parse text; return (root, dictionary mapping "id" values to elements)."""
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        if not ident:
            # missing or empty id attribute
            continue
        by_id[ident] = node
    return root, by_id
1306\r
1307##\r
1308# Parses an XML document from a string constant. Same as {@link #XML}.\r
1309#\r
1310# @def fromstring(text)\r
1311# @param source A string containing XML data.\r
1312# @return An Element instance.\r
1313# @defreturn Element\r
1314\r
1315fromstring = XML\r
1316\r
1317##\r
1318# Parses an XML document from a sequence of string fragments.\r
1319#\r
1320# @param sequence A list or other sequence containing XML data fragments.\r
1321# @param parser An optional parser instance. If not given, the\r
1322# standard {@link XMLParser} parser is used.\r
1323# @return An Element instance.\r
1324# @defreturn Element\r
1325# @since 1.3\r
1326\r
def fromstringlist(sequence, parser=None):
    """Parse a document given as a sequence of string fragments."""
    builder = parser
    if not builder:
        builder = XMLParser(target=TreeBuilder())
    for fragment in sequence:
        builder.feed(fragment)
    root = builder.close()
    return root
1333\r
1334# --------------------------------------------------------------------\r
1335\r
1336##\r
1337# Generic element structure builder. This builder converts a sequence\r
1338# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link\r
1339# #TreeBuilder.end} method calls to a well-formed element structure.\r
1340# <p>\r
1341# You can use this class to build an element structure using a custom XML\r
1342# parser, or a parser for some other XML-like format.\r
1343#\r
1344# @param element_factory Optional element factory. This factory\r
1345# is called to create new Element instances, as necessary.\r
1346\r
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = []    # text fragments collected since the last flush
        self._elem = []    # stack of currently open elements
        self._last = None  # most recently opened or closed element
        self._tail = None  # true if the last event was an end tag

    ##
    # Finishes building and hands back the toplevel element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        root = self._last
        assert root is not None, "missing toplevel element"
        return root

    # Attach the collected text to the last element: as its tail when
    # that element has already been closed, otherwise as its text.
    def _flush(self):
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string. This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        open_elements = self._elem
        if open_elements:
            # nest the new element inside the innermost open one
            open_elements[-1].append(elem)
        open_elements.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag, \
               "end tag mismatch (expected %s, got %s)" % (closed.tag, tag)
        self._tail = 1
        return closed
1423\r
1424##\r
1425# Element structure builder for XML source data, based on the\r
1426# <b>expat</b> parser.\r
1427#\r
1428# @keyparam target Target object. If omitted, the builder uses an\r
1429# instance of the standard {@link #TreeBuilder} class.\r
1430# @keyparam html Predefine HTML entities. This flag is not supported\r
1431# by the current implementation.\r
1432# @keyparam encoding Optional encoding. If given, the value overrides\r
1433# the encoding specified in the XML file.\r
1434# @see #ElementTree\r
1435# @see #TreeBuilder\r
1436\r
class XMLParser(object):

    ##
    # @keyparam html Accepted for compatibility; not used here.
    # @keyparam target Object receiving start/end/data (and optionally
    #     comment/pi/doctype) calls; defaults to a new TreeBuilder.
    # @keyparam encoding Optional encoding handed to expat.

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace uri and local name in expanded names
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None  # list of doctype tokens while inside one
        self.entity = {}      # extra entities: name -> replacement text
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _raiseerror(self, value):
        # re-raise an expat error as ParseError, keeping code and position
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                # expat delivers "uri}local"; add the opening brace
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start handler for attributes delivered as a dictionary
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start handler for attributes delivered as a flat
        # [name, value, name, value, ...] list
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        # comments are only forwarded if the target can handle them
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        # processing instructions are only forwarded if the target can
        # handle them
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        # handles everything without a dedicated callback: undefined
        # entity references and the tokens of a doctype declaration
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # same exception class that feed()/close() translate
                # into ParseError
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1]  # strip the quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                # Bound methods must be compared with "!=": every
                # attribute access creates a new method object, so an
                # identity test would always report an override.
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1640\r
1641# compatibility\r
1642XMLTreeBuilder = XMLParser\r
1643\r
1644# workaround circular import.\r
1645try:\r
1646 from ElementC14N import _serialize_c14n\r
1647 _serialize["c14n"] = _serialize_c14n\r
1648except ImportError:\r
1649 pass\r