]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Lib/xml/etree/ElementTree.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / xml / etree / ElementTree.py
CommitLineData
3257aa99
DM
1#\r
2# ElementTree\r
3# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $\r
4#\r
5# light-weight XML support for Python 2.3 and later.\r
6#\r
7# history (since 1.2.6):\r
8# 2005-11-12 fl added tostringlist/fromstringlist helpers\r
9# 2006-07-05 fl merged in selected changes from the 1.3 sandbox\r
10# 2006-07-05 fl removed support for 2.1 and earlier\r
11# 2007-06-21 fl added deprecation/future warnings\r
12# 2007-08-25 fl added doctype hook, added parser version attribute etc\r
13# 2007-08-26 fl added new serializer code (better namespace handling, etc)\r
14# 2007-08-27 fl warn for broken /tag searches on tree level\r
15# 2007-09-02 fl added html/text methods to serializer (experimental)\r
16# 2007-09-05 fl added method argument to tostring/tostringlist\r
17# 2007-09-06 fl improved error handling\r
18# 2007-09-13 fl added itertext, iterfind; assorted cleanups\r
19# 2007-12-15 fl added C14N hooks, copy method (experimental)\r
20#\r
21# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.\r
22#\r
23# fredrik@pythonware.com\r
24# http://www.pythonware.com\r
25#\r
26# --------------------------------------------------------------------\r
27# The ElementTree toolkit is\r
28#\r
29# Copyright (c) 1999-2008 by Fredrik Lundh\r
30#\r
31# By obtaining, using, and/or copying this software and/or its\r
32# associated documentation, you agree that you have read, understood,\r
33# and will comply with the following terms and conditions:\r
34#\r
35# Permission to use, copy, modify, and distribute this software and\r
36# its associated documentation for any purpose and without fee is\r
37# hereby granted, provided that the above copyright notice appears in\r
38# all copies, and that both that copyright notice and this permission\r
39# notice appear in supporting documentation, and that the name of\r
40# Secret Labs AB or the author not be used in advertising or publicity\r
41# pertaining to distribution of the software without specific, written\r
42# prior permission.\r
43#\r
44# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD\r
45# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-\r
46# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR\r
47# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY\r
48# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\r
49# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\r
50# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\r
51# OF THIS SOFTWARE.\r
52# --------------------------------------------------------------------\r
53\r
54# Licensed to PSF under a Contributor Agreement.\r
55# See http://www.python.org/psf/license for licensing details.\r
56\r
# Names exported by "from <module> import *"; this list is the module's
# declared public API.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

# Toolkit version string; exported through __all__ above.
VERSION = "1.3.0"
76\r
77##\r
78# The <b>Element</b> type is a flexible container object, designed to\r
79# store hierarchical data structures in memory. The type can be\r
80# described as a cross between a list and a dictionary.\r
81# <p>\r
82# Each element has a number of properties associated with it:\r
83# <ul>\r
84# <li>a <i>tag</i>. This is a string identifying what kind of data\r
85# this element represents (the element type, in other words).</li>\r
86# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>\r
87# <li>a <i>text</i> string.</li>\r
88# <li>an optional <i>tail</i> string.</li>\r
89# <li>a number of <i>child elements</i>, stored in a Python sequence</li>\r
90# </ul>\r
91#\r
92# To create an element instance, use the {@link #Element} constructor\r
93# or the {@link #SubElement} factory function.\r
94# <p>\r
95# The {@link #ElementTree} class can be used to wrap an element\r
96# structure, and convert it from and to XML.\r
97##\r
98\r
99import sys\r
100import re\r
101import warnings\r
102\r
103\r
class _SimpleElementPath(object):
    """Fallback path engine emulating pre-1.2 find/findtext/findall.

    Only plain tag names are understood, plus a leading ".//" in
    iterfind()/findall().  The namespaces argument is accepted so the
    signatures line up with the callers, but it is never consulted.
    """

    def find(self, element, tag, namespaces=None):
        """Return the first direct child whose tag equals *tag*, or None."""
        for child in element:
            if child.tag == tag:
                return child
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        """Return the text of the first direct child tagged *tag*.

        Gives *default* when no child matches, and "" when the matching
        child has no (or empty) text.
        """
        match = self.find(element, tag)
        if match is not None:
            return match.text or ""
        return default

    def iterfind(self, element, tag, namespaces=None):
        """Lazily yield the elements selected by *tag*.

        For a ".//name" pattern everything produced by
        element.iter("name") is yielded first.  In every case the direct
        children are then compared against the unmodified *tag* string.
        """
        if tag[:3] == ".//":
            for descendant in element.iter(tag[3:]):
                yield descendant
        for child in element:
            if child.tag == tag:
                yield child

    def findall(self, element, tag, namespaces=None):
        """Return the iterfind() results collected into a list."""
        matches = self.iterfind(element, tag, namespaces)
        return list(matches)
125\r
# Prefer the package's ElementPath module; if it cannot be imported, fall
# back to an instance of the reduced _SimpleElementPath emulation, which
# exposes the same find/findtext/findall/iterfind entry points.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
130\r
131##\r
132# Parser error. This is a subclass of <b>SyntaxError</b>.\r
133# <p>\r
134# In addition to the exception value, an exception instance contains a\r
135# specific exception code in the <b>code</b> attribute, and the line and\r
136# column of the error in the <b>position</b> attribute.\r
137\r
class ParseError(SyntaxError):
    """Parser error: a SyntaxError subclass adding no behaviour of its own."""
140\r
141# --------------------------------------------------------------------\r
142\r
143##\r
144# Checks if an object appears to be a valid element object.\r
145#\r
# @param element An element instance.
147# @return A true value if this is an element object.\r
148# @defreturn flag\r
149\r
def iselement(element):
    """Return a true value if *element* looks like an element object.

    Anything that is an Element instance, or that merely exposes a
    "tag" attribute, is accepted.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154\r
155##\r
156# Element class. This class defines the Element interface, and\r
157# provides a reference implementation of this interface.\r
158# <p>\r
159# The element name, attribute names, and attribute values can be\r
160# either ASCII strings (ordinary Python strings containing only 7-bit\r
161# ASCII characters) or Unicode strings.\r
162#\r
163# @param tag The element name.\r
164# @param attrib An optional dictionary, containing element attributes.\r
165# @param **extra Additional attributes, given as keyword arguments.\r
166# @see Element\r
167# @see SubElement\r
168# @see Comment\r
169# @see ProcessingInstruction\r
170\r
class Element(object):
    """Reference implementation of the Element interface.

    Serialized form: <tag attrib>text<child/>...</tag>tail
    """

    # (Attribute) Element tag.
    tag = None

    # (Attribute) Element attribute dictionary.  Where possible, go
    # through get(), set(), keys() and items() instead of using it
    # directly.
    attrib = None

    # (Attribute) Text before the first subelement: a string or None.
    # "No text" may be either None or an empty string, depending on the
    # parser.
    text = None

    # (Attribute) Text after this element's end tag but before the next
    # sibling's start tag: a string or None.  "No text" may be either
    # None or an empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element.

        @param tag The element name.
        @param attrib Optional dictionary of attributes; it is copied.
        @param **extra Additional attributes, given as keyword arguments.
        """
        # Work on a copy so neither the caller's dictionary nor the
        # shared default argument is ever mutated.
        attributes = attrib.copy()
        attributes.update(extra)
        self.tag = tag
        self.attrib = attributes
        self._children = []

    def __repr__(self):
        tag_repr = repr(self.tag)
        return "<Element %s at 0x%x>" % (tag_repr, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element of the same class as this one.

        @param tag Element tag.
        @param attrib Element attributes, given as a dictionary.
        @return A new element instance.
        """
        return self.__class__(tag, attrib)

    def copy(self):
        """(Experimental) Return a shallow copy of this element.

        The copy gets its own attribute dictionary and child list, but
        the subelements themselves are shared with the original.
        """
        duplicate = self.makeelement(self.tag, self.attrib)
        duplicate.text, duplicate.tail = self.text, self.tail
        duplicate[:] = self
        return duplicate

    def __len__(self):
        """Return the number of subelements (text is not counted)."""
        return len(self._children)

    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        # emulate old behaviour, for now: true only if there are children
        return len(self._children) != 0

    def __getitem__(self, index):
        """Return the subelement(s) at *index*.

        @exception IndexError If the given element does not exist.
        """
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index* with *element*.

        No type checking is done on the new value.

        @exception IndexError If the given element does not exist.
        """
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*.

        @exception IndexError If the given element does not exist.
        """
        del self._children[index]

    def append(self, element):
        """Add *element* after the last existing subelement."""
        self._children.append(element)

    def extend(self, elements):
        """Append every item of the sequence *elements* (since 1.3)."""
        self._children.extend(elements)

    def insert(self, index, element):
        """Insert *element* as a subelement at position *index*."""
        self._children.insert(index, element)

    def remove(self, element):
        """Remove a subelement using list.remove() on the child list.

        @exception ValueError If a matching element could not be found.
        """
        self._children.remove(element)

    def getchildren(self):
        """(Deprecated) Return the list of subelements in document order.

        The list returned is the element's internal child list, not a
        copy.
        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find the first matching subelement, by tag name or path.

        Delegates to ElementPath.find.

        @return The first matching element, or None if nothing matched.
        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find the text of the first matching subelement.

        Delegates to ElementPath.findtext.

        @return The matching element's text, or *default* if no element
            was found.
        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Delegates to ElementPath.findall.
        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Delegates to ElementPath.iterfind.
        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset the element.

        Empties the attribute dictionary in place, drops all
        subelements, and sets text and tail to None.
        """
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    def get(self, key, default=None):
        """Return attribute *key*, or *default* if it is not set."""
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set attribute *key* to *value*."""
        self.attrib[key] = value

    def keys(self):
        """Return the attribute names; same as attrib.keys()."""
        return self.attrib.keys()

    def items(self):
        """Return the attributes as (name, value) pairs; same as
        attrib.items()."""
        return self.attrib.items()

    def iter(self, tag=None):
        """Iterate over this element and all subelements, depth first.

        Elements are produced in document order.  Only elements whose
        tag equals *tag* are produced; None or "*" selects every element.
        """
        wanted = tag
        if wanted == "*":
            wanted = None
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for descendant in child.iter(wanted):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        """Return list(self.iter(tag)), with a pending-deprecation warning."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def itertext(self):
        """Iterate over all inner text, in document order.

        Produces nothing at all for an element whose tag is neither a
        string nor None.
        """
        tag = self.tag
        if tag is not None and not isinstance(tag, basestring):
            return
        if self.text:
            yield self.text
        for child in self:
            for fragment in child.itertext():
                yield fragment
            if child.tail:
                yield child.tail

# compatibility
_Element = Element
_ElementInterface = Element
512\r
513##\r
514# Subelement factory. This function creates an element instance, and\r
515# appends it to an existing element.\r
516# <p>\r
517# The element name, attribute names, and attribute values can be\r
518# either 8-bit ASCII strings or Unicode strings.\r
519#\r
520# @param parent The parent element.\r
521# @param tag The subelement name.\r
522# @param attrib An optional dictionary, containing element attributes.\r
523# @param **extra Additional attributes, given as keyword arguments.\r
524# @return An element instance.\r
525# @defreturn Element\r
526\r
def SubElement(parent, tag, attrib={}, **extra):
    """Create an element via parent.makeelement() and append it to *parent*.

    The attributes passed on are a copy of *attrib* updated with the
    keyword arguments; *attrib* itself is left untouched.  Returns the
    new element.
    """
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533\r
534##\r
535# Comment element factory. This factory function creates a special\r
536# element that will be serialized as an XML comment by the standard\r
537# serializer.\r
538# <p>\r
539# The comment string can be either an 8-bit ASCII string or a Unicode\r
540# string.\r
541#\r
542# @param text A string containing the comment string.\r
543# @return An element instance, representing a comment.\r
544# @defreturn Element\r
545\r
def Comment(text=None):
    """Return an element whose tag is the Comment function itself.

    The comment string is stored in the element's text attribute.
    """
    node = Element(Comment)
    node.text = text
    return node
550\r
551##\r
552# PI element factory. This factory function creates a special element\r
553# that will be serialized as an XML processing instruction by the standard\r
554# serializer.\r
555#\r
556# @param target A string containing the PI target.\r
557# @param text A string containing the PI contents, if any.\r
558# @return An element instance, representing a PI.\r
559# @defreturn Element\r
560\r
def ProcessingInstruction(target, text=None):
    """Return an element whose tag is the ProcessingInstruction function.

    The element's text is *target*, followed by a single space and
    *text* when *text* is non-empty.
    """
    node = Element(ProcessingInstruction)
    content = target
    if text:
        content = content + " " + text
    node.text = content
    return node

# short alias for the factory above
PI = ProcessingInstruction
569\r
570##\r
571# QName wrapper. This can be used to wrap a QName attribute value, in\r
572# order to get proper namespace handling on output.\r
573#\r
574# @param text A string containing the QName value, in the form {uri}local,\r
575# or, if the tag argument is given, the URI part of a QName.\r
# @param tag Optional tag.  If given, the first argument is interpreted as
#     a URI, and this argument is interpreted as a local name.
578# @return An opaque object, representing the QName.\r
579\r
class QName(object):
    """Wrapper around a qualified name held in "{uri}local" form.

    The string is kept in the text attribute; str(), hash() and
    comparison all operate on that string.
    """

    def __init__(self, text_or_uri, tag=None):
        # With a (non-empty) tag, the first argument is the URI part.
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def __cmp__(self, other):
        # Another QName is compared by its text; anything else directly.
        other_text = other.text if isinstance(other, QName) else other
        return cmp(self.text, other_text)
593\r
594# --------------------------------------------------------------------\r
595\r
596##\r
597# ElementTree wrapper class. This class represents an entire element\r
598# hierarchy, and adds some extra support for serialization to and from\r
599# standard XML.\r
600#\r
601# @param element Optional root element.\r
602# @keyparam file Optional file handle or file name. If given, the\r
603# tree is initialized with the contents of this XML file.\r
604\r
def _tree_search_path(path):
    # Shared by ElementTree.find/findtext/findall/iterfind: rewrite a
    # path starting with "/" to "./..." and emit the FutureWarning.
    # stacklevel=3 skips this helper and the calling method, so the
    # warning is attributed to the code that called the ElementTree method.
    if path[:1] == "/":
        path = "." + path
        warnings.warn(
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path,
            FutureWarning, stacklevel=3
            )
    return path


class ElementTree(object):
    """Wrapper around a root element, adding parsing and serialization.

    @param element Optional root element.
    @keyparam file Optional file handle or file name.  If given, the
        tree is initialized with the contents of this XML file.
    """

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return the root element of this tree (may be None)."""
        return self._root

    def _setroot(self, element):
        """Replace the root element, discarding the current tree contents."""
        # assert iselement(element)
        self._root = element

    def parse(self, source, parser=None):
        """Load an XML document into this tree and return its root.

        @param source A file name or file object.  A file object only
            has to implement a read(n) method.
        @keyparam parser Optional parser instance; it needs feed() and
            close().  Defaults to XMLParser(target=TreeBuilder()).
        @return The document root element, i.e. whatever parser.close()
            returns.

        A file opened here from a file name is always closed again;
        a file object passed in by the caller is left open.
        """
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            # feed the document in 64 KiB chunks
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Return self.getroot().iter(tag)."""
        # assert self._root is not None
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """Return list(self.iter(tag)), with a pending-deprecation warning."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def find(self, path, namespaces=None):
        """Same as getroot().find(path), starting at the root of the tree.

        A path beginning with "/" is rewritten to "./..." and a
        FutureWarning is issued.
        """
        # assert self._root is not None
        return self._root.find(_tree_search_path(path), namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Same as getroot().findtext(path), starting at the root.

        A path beginning with "/" is rewritten to "./..." and a
        FutureWarning is issued.
        """
        # assert self._root is not None
        return self._root.findtext(
            _tree_search_path(path), default, namespaces
            )

    def findall(self, path, namespaces=None):
        """Same as getroot().findall(path), starting at the root.

        A path beginning with "/" is rewritten to "./..." and a
        FutureWarning is issued.
        """
        # assert self._root is not None
        return self._root.findall(_tree_search_path(path), namespaces)

    def iterfind(self, path, namespaces=None):
        """Same as getroot().iterfind(path), starting at the root.

        A path beginning with "/" is rewritten to "./..." and a
        FutureWarning is issued.
        """
        # assert self._root is not None
        return self._root.iterfind(_tree_search_path(path), namespaces)

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        """Write the element tree to a file.

        @param file_or_filename A file name, or any object with a
            write() method.
        @keyparam encoding Optional output encoding.  Defaults to
            "utf-8" for the "c14n" method and "us-ascii" otherwise.
        @keyparam xml_declaration Controls whether an XML declaration is
            written (see the note in the body for the exact rule).
        @keyparam default_namespace Sets the default XML namespace
            (for "xmlns").
        @keyparam method Output method; defaults to "xml".  Any other
            value must be a key of the serializer table.
        @exception ValueError If *method* is not a known method.

        A file opened here from a file name is closed even if
        serialization raises; a caller-supplied object is never closed.
        """
        # assert self._root is not None
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            stream = file_or_filename
            opened_here = False
        else:
            stream = open(file_or_filename, "wb")
            opened_here = True
        try:
            write = stream.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            # NOTE(review): this is an elif, so when no encoding was
            # passed no declaration is written, even for
            # xml_declaration=True.
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # Release the handle on every exit path, including errors
            # raised by the serializers above.
            if opened_here:
                stream.close()

    def write_c14n(self, file):
        """Write the tree using the "c14n" output method."""
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
827\r
828# --------------------------------------------------------------------\r
829# serialization support\r
830\r
def _namespaces(elem, encoding, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Every element reachable from *elem* is visited, and its tag,
    attribute names, QName attribute values and QName text are
    registered.

    Returns a (qnames, namespaces) pair: qnames maps each name to its
    encoded "prefix:local" (or plain) spelling, and namespaces maps each
    namespace URI to the prefix chosen for it.

    Raises ValueError if *default_namespace* is set and a name without a
    "{uri}" part is encountered.  Tags of an unsupported type are handed
    to _raise_serialization_error().
    """
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        # the default namespace is represented by the empty prefix
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                # "{uri}local" form: split at the last "}"
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # first sighting of this URI: use the module-level
                    # prefix table, else generate "ns<N>" from the
                    # number of namespaces recorded so far
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # the "xml" prefix is never recorded in the table
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            # a TypeError from the operations above is reported as a
            # serialization error for this name
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            # only None, Comment and PI are accepted as non-string tags
            _raise_serialization_error(tag)
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
898\r
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* and its subtree as XML through *write*.

    @param write Callable that receives each output fragment.
    @param elem The element to serialize.
    @param encoding Output encoding handed to the escape helpers.
    @param qnames Mapping from names to their serialized spelling.
    @param namespaces Mapping from namespace URI to prefix, emitted as
        xmlns declarations on this element; the recursive calls for
        subelements pass None.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[raw_tag]
        if name is None:
            # no serialized name: write only the content, without tags
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # sort on prefix
                    declarations = sorted(namespaces.items(),
                                          key=lambda item: item[1])
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes): # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], value))
            if not text and not len(elem):
                # nothing inside: use the empty-element form
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
    # the tail follows the element whatever kind of node it was
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
945\r
# Lower-case names of the HTML elements that _serialize_html writes
# without an end tag.  Kept as a set for fast membership tests; the
# set builtin is always available on the Python versions this module
# already requires, so no tuple fallback is needed.
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
    ))
953\r
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Serializes "elem" and its subtree as HTML, handing every encoded
    # fragment to the "write" callable.  Differs from the XML serializer
    # in that start tags are never self-closed, the text of script/style
    # elements is written unescaped, and elements listed in HTML_EMPTY
    # get no end tag.  "namespaces" is only written on this element
    # (recursive calls pass None).
    node_tag = elem.tag
    node_text = elem.text
    if node_tag is Comment:
        write("<!--%s-->" % _escape_cdata(node_text, encoding))
    elif node_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(node_text, encoding))
    else:
        name = qnames[node_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # xmlns declarations, ordered by prefix
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes):  # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib_html(value, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if node_text:
                if lowered in ("script", "style"):
                    # raw text elements: encode, but do not escape
                    write(_encode(node_text, encoding))
                else:
                    write(_escape_cdata(node_text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
        else:
            # no serialized name for this tag: emit its content only
            if node_text:
                write(_escape_cdata(node_text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1003\r
1004def _serialize_text(write, elem, encoding):\r
1005 for part in elem.itertext():\r
1006 write(part.encode(encoding))\r
1007 if elem.tail:\r
1008 write(elem.tail.encode(encoding))\r
1009\r
# Dispatch table mapping an output method name to the serializer
# function that implements it.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# the optional "c14n" method is added by the import at the end of the
# module, if that import succeeds
#   "c14n": _serialize_c14n,
}
1017\r
##
# Registers a namespace prefix.  The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix has the form "ns" followed by
#     digits, which is reserved for automatically generated prefixes.

def register_namespace(prefix, uri):
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot; entries are deleted from the registry
    # inside the loop, which must not disturb the iteration
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1036\r
# Global registry mapping namespace URIs to their preferred prefixes.
# register_namespace() adds to and removes from this table.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
1049\r
1050def _raise_serialization_error(text):\r
1051 raise TypeError(\r
1052 "cannot serialize %r (type %s)" % (text, type(text).__name__)\r
1053 )\r
1054\r
1055def _encode(text, encoding):\r
1056 try:\r
1057 return text.encode(encoding, "xmlcharrefreplace")\r
1058 except (TypeError, AttributeError):\r
1059 _raise_serialization_error(text)\r
1060\r
1061def _escape_cdata(text, encoding):\r
1062 # escape character data\r
1063 try:\r
1064 # it's worth avoiding do-nothing calls for strings that are\r
1065 # shorter than 500 character, or so. assume that's, by far,\r
1066 # the most common case in most applications.\r
1067 if "&" in text:\r
1068 text = text.replace("&", "&amp;")\r
1069 if "<" in text:\r
1070 text = text.replace("<", "&lt;")\r
1071 if ">" in text:\r
1072 text = text.replace(">", "&gt;")\r
1073 return text.encode(encoding, "xmlcharrefreplace")\r
1074 except (TypeError, AttributeError):\r
1075 _raise_serialization_error(text)\r
1076\r
1077def _escape_attrib(text, encoding):\r
1078 # escape attribute value\r
1079 try:\r
1080 if "&" in text:\r
1081 text = text.replace("&", "&amp;")\r
1082 if "<" in text:\r
1083 text = text.replace("<", "&lt;")\r
1084 if ">" in text:\r
1085 text = text.replace(">", "&gt;")\r
1086 if "\"" in text:\r
1087 text = text.replace("\"", "&quot;")\r
1088 if "\n" in text:\r
1089 text = text.replace("\n", "&#10;")\r
1090 return text.encode(encoding, "xmlcharrefreplace")\r
1091 except (TypeError, AttributeError):\r
1092 _raise_serialization_error(text)\r
1093\r
1094def _escape_attrib_html(text, encoding):\r
1095 # escape attribute value\r
1096 try:\r
1097 if "&" in text:\r
1098 text = text.replace("&", "&amp;")\r
1099 if ">" in text:\r
1100 text = text.replace(">", "&gt;")\r
1101 if "\"" in text:\r
1102 text = text.replace("\"", "&quot;")\r
1103 return text.encode(encoding, "xmlcharrefreplace")\r
1104 except (TypeError, AttributeError):\r
1105 _raise_serialization_error(text)\r
1106\r
1107# --------------------------------------------------------------------\r
1108\r
##
# Generates a string representation of an XML element, including all
# subelements.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return An encoded string containing the XML data.
# @defreturn string

def tostring(element, encoding=None, method=None):
    # minimal file-like object: all the writer needs is a write() method
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    return "".join(fragments)
1128\r
##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None, method=None):
    # minimal file-like object: all the writer needs is a write() method
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1150\r
##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    # finish with a newline unless the root's tail already ends with one
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1168\r
1169# --------------------------------------------------------------------\r
1170# parsing\r
1171\r
##
# Parses an XML document into an element tree.
#
# @param source A filename or file object containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An ElementTree instance

def parse(source, parser=None):
    document = ElementTree()
    document.parse(source, parser)
    return document
1184\r
##
# Parses an XML document into an element tree incrementally, and reports
# what's going on to the user.
#
# @param source A filename or file object containing XML data.  A
#     filename is opened in binary mode by this function; that file is
#     closed by the returned iterator once it is exhausted.
# @param events A list of events to report back.  If omitted, only "end"
#     events are reported.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A (event, elem) iterator.
# @exception ValueError If an unknown event name is requested.

def iterparse(source, events=None, parser=None):
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except:
        # setting up the parser or iterator failed (for example, an
        # unknown event name); don't leak the file opened above.  the
        # exception is re-raised unchanged, so nothing is hidden by the
        # bare "except".
        if close_source:
            source.close()
        raise
1204\r
class _IterParseIterator(object):
    # Iterator returned by iterparse().  It reads the source in chunks,
    # feeds them to the parser, and hands out the (event, payload) tuples
    # that the expat callbacks installed in __init__ have queued.  After
    # the iterator is exhausted, "root" holds the value returned by the
    # parser's close() method.

    def __init__(self, source, events, parser, close_source=False):
        self._file = source
        self._close_file = close_source  # close the file when exhausted?
        self._events = []  # events queued by the last chunk
        self._index = 0  # next queued event to hand out
        self._error = None  # parse error to raise once the queue is empty
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        expat_parser = self._parser._parser
        queue = self._events.append
        if events is None:
            events = ["end"]
        # the handlers bind "event" and the callables they need as
        # default arguments, so each keeps the values of its iteration
        for event in events:
            if event == "start":
                try:
                    expat_parser.ordered_attributes = 1
                    expat_parser.specified_attributes = 1
                    def on_start(tag, attrib_in, event=event, queue=queue,
                                 start=self._parser._start_list):
                        queue((event, start(tag, attrib_in)))
                    expat_parser.StartElementHandler = on_start
                except AttributeError:
                    # fall back to dictionary-style attribute handling
                    def on_start(tag, attrib_in, event=event, queue=queue,
                                 start=self._parser._start):
                        queue((event, start(tag, attrib_in)))
                    expat_parser.StartElementHandler = on_start
            elif event == "end":
                def on_end(tag, event=event, queue=queue,
                           end=self._parser._end):
                    queue((event, end(tag)))
                expat_parser.EndElementHandler = on_end
            elif event == "start-ns":
                def on_start_ns(prefix, uri, event=event, queue=queue):
                    # report the uri as ascii when it can be encoded
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    queue((event, (prefix or "", uri or "")))
                expat_parser.StartNamespaceDeclHandler = on_start_ns
            elif event == "end-ns":
                def on_end_ns(prefix, event=event, queue=queue):
                    queue((event, None))
                expat_parser.EndNamespaceDeclHandler = on_end_ns
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        while 1:
            # hand out queued events first
            if self._index < len(self._events):
                item = self._events[self._index]
                self._index += 1
                return item
            # queue is empty: report a deferred parse error, if any
            pending = self._error
            if pending:
                self._error = None
                raise pending
            if self._parser is None:
                # parser already closed: publish the root and stop
                self.root = self._root
                if self._close_file:
                    self._file.close()
                raise StopIteration
            # load event buffer
            del self._events[:]
            self._index = 0
            chunk = self._file.read(16384)
            if not chunk:
                # end of data: finish parsing and keep the result
                self._root = self._parser.close()
                self._parser = None
            else:
                try:
                    self._parser.feed(chunk)
                except SyntaxError as exc:
                    # deliver the events queued before the error first
                    self._error = exc

    def __iter__(self):
        return self
1286\r
##
# Parses an XML document from a string constant.  This function can
# be used to embed "XML literals" in Python code.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

def XML(text, parser=None):
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1302\r
##
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element id:s to elements.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)

def XMLID(text, parser=None):
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        # elements without an "id" attribute, or with an empty one,
        # are left out of the dictionary
        if key:
            by_id[key] = node
    return root, by_id
1324\r
##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element

fromstring = XML
1334\r
##
# Parses an XML document from a sequence of string fragments.
#
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element
# @since 1.3

def fromstringlist(sequence, parser=None):
    active = parser or XMLParser(target=TreeBuilder())
    # the fragments are fed to the parser one by one, in order
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1351\r
1352# --------------------------------------------------------------------\r
1353\r
##
# Generic element structure builder.  This builder converts a sequence
# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
# #TreeBuilder.end} method calls to a well-formed element structure.
# <p>
# You can use this class to build an element structure using a custom XML
# parser, or a parser for some other XML-like format.
#
# @param element_factory Optional element factory.  This factory
#     is called to create new Element instances, as necessary.

class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = []  # text fragments collected since the last flush
        self._elem = []  # stack of currently open elements
        self._last = None  # most recently opened or closed element
        self._tail = None  # true if we're after an end tag
        if element_factory is None:
            self._factory = Element
        else:
            self._factory = element_factory

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert not self._elem, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Attaches the collected text to the last element: as its tail if
    # that element has already been closed, as its text otherwise.

    def _flush(self):
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            # the new element is a child of the innermost open element
            stack[-1].append(elem)
        stack.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag, (
            "end tag mismatch (expected %s, got %s)" % (closed.tag, tag))
        self._tail = 1
        return closed
1441\r
##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object.  If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities.  This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding.  If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator; _fixname turns the resulting
        # "uri}local" names into "{uri}local"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None  # list of doctype tokens while inside one
        self.entity = {}  # replacement text for otherwise undefined entities
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    # Converts an expat error into a ParseError carrying the same code
    # and (line, column) position, and raises it.

    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    # Start-tag callback used when expat reports attributes as a
    # dictionary.

    def _start(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    # Start-tag callback used when expat reports attributes as a flat
    # [name, value, name, value, ...] list (ordered_attributes).

    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # Comments are forwarded only if the target has a comment() method.

    def _comment(self, data):
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    # Processing instructions are forwarded only if the target has a
    # pi() method.

    def _pi(self, target, data):
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    # Default handler: resolves entities through the "entity" dictionary
    # and collects the pieces of a doctype declaration.

    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class of the expat module selected in
                # __init__, so that feed()/close() catch and convert it
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                # identifiers arrive quoted; strip the quotes
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype() was redefined (subclass or instance
                    # attribute).  bound methods must be compared with
                    # "!=": each attribute access creates a new bound
                    # method object, so an identity test never matches.
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.
    # @exception ParseError If the data is not well-formed.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element
    # @exception ParseError If the document is not well-formed.

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1658\r
# compatibility: XMLTreeBuilder is kept as an alias for XMLParser
XMLTreeBuilder = XMLParser
1661\r
# workaround circular import: the optional C14N serializer is imported
# at the end of the module and registered only if it is available.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    pass
else:
    _serialize["c14n"] = _serialize_c14n