]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/xml/etree/ElementTree.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / xml / etree / ElementTree.py
1 #
2 # ElementTree
3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4 #
5 # light-weight XML support for Python 2.3 and later.
6 #
7 # history (since 1.2.6):
8 # 2005-11-12 fl added tostringlist/fromstringlist helpers
9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox
10 # 2006-07-05 fl removed support for 2.1 and earlier
11 # 2007-06-21 fl added deprecation/future warnings
12 # 2007-08-25 fl added doctype hook, added parser version attribute etc
13 # 2007-08-26 fl added new serializer code (better namespace handling, etc)
14 # 2007-08-27 fl warn for broken /tag searches on tree level
15 # 2007-09-02 fl added html/text methods to serializer (experimental)
16 # 2007-09-05 fl added method argument to tostring/tostringlist
17 # 2007-09-06 fl improved error handling
18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups
19 # 2007-12-15 fl added C14N hooks, copy method (experimental)
20 #
21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
22 #
23 # fredrik@pythonware.com
24 # http://www.pythonware.com
25 #
26 # --------------------------------------------------------------------
27 # The ElementTree toolkit is
28 #
29 # Copyright (c) 1999-2008 by Fredrik Lundh
30 #
31 # By obtaining, using, and/or copying this software and/or its
32 # associated documentation, you agree that you have read, understood,
33 # and will comply with the following terms and conditions:
34 #
35 # Permission to use, copy, modify, and distribute this software and
36 # its associated documentation for any purpose and without fee is
37 # hereby granted, provided that the above copyright notice appears in
38 # all copies, and that both that copyright notice and this permission
39 # notice appear in supporting documentation, and that the name of
40 # Secret Labs AB or the author not be used in advertising or publicity
41 # pertaining to distribution of the software without specific, written
42 # prior permission.
43 #
44 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51 # OF THIS SOFTWARE.
52 # --------------------------------------------------------------------
53
54 # Licensed to PSF under a Contributor Agreement.
55 # See http://www.python.org/psf/license for licensing details.
56
# Names exported by "from ElementTree import *" (the module's public API).
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
    ]

# Version of the ElementTree toolkit implemented by this module.
VERSION = "1.3.0"
76
77 ##
78 # The <b>Element</b> type is a flexible container object, designed to
79 # store hierarchical data structures in memory. The type can be
80 # described as a cross between a list and a dictionary.
81 # <p>
82 # Each element has a number of properties associated with it:
83 # <ul>
84 # <li>a <i>tag</i>. This is a string identifying what kind of data
85 # this element represents (the element type, in other words).</li>
86 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87 # <li>a <i>text</i> string.</li>
88 # <li>an optional <i>tail</i> string.</li>
89 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90 # </ul>
91 #
92 # To create an element instance, use the {@link #Element} constructor
93 # or the {@link #SubElement} factory function.
94 # <p>
95 # The {@link #ElementTree} class can be used to wrap an element
96 # structure, and convert it from and to XML.
97 ##
98
99 import sys
100 import re
101 import warnings
102
103
104 class _SimpleElementPath(object):
105 # emulate pre-1.2 find/findtext/findall behaviour
106 def find(self, element, tag, namespaces=None):
107 for elem in element:
108 if elem.tag == tag:
109 return elem
110 return None
111 def findtext(self, element, tag, default=None, namespaces=None):
112 elem = self.find(element, tag)
113 if elem is None:
114 return default
115 return elem.text or ""
116 def iterfind(self, element, tag, namespaces=None):
117 if tag[:3] == ".//":
118 for elem in element.iter(tag[3:]):
119 yield elem
120 for elem in element:
121 if elem.tag == tag:
122 yield elem
123 def findall(self, element, tag, namespaces=None):
124 return list(self.iterfind(element, tag, namespaces))
125
126 try:
127 from . import ElementPath
128 except ImportError:
129 ElementPath = _SimpleElementPath()
130
131 ##
132 # Parser error. This is a subclass of <b>SyntaxError</b>.
133 # <p>
134 # In addition to the exception value, an exception instance contains a
135 # specific exception code in the <b>code</b> attribute, and the line and
136 # column of the error in the <b>position</b> attribute.
137
class ParseError(SyntaxError):
    # Adds no behaviour of its own; it only gives parser failures a
    # distinct exception type that is still caught as SyntaxError.
    # NOTE(review): the "code" and "position" attributes mentioned in
    # the comment above are not set here -- presumably the parser
    # attaches them when raising; confirm against the parser code.
    pass
140
141 # --------------------------------------------------------------------
142
143 ##
144 # Checks if an object appears to be a valid element object.
145 #
146 # @param element An element instance.
147 # @return A true value if this is an element object.
148 # @defreturn flag
149
def iselement(element):
    # Duck-typed check: a real Element instance qualifies, and so does
    # any other object that exposes a "tag" attribute.
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154
155 ##
156 # Element class. This class defines the Element interface, and
157 # provides a reference implementation of this interface.
158 # <p>
159 # The element name, attribute names, and attribute values can be
160 # either ASCII strings (ordinary Python strings containing only 7-bit
161 # ASCII characters) or Unicode strings.
162 #
163 # @param tag The element name.
164 # @param attrib An optional dictionary, containing element attributes.
165 # @param **extra Additional attributes, given as keyword arguments.
166 # @see Element
167 # @see SubElement
168 # @see Comment
169 # @see ProcessingInstruction
170
class Element(object):
    # Serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary.  Prefer the
    # {@link #Element.get}, {@link #Element.set},
    # {@link #Element.keys} and {@link #Element.items} accessors over
    # touching this dictionary directly.

    attrib = None

    ##
    # (Attribute) Text before the first subelement: a string or None.
    # An element without text may carry either None or an empty
    # string, depending on the parser that built it.

    text = None

    ##
    # (Attribute) Text after this element's end tag, but before the
    # next sibling's start tag: a string or None.  As with
    # <b>text</b>, "no text" may be None or an empty string.

    tail = None # text after end tag, if any

    ##
    # Constructor.
    #
    # @param tag The element name.
    # @param attrib An optional dictionary of attributes.  It is copied,
    #     so neither the caller's dictionary nor the shared default is
    #     ever modified.
    # @param **extra Additional attributes, given as keyword arguments.

    def __init__(self, tag, attrib={}, **extra):
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Makes a shallow copy of this element.  The copy
    # gets its own attribute dictionary and child list, but the
    # subelements themselves are shared with the original.
    #
    # @return A new element instance.

    def copy(self):
        clone = self.makeelement(self.tag, self.attrib)
        clone.text = self.text
        clone.tail = self.tail
        clone[:] = self
        return clone

    ##
    # Returns the number of subelements.  Text is not counted; to find
    # out whether an element has any content at all, check both the
    # length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    ##
    # Truth value (Python 2 protocol): true if the element has at least
    # one subelement.  Always emits a FutureWarning first, because this
    # meaning is announced to change.

    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        has_children = len(self._children) != 0
        return has_children # emulate old behaviour, for now

    ##
    # Returns the given subelement, by index (or slice).
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index (or slice).  The new
    # value is not validated.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        self._children[index] = element

    ##
    # Deletes the given subelement, by index (or slice).
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement at the end of this element.  In document order
    # it follows the last existing subelement (or the text, if it is
    # the first subelement) and precedes this element's end tag.
    #
    # @param element The element to add.

    def append(self, element):
        self._children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        self._children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # this does not look at tags or contents: the element is located
    # with list.remove on the child list.  To remove subelements by
    # other criteria, select the ones to keep and assign them back
    # with a slice assignment.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  Emits
    # a DeprecationWarning.  The returned object is the element's own
    # child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    # Delegates to ElementPath.find.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or
    # path.  Delegates to ElementPath.findtext.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return Whatever ElementPath.findtext returns for this element.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.  Delegates
    # to ElementPath.findall.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return Whatever ElementPath.findall returns for this element.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.  Delegates
    # to ElementPath.iterfind.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return Whatever ElementPath.iterfind returns for this element.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element: empties the attribute dictionary in place,
    # replaces the child list with a new empty list, and sets
    # <b>text</b> and <b>tail</b> to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to
    # <b>attrib[key] = value</b>.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets the attribute names.  Equivalent to <b>attrib.keys()</b>,
    # so the order is whatever the dictionary provides.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets the attributes as (name, value) pairs.  Equivalent to
    # <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The iterator visits this element and
    # then, recursively, every subelement in document order, yielding
    # those whose tag matches.
    # <p>
    # If the tree is modified while iterating, new or removed elements
    # may or may not show up.  For a stable result, wrap the iterator
    # in list() and loop over the list.
    #
    # @param tag What tag to look for; None or "*" matches every element.
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        wanted = tag
        if wanted == "*":
            wanted = None
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for match in child.iter(wanted):
                yield match

    ##
    # (Compatibility) Same as list(self.iter(tag)); emits a
    # PendingDeprecationWarning.

    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  Yields this element's text, then for
    # each subelement its inner text followed by its tail, in document
    # order.  Empty or None strings are skipped.  Nothing is yielded
    # for an element whose tag is neither a string nor None.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        own_tag = self.tag
        if not isinstance(own_tag, basestring) and own_tag is not None:
            return
        if self.text:
            yield self.text
        for child in self:
            for piece in child.itertext():
                yield piece
            if child.tail:
                yield child.tail

# compatibility
_Element = _ElementInterface = Element
512
513 ##
514 # Subelement factory. This function creates an element instance, and
515 # appends it to an existing element.
516 # <p>
517 # The element name, attribute names, and attribute values can be
518 # either 8-bit ASCII strings or Unicode strings.
519 #
520 # @param parent The parent element.
521 # @param tag The subelement name.
522 # @param attrib An optional dictionary, containing element attributes.
523 # @param **extra Additional attributes, given as keyword arguments.
524 # @return An element instance.
525 # @defreturn Element
526
def SubElement(parent, tag, attrib={}, **extra):
    # Merge the keyword attributes into a private copy, so neither the
    # caller's dictionary nor the shared default is modified.
    merged = attrib.copy()
    merged.update(extra)
    # Let the parent build the child so it gets the parent's element
    # type, then attach it as the last subelement.
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533
534 ##
535 # Comment element factory. This factory function creates a special
536 # element that will be serialized as an XML comment by the standard
537 # serializer.
538 # <p>
539 # The comment string can be either an 8-bit ASCII string or a Unicode
540 # string.
541 #
542 # @param text A string containing the comment string.
543 # @return An element instance, representing a comment.
544 # @defreturn Element
545
def Comment(text=None):
    # The factory function itself is used as the tag; the serializers
    # recognize comments with "tag is Comment".
    node = Element(Comment)
    node.text = text
    return node
550
551 ##
552 # PI element factory. This factory function creates a special element
553 # that will be serialized as an XML processing instruction by the standard
554 # serializer.
555 #
556 # @param target A string containing the PI target.
557 # @param text A string containing the PI contents, if any.
558 # @return An element instance, representing a PI.
559 # @defreturn Element
560
def ProcessingInstruction(target, text=None):
    # The factory function itself is used as the tag; the serializers
    # recognize PIs with "tag is ProcessingInstruction".
    node = Element(ProcessingInstruction)
    if text:
        # target and contents are stored together, separated by a space
        node.text = target + " " + text
    else:
        node.text = target
    return node

# short alias for the factory above
PI = ProcessingInstruction
569
570 ##
571 # QName wrapper. This can be used to wrap a QName attribute value, in
572 # order to get proper namespace handling on output.
573 #
574 # @param text A string containing the QName value, in the form {uri}local,
575 # or, if the tag argument is given, the URI part of a QName.
576 # @param tag Optional tag. If given, the first argument is interpreted as
577 # an URI, and this argument is interpreted as a local name.
578 # @return An opaque object, representing the QName.
579
class QName(object):
    # Wraps a qualified name.  The value is kept in <b>text</b>, in
    # {uri}local form when a tag is given.

    def __init__(self, text_or_uri, tag=None):
        if tag:
            # two-argument form: first argument is the URI
            self.text = "{%s}%s" % (text_or_uri, tag)
        else:
            self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # hash like the underlying text
        return hash(self.text)

    def __cmp__(self, other):
        # Python 2 comparison protocol: compare by text, against either
        # another QName or any other value (typically a plain string).
        if isinstance(other, QName):
            other = other.text
        return cmp(self.text, other)
593
594 # --------------------------------------------------------------------
595
596 ##
597 # ElementTree wrapper class. This class represents an entire element
598 # hierarchy, and adds some extra support for serialization to and from
599 # standard XML.
600 #
601 # @param element Optional root element.
602 # @keyparam file Optional file handle or file name. If given, the
603 # tree is initialized with the contents of this XML file.
604
class ElementTree(object):

    ##
    # Constructor.
    #
    # @param element Optional root element.
    # @keyparam file Optional file handle or file name.  If given (and
    #     true), it is parsed immediately and its root element replaces
    #     <b>element</b>.

    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    # <p>
    # The source is read in 64K chunks and fed to the parser.  A file
    # opened here from a file name is always closed again, even if
    # parsing fails; a file object passed in by the caller is left open.
    #
    # @param source A file name or file object.  If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        opened_here = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            opened_here = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only close what this method opened itself
            if opened_here:
                source.close()

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        return self._root.iter(tag)

    ##
    # (Compatibility) Same as list(self.iter(tag)); emits a
    # PendingDeprecationWarning.

    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    ##
    # (Internal) Shared by the find methods.  A path starting with "/"
    # is rewritten to start with "./", and a FutureWarning is issued.
    # Any other path is returned unchanged.
    #
    # @param path The path given to a find method.
    # @return The path to hand to the root element.

    def _relative_path(self, path):
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3: skip this helper and the public find method,
            # so the warning points at the code that called that method.
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return Whatever getroot().findtext returns for the path.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Same as getroot().iterfind(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    ##
    # Writes the element tree to a file.
    # <p>
    # A file opened here from a file name is always closed again, even
    # if serialization fails; a file object passed in by the caller is
    # left open.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding.  If not given,
    #     "utf-8" is used for the "c14n" method and "us-ascii" otherwise.
    # @keyparam method Optional output method; must be a key of the
    #     serializer table.  Default is "xml".
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  A declaration is only ever written when
    #     an encoding was given explicitly and the method is "xml":
    #     True writes it, None writes it unless the encoding is
    #     "utf-8" or "us-ascii", False never writes it.  None is default.
    # @keyparam default_namespace Optional namespace URI to be mapped to
    #     the empty prefix; passed on to the namespace collector.
    # @exception ValueError If the method is unknown.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            stream = file_or_filename
        else:
            stream = open(file_or_filename, "wb")
        try:
            write = stream.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                # the text serializer takes no qname/namespace tables
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # only close what this method opened itself
            if stream is not file_or_filename:
                stream.close()

    ##
    # Writes the tree using the "c14n" output method.

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
822
823 # --------------------------------------------------------------------
824 # serialization support
825
def _namespaces(elem, encoding, default_namespace=None):
    # Walks the tree rooted at elem and identifies the names and
    # namespaces it uses.  Returns a (qnames, namespaces) pair:
    #   qnames     maps each tag/attribute name (and QName text) to its
    #              *encoded* "prefix:local" (or plain) serialized form
    #   namespaces maps namespace uri:s to prefixes
    # Raises ValueError if default_namespace is given and a
    # non-qualified name is found.

    qnames = {None: None}

    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate and record the serialized form of one name
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                # first use of this uri: take the registered prefix,
                # or generate an "nsN" one
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    # the "xml" prefix is not recorded for declaration
                    namespaces[uri] = prefix
            if prefix:
                qnames[qname] = encode("%s:%s" % (prefix, local))
            else:
                qnames[qname] = encode(local) # default element
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        walk = elem.iter
    except AttributeError:
        walk = elem.getiterator # cET compatibility
    for node in walk():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for name, value in node.items():
            if isinstance(name, QName):
                name = name.text
            if name not in qnames:
                add_qname(name)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        content = node.text
        if isinstance(content, QName) and content.text not in qnames:
            add_qname(content.text)
    return qnames, namespaces
893
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subtree and its tail as XML through write().
    #   qnames      table of serialized names, as built by _namespaces
    #   namespaces  uri -> prefix map to declare on this element; the
    #               recursive calls pass None, so declarations are only
    #               emitted on the element this function is first
    #               called with
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            if namespaces:
                for uri, prefix in sorted(namespaces.items(),
                                          key=lambda x: x[1]): # sort on prefix
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(elem.items()): # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                # no text and no children: self-closing form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
940
# Names of HTML elements that are written without an end tag by the
# HTML serializer.  Note the comma between "meta" and "param": without
# it the two adjacent literals are concatenated into "metaparam".
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# Use a set for membership tests where the builtin is available.
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
948
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subtree and its tail as HTML through write().
    # Differs from the XML serializer in that a start tag is always
    # closed with ">", script/style text is written unescaped, and
    # elements listed in HTML_EMPTY get no end tag.
    #   qnames      table of serialized names, as built by _namespaces
    #   namespaces  uri -> prefix map to declare on this element; the
    #               recursive calls pass None
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            if namespaces:
                for uri, prefix in sorted(namespaces.items(),
                                          key=lambda x: x[1]): # sort on prefix
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(elem.items()): # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value, encoding)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            # from here on the lower-cased name is used, both for the
            # lookups and for the end tag
            lowered = name.lower()
            if text:
                if lowered == "script" or lowered == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + lowered + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
998
999 def _serialize_text(write, elem, encoding):
1000 for part in elem.itertext():
1001 write(part.encode(encoding))
1002 if elem.tail:
1003 write(elem.tail.encode(encoding))
1004
# Dispatch table: output method name -> serializer function.
_serialize = {}
_serialize["xml"] = _serialize_xml
_serialize["html"] = _serialize_html
_serialize["text"] = _serialize_text
# this optional method is imported at the end of the module
# _serialize["c14n"] = _serialize_c14n
1012
1013 ##
1014 # Registers a namespace prefix. The registry is global, and any
1015 # existing mapping for either the given prefix or the namespace URI
1016 # will be removed.
1017 #
1018 # @param prefix Namespace prefix.
1019 # @param uri Namespace uri. Tags and attributes in this namespace
1020 # will be serialized with the given prefix, if at all possible.
1021 # @exception ValueError If the prefix is reserved, or is otherwise
1022 # invalid.
1023
def register_namespace(prefix, uri):
    """Register *prefix* as the prefix for namespace *uri*.

    The registry (_namespace_map) is global.  Any existing entry that
    uses either the same URI or the same prefix is removed before the new
    mapping is stored.

    Raises ValueError if *prefix* has the form "ns<digits>", which is
    rejected as reserved for internal use.
    """
    # raw string: "\d" is a regex escape, not a string escape
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot, because entries are deleted during the
    # scan; this does not depend on items() returning a fresh list.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1031
1032 _namespace_map = {
1033 # "well-known" namespace prefixes
1034 "http://www.w3.org/XML/1998/namespace": "xml",
1035 "http://www.w3.org/1999/xhtml": "html",
1036 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
1037 "http://schemas.xmlsoap.org/wsdl/": "wsdl",
1038 # xml schema
1039 "http://www.w3.org/2001/XMLSchema": "xs",
1040 "http://www.w3.org/2001/XMLSchema-instance": "xsi",
1041 # dublin core
1042 "http://purl.org/dc/elements/1.1/": "dc",
1043 }
1044
1045 def _raise_serialization_error(text):
1046 raise TypeError(
1047 "cannot serialize %r (type %s)" % (text, type(text).__name__)
1048 )
1049
1050 def _encode(text, encoding):
1051 try:
1052 return text.encode(encoding, "xmlcharrefreplace")
1053 except (TypeError, AttributeError):
1054 _raise_serialization_error(text)
1055
1056 def _escape_cdata(text, encoding):
1057 # escape character data
1058 try:
1059 # it's worth avoiding do-nothing calls for strings that are
1060 # shorter than 500 character, or so. assume that's, by far,
1061 # the most common case in most applications.
1062 if "&" in text:
1063 text = text.replace("&", "&amp;")
1064 if "<" in text:
1065 text = text.replace("<", "&lt;")
1066 if ">" in text:
1067 text = text.replace(">", "&gt;")
1068 return text.encode(encoding, "xmlcharrefreplace")
1069 except (TypeError, AttributeError):
1070 _raise_serialization_error(text)
1071
1072 def _escape_attrib(text, encoding):
1073 # escape attribute value
1074 try:
1075 if "&" in text:
1076 text = text.replace("&", "&amp;")
1077 if "<" in text:
1078 text = text.replace("<", "&lt;")
1079 if ">" in text:
1080 text = text.replace(">", "&gt;")
1081 if "\"" in text:
1082 text = text.replace("\"", "&quot;")
1083 if "\n" in text:
1084 text = text.replace("\n", "&#10;")
1085 return text.encode(encoding, "xmlcharrefreplace")
1086 except (TypeError, AttributeError):
1087 _raise_serialization_error(text)
1088
1089 def _escape_attrib_html(text, encoding):
1090 # escape attribute value
1091 try:
1092 if "&" in text:
1093 text = text.replace("&", "&amp;")
1094 if ">" in text:
1095 text = text.replace(">", "&gt;")
1096 if "\"" in text:
1097 text = text.replace("\"", "&quot;")
1098 return text.encode(encoding, "xmlcharrefreplace")
1099 except (TypeError, AttributeError):
1100 _raise_serialization_error(text)
1101
1102 # --------------------------------------------------------------------
1103
1104 ##
1105 # Generates a string representation of an XML element, including all
1106 # subelements.
1107 #
1108 # @param element An Element instance.
1109 # @keyparam encoding Optional output encoding (default is US-ASCII).
1110 # @keyparam method Optional output method ("xml", "html", "text" or
1111 # "c14n"; default is "xml").
1112 # @return An encoded string containing the XML data.
1113 # @defreturn string
1114
def tostring(element, encoding=None, method=None):
    """Serialize *element* and its subtree and return it as one string.

    The element is wrapped in an ElementTree whose write() method emits
    the output into an in-memory collector; the collected fragments are
    joined and returned.
    """
    class _Sink:
        # bare object that only needs to carry a write attribute
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
1123
1124 ##
1125 # Generates a string representation of an XML element, including all
1126 # subelements. The string is returned as a sequence of string fragments.
1127 #
1128 # @param element An Element instance.
1129 # @keyparam encoding Optional output encoding (default is US-ASCII).
1130 # @keyparam method Optional output method ("xml", "html", "text" or
1131 # "c14n"; default is "xml").
1132 # @return A sequence object containing the XML data.
1133 # @defreturn sequence
1134 # @since 1.3
1135
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and return the output as a list of fragments.

    Works like tostring(), but the fragments handed to write() are
    returned as they were collected instead of being joined.
    """
    class _Sink:
        # bare object that only needs to carry a write attribute
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1145
1146 ##
1147 # Writes an element tree or element structure to sys.stdout. This
1148 # function should be used for debugging only.
1149 # <p>
1150 # The exact output format is implementation dependent. In this
1151 # version, it's written as an ordinary XML file.
1152 #
1153 # @param elem An element tree or an individual element.
1154
def dump(elem):
    """Write an element tree or a single element to sys.stdout.

    Intended for debugging.  A bare element is wrapped in an ElementTree
    first.  A newline is written afterwards unless the root element's
    tail already ends with one.
    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    ends_with_newline = tail and tail[-1] == "\n"
    if not ends_with_newline:
        sys.stdout.write("\n")
1163
1164 # --------------------------------------------------------------------
1165 # parsing
1166
1167 ##
1168 # Parses an XML document into an element tree.
1169 #
1170 # @param source A filename or file object containing XML data.
1171 # @param parser An optional parser instance. If not given, the
1172 # standard {@link XMLParser} parser is used.
1173 # @return An ElementTree instance
1174
def parse(source, parser=None):
    """Parse *source* into a new ElementTree and return the tree.

    *source* and *parser* are passed unchanged to ElementTree.parse().
    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1179
1180 ##
1181 # Parses an XML document into an element tree incrementally, and reports
1182 # what's going on to the user.
1183 #
1184 # @param source A filename or file object containing XML data.
1185 # @param events A list of events to report back. If omitted, only "end"
1186 # events are reported.
1187 # @param parser An optional parser instance. If not given, the
1188 # standard {@link XMLParser} parser is used.
1189 # @return A (event, elem) iterator.
1190
def iterparse(source, events=None, parser=None):
    """Parse *source* incrementally and return an (event, elem) iterator.

    source -- a filename, or an object with a read() method.
    events -- list of events to report ("start", "end", "start-ns",
              "end-ns"); when omitted only "end" events are reported.
    parser -- optional parser instance; by default an XMLParser feeding a
              TreeBuilder is created.

    A file that is opened here (because *source* was a filename) is owned
    by the returned iterator, which closes it when parsing finishes or
    fails.  A file object supplied by the caller is never closed.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except:
        # cleanup only; the exception is re-raised unchanged
        if close_source:
            source.close()
        raise

class _IterParseIterator(object):
    # Iterator returned by iterparse().  Events are buffered in a list by
    # handlers installed on the underlying expat parser, and handed out
    # one at a time by next().  After the iterator is exhausted, the
    # "root" attribute holds the value returned by the parser's close().

    def __init__(self, source, events, parser, close_source=False):
        # close_source: true if this iterator owns the file and has to
        # close it; defaults to False so existing callers are unaffected
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        # The handlers receive event/append/... as default arguments so
        # that each one keeps the values current when it was defined.
        for event in events:
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    def _close_source(self):
        # Close the source file, but only if iterparse() opened it.
        if self._close_file:
            self._file.close()

    def next(self):
        # Return the next buffered event.  When the buffer is used up,
        # read another chunk and feed it to the parser; when the input is
        # exhausted, close the parser, publish the root and stop.
        try:
            while 1:
                try:
                    item = self._events[self._index]
                except IndexError:
                    pass
                else:
                    self._index = self._index + 1
                    return item
                if self._parser is None:
                    # parser already closed and all events delivered
                    self.root = self._root
                    break
                # load event buffer
                del self._events[:]
                self._index = 0
                data = self._file.read(16384)
                if data:
                    self._parser.feed(data)
                else:
                    self._root = self._parser.close()
                    self._parser = None
        except:
            # cleanup only; the exception is re-raised unchanged
            self._close_source()
            raise
        self._close_source()
        raise StopIteration

    def __iter__(self):
        return self
1268
1269 ##
1270 # Parses an XML document from a string constant. This function can
1271 # be used to embed "XML literals" in Python code.
1272 #
# @param text A string containing XML data.
1274 # @param parser An optional parser instance. If not given, the
1275 # standard {@link XMLParser} parser is used.
1276 # @return An Element instance.
1277 # @defreturn Element
1278
def XML(text, parser=None):
    """Parse the XML document in the string *text* and return its root.

    When no parser is given, an XMLParser feeding a TreeBuilder is used.
    The return value is whatever the parser's close() returns.
    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1284
1285 ##
1286 # Parses an XML document from a string constant, and also returns
1287 # a dictionary which maps from element id:s to elements.
1288 #
# @param text A string containing XML data.
1290 # @param parser An optional parser instance. If not given, the
1291 # standard {@link XMLParser} parser is used.
1292 # @return A tuple containing an Element instance and a dictionary.
1293 # @defreturn (Element, dictionary)
1294
def XMLID(text, parser=None):
    """Parse *text* and return a (root, ids) tuple.

    *ids* maps the value of each element's "id" attribute to the element.
    Elements without an "id" attribute, or with an empty one, are left
    out.  When no parser is given, an XMLParser feeding a TreeBuilder is
    used.
    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            continue
        by_id[key] = node
    return root, by_id
1306
1307 ##
1308 # Parses an XML document from a string constant. Same as {@link #XML}.
1309 #
1310 # @def fromstring(text)
# @param text A string containing XML data.
1312 # @return An Element instance.
1313 # @defreturn Element
1314
# alias: fromstring is bound to the same function object as XML
fromstring = XML
1316
1317 ##
1318 # Parses an XML document from a sequence of string fragments.
1319 #
1320 # @param sequence A list or other sequence containing XML data fragments.
1321 # @param parser An optional parser instance. If not given, the
1322 # standard {@link XMLParser} parser is used.
1323 # @return An Element instance.
1324 # @defreturn Element
1325 # @since 1.3
1326
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    Each fragment is fed to the parser in order.  When no parser is
    given, an XMLParser feeding a TreeBuilder is used.  The return value
    is whatever the parser's close() returns.
    """
    active = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1333
1334 # --------------------------------------------------------------------
1335
1336 ##
1337 # Generic element structure builder. This builder converts a sequence
1338 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1339 # #TreeBuilder.end} method calls to a well-formed element structure.
1340 # <p>
1341 # You can use this class to build an element structure using a custom XML
1342 # parser, or a parser for some other XML-like format.
1343 #
1344 # @param element_factory Optional element factory. This factory
1345 # is called to create new Element instances, as necessary.
1346
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = [] # pending character data fragments
        self._elem = [] # stack of currently open elements
        self._last = None # most recently opened or closed element
        self._tail = None # true once the last event was an end tag

    ##
    # Returns the toplevel document element once building is complete.
    # Asserts that every element has been closed and that at least one
    # element was created.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert not self._elem, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Attach the collected character data to the last element: as its
    # tail when that element has already been closed, otherwise as its
    # text.  The buffer is emptied afterwards; data that arrives before
    # any element exists is dropped.
    def _flush(self):
        if not self._data:
            return
        target = self._last
        if target is not None:
            joined = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = joined
            else:
                assert target.text is None, "internal error (text)"
                target.text = joined
        self._data = []

    ##
    # Collects a piece of character data for the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #     containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element, as a child of the innermost open element
    # if there is one.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        if self._elem:
            parent = self._elem[-1]
            parent.append(opened)
        self._elem.append(opened)
        self._tail = 0
        return opened

    ##
    # Closes the innermost open element.  Asserts that its tag matches
    # the given name.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag, (
            "end tag mismatch (expected %s, got %s)" % (closed.tag, tag))
        self._tail = 1
        return closed
1423
1424 ##
1425 # Element structure builder for XML source data, based on the
1426 # <b>expat</b> parser.
1427 #
1428 # @keyparam target Target object. If omitted, the builder uses an
1429 # instance of the standard {@link #TreeBuilder} class.
1430 # @keyparam html Predefine HTML entities. This flag is not supported
1431 # by the current implementation.
1432 # @keyparam encoding Optional encoding. If given, the value overrides
1433 # the encoding specified in the XML file.
1434 # @see #ElementTree
1435 # @see #TreeBuilder
1436
class XMLParser(object):
    # Wraps an expat parser object and forwards its callbacks to a
    # "target" object (a TreeBuilder by default).  Names and text coming
    # from expat are converted to plain ASCII strings where possible
    # before they reach the target.

    def __init__(self, html=0, target=None, encoding=None):
        # NOTE: the html argument is accepted but not used in this method.
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is passed as the second argument to ParserCreate (the
        # namespace separator -- see the pyexpat documentation);
        # _fixname() relies on it to rebuild "{uri}local" names.
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported; in that case the
        # start handler installed above is replaced by the list-based one
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        # None outside a doctype declaration; a list of the tokens seen so
        # far while _default() is collecting one
        self._doctype = None
        # lookup table used by _default() for entity references that
        # reach the default handler
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    # Re-raise an expat error as a ParseError that carries the original
    # error code and its (line, column) position.
    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible; text that cannot be
        # encoded is returned unchanged
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible.
        # results are memoized in self._names
        try:
            name = self._names[key]
        except KeyError:
            name = key
            # a "}" in the name means it is namespace-qualified; adding
            # the opening brace turns "uri}local" into "{uri}local"
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    # Start-element handler for attributes delivered as a mapping.
    def _start(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    # Start-element handler for attributes delivered as a flat sequence
    # that alternates names and values (name at index i, value at i+1).
    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    # Character data handler: forwards the text to the target.
    def _data(self, text):
        return self.target.data(self._fixtext(text))

    # End-element handler: forwards the fixed-up name to the target.
    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # Comment handler: forwarded only if the target has a comment()
    # method; otherwise the comment is ignored.
    def _comment(self, data):
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    # Processing instruction handler: forwarded only if the target has a
    # pi() method; otherwise the instruction is ignored.
    def _pi(self, target, data):
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    # Default handler.  It deals with two things: text starting with "&"
    # (an entity reference, resolved through self.entity), and the pieces
    # of a doctype declaration, which are collected one token at a time.
    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                # text[1:-1] drops the first and last character of the
                # reference, leaving the entity name
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                # end of the declaration before a complete
                # PUBLIC/SYSTEM form was seen: give up on it
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    # not complete yet (or not a recognized form); wait
                    # for more tokens
                    return
                # [1:-1] drops the first and last character (the
                # surrounding quotes) of the identifiers
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype is not self._XMLParser__doctype:
                    # doctype() differs from the sentinel saved in the
                    # class body, i.e. it has been redefined
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.  This implementation
    # only issues a DeprecationWarning.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass; the name is
    # mangled to _XMLParser__doctype, which is how _default() refers to it
    __doctype = doctype

    ##
    # Feeds data to the parser.  Expat errors are re-raised as
    # ParseError through _raiseerror().
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error, v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.  Expat errors are re-raised
    # as ParseError through _raiseerror().  The target and _parser
    # attributes are deleted afterwards.
    #
    # @return The value returned by the target's close() method.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error, v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1640
# compatibility: XMLTreeBuilder is bound to the same class as XMLParser
XMLTreeBuilder = XMLParser
1643
# workaround circular import.
# The optional C14N serializer is imported here, at the very end of the
# module.  If the import succeeds, the "c14n" output method is added to
# the _serialize table; if ElementC14N cannot be imported, the table is
# left without it.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass