]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_xml_etree.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / test / test_xml_etree.py
1 # xml.etree test. This file contains enough tests to make sure that
2 # all included components work as they should.
3 # Large parts are extracted from the upstream test suite.
4
5 # IMPORTANT: the same doctests are run from "test_xml_etree_c" in
6 # order to ensure consistency between the C implementation and the
7 # Python implementation.
8 #
9 # For this purpose, the module-level "ET" symbol is temporarily
10 # monkey-patched when running the "test_xml_etree_c" test suite.
11 # Don't re-import "xml.etree.ElementTree" module in the docstring,
12 # except if the test is specific to the Python implementation.
13
14 import sys
15 import cgi
16
17 from test import test_support
18 from test.test_support import findfile
19
20 from xml.etree import ElementTree as ET
21
# Paths of the two XML fixture files, looked up with findfile() in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# NOTE(review): this copy of the file has lost all leading whitespace, so any
# indentation that originally lived inside the literals below cannot be
# recovered here -- confirm against the upstream file before relying on
# doctests that print these documents verbatim.

# Un-namespaced sample document: two <tag> children and a <section> holding
# one more <tag>.
SAMPLE_XML = """\
<body>
<tag class='a'>text</tag>
<tag class='b' />
<section>
<tag class='b' id='inner'>subtext</tag>
</section>
</body>
"""

# Replacement <section> with extra children (<nexttag>, <nextsection>).
SAMPLE_SECTION = """\
<section>
<tag class='b' id='inner'>subtext</tag>
<nexttag />
<nextsection>
<tag />
</nextsection>
</section>
"""

# Same shape as SAMPLE_XML but with a default namespace declared on <body>.
# Unlike the two literals above, this one starts with a newline (no "\").
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
<tag>text</tag>
<tag />
<section>
<tag>subtext</tag>
</section>
</body>
"""
54
55
def sanity():
    # Doctest-only function (no executable body): the docstring checks that
    # ElementTree, ElementInclude and ElementPath import from xml.etree.
    """
    Import sanity.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
64
def check_method(method):
    """Report on stdout when *method* has no ``__call__`` attribute.

    Nothing is printed for callables, so a passing doctest shows no output.
    """
    if hasattr(method, '__call__'):
        return
    # Single-argument print: same output as ``print method, "not callable"``.
    print("%s not callable" % (method,))
68
69 def serialize(elem, to_string=True, **options):
70 import StringIO
71 file = StringIO.StringIO()
72 tree = ET.ElementTree(elem)
73 tree.write(file, **options)
74 if to_string:
75 return file.getvalue()
76 else:
77 file.seek(0)
78 return file
79
def summarize(elem):
    """Return ``"<Comment>"`` for a comment element, otherwise ``elem.tag``."""
    return "<Comment>" if elem.tag == ET.Comment else elem.tag
84
def summarize_list(seq):
    """Return the list of ``summarize()`` results for the items of *seq*, in order."""
    labels = []
    for item in seq:
        labels.append(summarize(item))
    return labels
87
def normalize_crlf(tree):
    """Convert CRLF line endings to LF in every element's text and tail.

    Elements are visited through ``tree.iter()`` and updated in place; empty
    or missing ``text``/``tail`` values are left untouched.
    """
    for node in tree.iter():
        for field in ("text", "tail"):
            value = getattr(node, field)
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
94
def check_string(string):
    """Exercise the basic string operations on *string*.

    Calls ``len()``, iterates over the value, concatenates it with ``""`` and
    ``" "`` and takes an empty slice.  An operation the object does not
    support raises; an item whose length is not 1 is reported on stdout.
    Returns None.
    """
    len(string)
    for char in string:
        if len(char) != 1:
            # Wrap in a tuple so a tuple-valued item is formatted, not unpacked.
            print("expected one-character string, got %r" % (char,))
    # The results are discarded on purpose: only the operations are checked.
    string + ""
    string + " "
    string[:0]
103
def check_mapping(mapping):
    """Exercise the basic mapping operations on *mapping*.

    Calls ``len()``, ``keys()`` and ``items()``, looks up every key, then
    stores ``"value"`` under ``"key"`` and reads it back.  The mapping is
    modified: it keeps the ``"key"`` entry afterwards.  A wrong read-back
    value is reported on stdout; an unsupported operation raises.
    Returns None.
    """
    len(mapping)
    keys = mapping.keys()
    mapping.items()  # only checks that the call works
    for key in keys:
        mapping[key]  # the lookup itself is the check
    mapping["key"] = "value"
    if mapping["key"] != "value":
        # Wrap in a tuple so a tuple-valued entry is formatted, not unpacked.
        print("expected value string, got %r" % (mapping["key"],))
113
def check_element(element):
    """Run the interface checks on *element* and, recursively, on its children.

    Prints a message when ``ET.iselement`` rejects the object or when one of
    the ``tag``/``attrib``/``text``/``tail`` attributes is missing, then runs
    ``check_string`` on the tag and on any non-None text/tail and
    ``check_mapping`` on the attribute mapping.
    """
    if not ET.iselement(element):
        print("not an element")
    required = (
        ("tag", "no tag member"),
        ("attrib", "no attrib member"),
        ("text", "no text member"),
        ("tail", "no tail member"),
    )
    for name, complaint in required:
        if not hasattr(element, name):
            print(complaint)

    check_string(element.tag)
    check_mapping(element.attrib)
    for name in ("text", "tail"):
        value = getattr(element, name)
        if value is not None:
            check_string(value)
    for child in element:
        check_element(child)
134
135 # --------------------------------------------------------------------
136 # element tree tests
137
def interface():
    # Doctest-only function (no executable body).  The docstring runs
    # check_element on a new element and on a tree root, checks the repr of an
    # element with a non-ASCII tag, checks that the listed Element methods are
    # callable and that iter()/iterfind() results expose next(), and asserts
    # three module-level aliases.  It is a raw string, so the backslash
    # escapes reach doctest unprocessed.
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element) # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
188
def simpleops():
    # Doctest-only function (no executable body).  The docstring exercises
    # append/remove/insert/extend plus slice reads, slice assignment and slice
    # deletion, comparing serialize() output after each step; removing a
    # child that is no longer present is expected to raise ValueError.
    """
    Basic method sanity checks.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
244
def cdata():
    # Doctest-only function (no executable body).  Plain text, numeric
    # character references and a CDATA section are all expected to serialize
    # to the same '<tag>hello</tag>'.
    """
    Test CDATA handling (etc).

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
256
257 # Only with Python implementation
def simplefind():
    # Doctest-only function (no executable body).  The docstring imports
    # xml.etree.ElementTree directly (instead of using the module-level ET),
    # temporarily replaces ElementTree.ElementPath with _SimpleElementPath()
    # and puts the saved object back in its last example.
    """
    Test find methods using the elementpath fallback.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
292
def find():
    # Doctest-only function (no executable body).  The docstring runs find(),
    # findtext() and findall() with a range of path expressions against
    # SAMPLE_XML (whose third child is replaced by SAMPLE_SECTION part-way
    # through) and, at the end, against the namespaced SAMPLE_XML_NS.
    """
    Test find methods (including xpath syntax).

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
414
def file_init():
    # Doctest-only function (no executable body).  Builds an ElementTree via
    # the file= keyword, first from a StringIO wrapping SAMPLE_XML and then
    # from the SIMPLE_XMLFILE path, and runs find() on each result.
    """
    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
432
def bad_find():
    # Doctest-only function (no executable body).  An absolute path passed to
    # Element.findall() is expected to raise SyntaxError.
    """
    Check bad or unsupported path expressions.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
442
def path_cache():
    # Doctest-only function (no executable body).  Watches
    # len(ET.ElementPath._cache): repeating the same 10 paths must not change
    # it, 20 paths must grow it, and after 600 paths it must stay below 500.
    """
    Check that the path cache behaves sanely.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
460
def copy():
    # Doctest-only function (no executable body).  After the child of e1 is
    # renamed, the copy.copy() result is expected to show the new tag and the
    # copy.deepcopy() result the old one.  The function's name matches the
    # stdlib module that the docstring imports.
    """
    Test copy handling (etc).

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
478
def attrib():
    # Doctest-only function (no executable body).  Covers get()/set(), the
    # ways of passing attributes to ET.Element (keywords, dict, **dict, dict
    # plus keyword), aliasing of the caller's dict, and direct use of
    # elem.attrib.  keys()/items() are sorted before comparison.
    """
    Test attribute handling.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
536
def makeelement():
    # Doctest-only function (no executable body).  Checks that makeelement()
    # does not alias the attrib dict it is given, then appends/extends with
    # the same sub-element and replaces all children via slice assignment
    # from a list and from a tuple.
    """
    Test makeelement handling.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
567
def parsefile():
    # Doctest-only function (no executable body).  Parses the two fixture
    # files with ET.parse() and writes them to sys.stdout, then feeds the raw
    # file data to XMLParser, XMLTreeBuilder and XMLParser(target=TreeBuilder).
    # NOTE(review): the expected outputs echo the fixture files verbatim and
    # this copy of the file has lost all leading whitespace -- confirm the
    # indentation of the expected XML lines against simple.xml/simple-ns.xml.
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
    <ns0:element key="value">text</ns0:element>
    <ns0:element>text</ns0:element>tail
    <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    """
622
def parseliteral():
    # Doctest-only function (no executable body).  Parses literal documents
    # with XML(), fromstring() and fromstringlist(), serializes them again
    # with tostring()/tostringlist(), and checks the id mapping returned by
    # XMLID().  The docstring is not raw, so "\\n" reaches doctest as "\n".
    """
    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
648
def iterparse():
    # Doctest-only function (no executable body).  Runs ET.iterparse with the
    # default events, an empty events tuple (positional and keyword),
    # start/end and namespace events, an unknown event name (ValueError), a
    # non-ASCII namespace declaration, and trailing junk (ParseError).
    # The docstring is not raw, so doubled backslashes reach doctest as
    # single ones.
    """
    Test iterparse interface.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...     print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...     print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...     print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...     if action in ("start", "end"):
    ...         print action, elem.tag
    ...     else:
    ...         print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import StringIO

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     " xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...     for action, elem in iterparse(source):
    ...         print action, elem.tag
    ... except ET.ParseError, v:
    ...     print v
    junk after document element: line 1, column 12
    """
743
def writefile():
    # Doctest-only function (no executable body).  Serializes an element with
    # text and a sub-element, then sets elem.tag to None and checks that only
    # the content is written, including an inserted comment and a processing
    # instruction.
    """
    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem) # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
765
def custom_builder():
    # Doctest-only function (no executable body).  Feeds each fixture file to
    # ET.XMLParser with a hand-written target object and checks the order of
    # the callbacks it prints; the second Builder also defines pi() and
    # comment().
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
819
def getchildren():
    # Doctest-only function (no executable body).  Lists getchildren() for
    # every element reached through iter() and getiterator(), then checks
    # child counts and identity after slice deletion, slice assignment and
    # clear().
    """
    Test Element.getchildren()

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
866
def writestring():
    # Doctest-only function (no executable body).  Round-trips one document
    # through XML()/fromstring() and tostring().
    """
    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """
876
def check_encoding(encoding):
    """
    Parse an empty document whose XML declaration names *encoding*.

    Returns None; a parser failure propagates as an exception.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    """
    document = "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    ET.XML(document)
887
def encoding():
    # Doctest-only function (no executable body).  Serializes the same element
    # with the default, utf-8, us-ascii and iso-8859-1 encodings for plain
    # text, markup characters and non-ASCII characters, in both text and
    # attribute position.  The docstring is raw, so the backslash escapes
    # reach doctest unprocessed.
    r"""
    Test encoding issues.

    >>> elem = ET.Element("tag")
    >>> elem.text = u"abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    """
946
def methods():
    # Doctest-only function (no executable body).  Serializes one document
    # with method=None, "xml", "html" and "text" and compares the outputs.
    # The docstring is raw, so "\n" reaches doctest as a backslash escape.
    r"""
    Test serialization methods.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """
964
def iterators():
    # Doctest-only function (no executable body).  Checks iter() and
    # itertext() on an element and a sub-element, that iterparse() returns
    # something next() accepts, and that iter() on a tree built from None
    # raises AttributeError.
    """
    Test iterators.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> summarize(next(e.iter()))
    'html'
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    >>> next(e.itertext())
    'this is a '

    Method iterparse should return an iterator. See bug 6472.

    >>> sourcefile = serialize(e, to_string=False)
    >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
    ('end', <Element 'i' at 0x...>)

    >>> tree = ET.ElementTree(None)
    >>> tree.iter()
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'iter'
    """
994
# Document whose DOCTYPE pulls in an external parameter entity and whose body
# references "&entity;", which the document itself never defines.
# NOTE(review): this copy of the file has lost all leading whitespace, so any
# indentation originally inside the literal is not recoverable here.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
1002
def entity():
    # Doctest-only function (no executable body).  Covers a numeric character
    # reference in an attribute, undefined entities (ParseError, with and
    # without the external DTD subset of ENTITY_XML), and an entity supplied
    # through the parser's "entity" mapping.
    """
    Test entity handling.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'
    """
1032
def error(xml):
    """

    Test error handling.

    Parse *xml* and return the ET.ParseError it raises, or None when the
    document parses cleanly.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Bind the exception directly rather than reading the deprecated,
        # process-global sys.exc_value.
        return exc
    return None
1052
def namespace():
    # Doctest-only function (no executable body).  Checks which prefixes the
    # serializer emits for the xml namespace, for the rdf/html/soap URIs used
    # below, and for the namespace of SAMPLE_XML_NS.
    # NOTE(review): the last expected output echoes SAMPLE_XML_NS including
    # its whitespace, and this copy of the file has lost all leading
    # whitespace -- confirm the indentation of those lines upstream.
    """
    Test namespace issues.

    1) xml namespace

    >>> elem = ET.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces

    >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces
    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> print serialize(elem)
    <ns0:body xmlns:ns0="http://effbot.org/ns">
    <ns0:tag>text</ns0:tag>
    <ns0:tag />
    <ns0:section>
    <ns0:tag>subtext</ns0:tag>
    </ns0:section>
    </ns0:body>
    """
1088
def qname():
    # Doctest-only function (no executable body).  Covers "{uri}local" names
    # and ET.QName objects used as tags, attribute names and attribute
    # values, plus str() and == on QName itself.
    """
    Test QName handling.

    1) decorated tags

    >>> elem = ET.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
    >>> serialize(elem) # 1.4
    '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    >>> elem.clear()
    >>> elem.attrib[ET.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    3) decorated values are not converted by default, but the
    QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'

    >>> elem.clear()
    >>> subelem = ET.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    4) Direct QName tests

    >>> str(ET.QName('ns', 'tag'))
    '{ns}tag'
    >>> str(ET.QName('{ns}tag'))
    '{ns}tag'
    >>> q1 = ET.QName('ns', 'tag')
    >>> q2 = ET.QName('ns', 'tag')
    >>> q1 == q2
    True
    >>> q2 = ET.QName('ns', 'other-tag')
    >>> q1 == q2
    False
    >>> q1 == 'ns:tag'
    False
    >>> q1 == '{ns}tag'
    True
    """
1161
def doctype_public():
    """
    Test PUBLIC doctype.

    Parsing a document that carries a PUBLIC doctype declaration must
    succeed without printing anything.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ... ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ... ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ... '<html>text</html>')

    """
1172
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Flattens the (operator, tag) pairs produced by
    ElementPath.xpath_tokenizer for the path expression *p* into a list
    that holds, for each pair, the operator if it is non-empty and the
    tag otherwise.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    tokens = ElementPath.xpath_tokenizer(p)
    # Each token is a pair of which one member is normally empty; keep
    # whichever one is set (operator first).
    return [operator or name for operator, name in tokens]
1226
def processinginstruction():
    """
    Test ProcessingInstruction directly

    ET.PI is exercised alongside ET.ProcessingInstruction and must give
    the same serialization.

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746: the text of a processing instruction is written
    without escaping "<" and "&"

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
1243
1244 #
1245 # xinclude tests (samples from appendix C of the xinclude specification)
1246
# In-memory "file system" for the xinclude tests: maps an href to the
# text of the resource.  xinclude_loader() serves documents from here.
XINCLUDE = {}

# C.1: XML inclusion of "disclaimer.xml".
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml"/>
</document>
"""

# Target of the include in C1.xml.
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
<p>The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.</p>
</disclaimer>
"""

# C.2: textual inclusion (parse="text") of "count.txt".
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been accessed
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Plain-text resource included by C2.xml and C2b.xml.
XINCLUDE["count.txt"] = "324387"

# Variant of C.2 in which the include follows a sibling <em> element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been <em>accessed</em>
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: textual inclusion of an XML resource ("data.xml").
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source of the "data.xml" resource:</p>
<example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# XML resource that C3.xml includes as text.
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements; neither "example.txt" nor
# "fallback-example.txt" is defined in this table.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="example.txt" parse="text">
<xi:fallback>
<xi:include href="fallback-example.txt" parse="text">
<xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
</xi:include>
</xi:fallback>
</xi:include>
</div>
"""

# Document whose include points at the on-disk SIMPLE_XMLFILE; the path is
# escaped (quotes included) so that it is safe inside the href attribute.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Example.</p>
<xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
1319
def xinclude_loader(href, parse="xml", encoding=None):
    """Loader for the xinclude tests, backed by the XINCLUDE table.

    Returns the stored text for *href*; when *parse* is "xml" the text is
    parsed with the Python ElementTree implementation and the resulting
    element is returned instead.  *encoding* is accepted but not used.
    Raises IOError("resource not found") if *href* is not in XINCLUDE.
    """
    if href not in XINCLUDE:
        raise IOError("resource not found")
    resource = XINCLUDE[href]
    if parse != "xml":
        return resource
    from xml.etree.ElementTree import XML
    return XML(resource)
1329
def xinclude():
    r"""
    Test ElementInclude.include() with the in-memory xinclude_loader.

    Basic inclusion example (XInclude C.1)

    >>> from xml.etree import ElementTree as ET
    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
    <p>120 Mz is adequate for an average home user.</p>
    <disclaimer>
    <p>The opinions represented herein represent those of the individual
    and should not be interpreted as official policy endorsed by this
    organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
    <p>This document has been accessed
    324387 times.</p>
    </document>

    Textual inclusion after sibling element (based on modified XInclude C.2)

    >>> document = xinclude_loader("C2b.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print(serialize(document)) # C2b
    <document>
    <p>This document has been <em>accessed</em>
    324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
    <p>The following is the source of the "data.xml" resource:</p>
    <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
    &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented, so the loader's
    IOError for the missing resource propagates to the caller.

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5
    """
1392
def xinclude_default():
    """
    Test ElementInclude.include() when no loader argument is passed.

    "default.xml" references SIMPLE_XMLFILE by path, so the included
    content comes from that file rather than from the XINCLUDE table.

    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
    <p>Example.</p>
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    </document>
    """
1409
1410 #
1411 # badly formatted xi:include tags
1412
# Malformed documents used by xinclude_failures().
XINCLUDE_BAD = {}

# xi:include with a parse attribute other than "xml" or "text".
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:fallback></xi:fallback>
</div>
"""
1429
def xinclude_failures():
    r"""
    Test the FatalIncludeError cases of ElementInclude.include().

    A loader that always returns None is used, so every include
    that reaches the loader fails.

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
1465
1466 # --------------------------------------------------------------------
1467 # reported bugs
1468
def bug_xmltoolkit21():
    """
    Regression test for xmltoolkit bug 21.

    marshaller gives obscure errors for non-string values

    An integer used as tag, text, tail, attribute key or attribute
    value must make serialization fail with the same TypeError.

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """
1500
def bug_xmltoolkit25():
    """
    Regression test for xmltoolkit bug 25.

    typo in ElementTree.findtext

    findtext() is called on an ElementTree wrapper (not on an Element)
    for both a direct child and a nested path.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """
1514
def bug_xmltoolkit28():
    """
    Regression test for xmltoolkit bug 28.

    .//tag causes exceptions

    A descendant search must return an empty list when nothing matches
    and the matching element otherwise.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """
1527
def bug_xmltoolkitX1():
    """
    Regression test for an unnumbered xmltoolkit bug.

    dump() doesn't flush the output buffer

    Text written to sys.stdout right after ET.dump() must appear after
    the dumped document.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """
1539
def bug_xmltoolkit39():
    """
    Regression test for xmltoolkit bug 39.

    non-ascii element and attribute names doesn't work

    Covers names parsed from an iso-8859-1 document as well as names
    given as unicode strings, each serialized to utf-8.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
1569
def bug_xmltoolkit54():
    """
    Regression test for xmltoolkit bug 54.

    problems handling internally defined entities

    The entity is declared in the document's internal DTD subset and
    must be expanded to the character it stands for.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """
1580
def bug_xmltoolkit55():
    """
    Regression test for xmltoolkit bug 55.

    make sure we're reporting the first error, not the last

    The document references three undefined entities; the ParseError
    must name the first one (&ldots;).

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """
1591
class ExceptionFile:
    """File-like stub whose read() always fails with IOError."""

    def read(self, x):
        # The requested size is ignored; every read attempt raises.
        raise IOError()
1595
def xmltoolkit60():
    """
    Regression test for xmltoolkit bug 60.

    Handle crash in stream source: an IOError raised by the source's
    read() must propagate out of ET.parse().
    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """
1605
# Input for xmltoolkit62(): references the &lsquo; and &rsquo; entities,
# which are not defined in the document itself (the DTD is external).
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""
1613
1614
def xmltoolkit62():
    """
    Regression test for xmltoolkit bug 62.

    Don't crash when using custom entities.

    The lsquo/rsquo entities are registered in the parser's entity table
    before XMLTOOLKIT62_DOC is fed to it; the text of the <paragraph>
    element found in the parsed tree is returned.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    builder = ET.XMLTreeBuilder()
    # Supply the two entities the document uses but does not define.
    builder.entity.update({u'rsquo': u'\u2019', u'lsquo': u'\u2018'})
    builder.feed(XMLTOOLKIT62_DOC)
    root = builder.close()
    paragraph = root.find('.//paragraph')
    return paragraph.text
1630
def xmltoolkit63():
    """
    Regression test for xmltoolkit bug 63.

    Check reference leak: after a warm-up call, 1000 further calls must
    leave the reference count of None unchanged.
    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    # Build one element with text through the TreeBuilder API; the result
    # is discarded on purpose.
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1647
1648 # --------------------------------------------------------------------
1649
1650
def bug_200708_newline():
    r"""
    Regression test: newlines in attribute values.

    Preserve newlines in attributes: they are written as &#10; and
    survive a serialize/parse round trip.

    >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10; return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10; return 3&#10;" />'

    """
1665
def bug_200708_close():
    """
    Regression test: XMLParser.close() returns what the target's
    close() returns.

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.
    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
1685
def bug_200709_default_namespace():
    """
    Test serialization with the default_namespace option.

    1) all names in the default namespace: no prefixes are written
    2) a second namespace gets a generated prefix
    3) mixing in an unqualified name is rejected with ValueError

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1708
def bug_200709_register_namespace():
    """
    Test ET.register_namespace().

    Before registration the URI gets a generated prefix; afterwards the
    registered prefix is used.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
1724
def bug_200709_element_comment():
    """
    Test that appended comments and processing instructions are tagged
    with the ET.Comment / ET.PI factory functions.

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
1742
def bug_200709_element_insert():
    """
    Test Element.insert() with index 0 and with a negative index.

    The same element object is inserted twice, so it shows up twice
    in the child list.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
1758
def bug_200709_iter_comment():
    """
    Test that Element.iter(ET.Comment) finds a comment nested below the
    element it is called on.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
1770
1771 # --------------------------------------------------------------------
1772 # reported on bugs.python.org
1773
def bug_1534630():
    """
    Regression test for bugs.python.org issue 1534630.

    TreeBuilder.data() is called before any element has been started;
    the text is not part of the element that is built afterwards.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
1786
def check_issue6233():
    """
    Regression test for bugs.python.org issue 6233.

    Non-ASCII text parsed from utf-8 or iso-8859-1 input must be
    written as a character reference when serializing to ascii.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
1798
def check_issue3151():
    """
    Regression test for bugs.python.org issue 3151.

    A namespace URI that itself contains braces ("${stuff}") must be
    parsed and serialized correctly.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
1810
def check_issue6565():
    """
    Regression test for bugs.python.org issue 6565.

    Assigning to the full slice of an element replaces all of its
    children, even when the new list is longer than the old one.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
1823
1824 # --------------------------------------------------------------------
1825
1826
class CleanContext(object):
    """Provide default namespace mapping and path cache.

    Context manager used around the doctest run.  On entry it replaces
    ElementTree._namespace_map and ElementPath._cache with copies, so
    that tests may modify them freely; on exit the original objects are
    put back.  It also wraps test_support.check_warnings() with filters
    for the warnings listed in _DEPRECATIONS.
    """
    checkwarnings = None

    # (message regex, warning category) filters handed to check_warnings().
    # The gap after each sentence-ending period is matched with \s+ so the
    # filters accept one space as well as two.
    _DEPRECATIONS = (
        # Search behaviour is broken if search path starts with "/".
        (r"This search is broken in 1.3 and earlier, and will be fixed "
         r"in a future version.\s+If you rely on the current behaviour, "
         r"change it to '.+'", FutureWarning),
        # Element.getchildren() and Element.getiterator() are deprecated.
        (r"This method will be removed in future versions.\s+"
         r"Use .+ instead.", DeprecationWarning),
        (r"This method will be removed in future versions.\s+"
         r"Use .+ instead.", PendingDeprecationWarning),
        # XMLParser.doctype() is deprecated.
        (r"This method of XMLParser is deprecated.\s+Define doctype.. "
         r"method on the TreeBuilder target.", DeprecationWarning))

    def __init__(self, quiet=False):
        """Create the warning checker.

        quiet -- passed on to check_warnings(); forced to True under -OO.
        """
        if sys.flags.optimize >= 2:
            # under -OO, doctests cannot be run and therefore not all warnings
            # will be emitted
            quiet = True
        self.checkwarnings = test_support.check_warnings(*self._DEPRECATIONS,
                                                         quiet=quiet)

    def __enter__(self):
        from xml.etree import ElementTree
        # Remember the original objects so that __exit__ can restore them.
        self._nsmap = ElementTree._namespace_map
        self._path_cache = ElementTree.ElementPath._cache
        # Copy the default namespace mapping
        ElementTree._namespace_map = self._nsmap.copy()
        # Copy the path cache (should be empty)
        ElementTree.ElementPath._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()

    def __exit__(self, *args):
        from xml.etree import ElementTree
        # Restore mapping and path cache
        ElementTree._namespace_map = self._nsmap
        ElementTree.ElementPath._cache = self._path_cache
        self.checkwarnings.__exit__(*args)
1868
1869
def test_main(module_name='xml.etree.ElementTree'):
    """Run the doctests of this module inside a CleanContext.

    module_name -- name of the module that the module-level ET symbol is
    expected to be bound to, both before and after the run.  Warning
    checks are run in quiet mode for any name other than the Python
    implementation's.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    not_py_module = (module_name != 'xml.etree.ElementTree')
    with CleanContext(quiet=not_py_module):
        test_support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name
1884
# Run the doctests against the default (Python) implementation when this
# file is executed as a script.
if __name__ == '__main__':
    test_main()