]>
git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_pyexpat.py
40aa782e98e9f01b06704adf22f179939f385b8a
1 # XXX TypeErrors on calling handlers, or on bad return values from a
2 # handler, are obscure and unhelpful.
7 from xml
.parsers
import expat
9 from test
import test_support
10 from test
.test_support
import sortdict
, run_unittest
13 class SetAttributeTest(unittest
.TestCase
):
15 self
.parser
= expat
.ParserCreate(namespace_separator
='!')
16 self
.set_get_pairs
= [
23 def test_returns_unicode(self
):
24 for x
, y
in self
.set_get_pairs
:
25 self
.parser
.returns_unicode
= x
26 self
.assertEqual(self
.parser
.returns_unicode
, y
)
28 def test_ordered_attributes(self
):
29 for x
, y
in self
.set_get_pairs
:
30 self
.parser
.ordered_attributes
= x
31 self
.assertEqual(self
.parser
.ordered_attributes
, y
)
33 def test_specified_attributes(self
):
34 for x
, y
in self
.set_get_pairs
:
35 self
.parser
.specified_attributes
= x
36 self
.assertEqual(self
.parser
.specified_attributes
, y
)
40 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
41 <?xml-stylesheet href="stylesheet.css"?>
43 <!DOCTYPE quotations SYSTEM "quotations.dtd" [
45 <!NOTATION notation SYSTEM "notation.jpeg">
46 <!ENTITY acirc "â">
47 <!ENTITY external_entity SYSTEM "entity.file">
48 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
52 <root attr1="value1" attr2="value2ὀ">
53 <myns:subelement xmlns:myns="http://www.python.org/namespace">
54 Contents of subelements
56 <sub2><![CDATA[contents of CDATA section]]></sub2>
62 # Produce UTF-8 output
63 class ParseTest(unittest
.TestCase
):
68 def StartElementHandler(self
, name
, attrs
):
69 self
.out
.append('Start element: ' + repr(name
) + ' ' +
def EndElementHandler(self, name):
    """Record the end of element *name* as one line in self.out."""
    line = 'End element: %r' % (name,)
    self.out.append(line)
75 def CharacterDataHandler(self
, data
):
78 self
.out
.append('Character data: ' + repr(data
))
def ProcessingInstructionHandler(self, target, data):
    """Record a processing instruction (repr of target and data) in self.out."""
    line = 'PI: %r %r' % (target, data)
    self.out.append(line)
def StartNamespaceDeclHandler(self, prefix, uri):
    """Record a namespace declaration (repr of prefix and uri) in self.out."""
    line = 'NS decl: %r %r' % (prefix, uri)
    self.out.append(line)
def EndNamespaceDeclHandler(self, prefix):
    """Record the end of the namespace declaration for *prefix* in self.out."""
    line = 'End of NS decl: %r' % (prefix,)
    self.out.append(line)
def StartCdataSectionHandler(self):
    """Record the start of a CDATA section in self.out."""
    marker = 'Start of CDATA section'
    self.out.append(marker)
def EndCdataSectionHandler(self):
    """Record the end of a CDATA section in self.out."""
    marker = 'End of CDATA section'
    self.out.append(marker)
def CommentHandler(self, text):
    """Record a comment (repr of its text) in self.out."""
    line = 'Comment: %r' % (text,)
    self.out.append(line)
def NotationDeclHandler(self, *args):
    """Record a notation declaration (the full argument tuple) in self.out."""
    # The unpacking is kept only for its side effect: it raises unless
    # exactly four arguments were supplied.
    _name, _base, _sysid, _pubid = args
    line = 'Notation declared: ' + str(args)
    self.out.append(line)
def UnparsedEntityDeclHandler(self, *args):
    """Record an unparsed entity declaration (the full argument tuple) in self.out."""
    # The unpacking is kept only for its side effect: it raises unless
    # exactly five arguments were supplied.
    _entity, _base, _system_id, _public_id, _notation = args
    line = 'Unparsed entity decl: ' + str(args)
    self.out.append(line)
106 def NotStandaloneHandler(self
, userData
):
107 self
.out
.append('Not standalone')
110 def ExternalEntityRefHandler(self
, *args
):
111 context
, base
, sysId
, pubId
= args
112 self
.out
.append('External entity ref: %s' %(args
[1:],))
115 def DefaultHandler(self
, userData
):
118 def DefaultHandlerExpand(self
, userData
):
122 'StartElementHandler', 'EndElementHandler',
123 'CharacterDataHandler', 'ProcessingInstructionHandler',
124 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
125 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
126 'CommentHandler', 'StartCdataSectionHandler',
127 'EndCdataSectionHandler',
128 'DefaultHandler', 'DefaultHandlerExpand',
129 #'NotStandaloneHandler',
130 'ExternalEntityRefHandler'
135 out
= self
.Outputter()
136 parser
= expat
.ParserCreate(namespace_separator
='!')
137 for name
in self
.handler_names
:
138 setattr(parser
, name
, getattr(out
, name
))
139 parser
.returns_unicode
= 0
140 parser
.Parse(data
, 1)
144 self
.assertEqual(op
[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'')
145 self
.assertEqual(op
[1], "Comment: ' comment data '")
146 self
.assertEqual(op
[2], "Notation declared: ('notation', None, 'notation.jpeg', None)")
147 self
.assertEqual(op
[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')")
148 self
.assertEqual(op
[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}")
149 self
.assertEqual(op
[5], "NS decl: 'myns' 'http://www.python.org/namespace'")
150 self
.assertEqual(op
[6], "Start element: 'http://www.python.org/namespace!subelement' {}")
151 self
.assertEqual(op
[7], "Character data: 'Contents of subelements'")
152 self
.assertEqual(op
[8], "End element: 'http://www.python.org/namespace!subelement'")
153 self
.assertEqual(op
[9], "End of NS decl: 'myns'")
154 self
.assertEqual(op
[10], "Start element: 'sub2' {}")
155 self
.assertEqual(op
[11], 'Start of CDATA section')
156 self
.assertEqual(op
[12], "Character data: 'contents of CDATA section'")
157 self
.assertEqual(op
[13], 'End of CDATA section')
158 self
.assertEqual(op
[14], "End element: 'sub2'")
159 self
.assertEqual(op
[15], "External entity ref: (None, 'entity.file', None)")
160 self
.assertEqual(op
[16], "End element: 'root'")
162 def test_unicode(self
):
163 # Try the parse again, this time producing Unicode output
164 out
= self
.Outputter()
165 parser
= expat
.ParserCreate(namespace_separator
='!')
166 parser
.returns_unicode
= 1
167 for name
in self
.handler_names
:
168 setattr(parser
, name
, getattr(out
, name
))
170 parser
.Parse(data
, 1)
173 self
.assertEqual(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
174 self
.assertEqual(op
[1], "Comment: u' comment data '")
175 self
.assertEqual(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
176 self
.assertEqual(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
177 self
.assertEqual(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
178 self
.assertEqual(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
179 self
.assertEqual(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
180 self
.assertEqual(op
[7], "Character data: u'Contents of subelements'")
181 self
.assertEqual(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
182 self
.assertEqual(op
[9], "End of NS decl: u'myns'")
183 self
.assertEqual(op
[10], "Start element: u'sub2' {}")
184 self
.assertEqual(op
[11], 'Start of CDATA section')
185 self
.assertEqual(op
[12], "Character data: u'contents of CDATA section'")
186 self
.assertEqual(op
[13], 'End of CDATA section')
187 self
.assertEqual(op
[14], "End element: u'sub2'")
188 self
.assertEqual(op
[15], "External entity ref: (None, u'entity.file', None)")
189 self
.assertEqual(op
[16], "End element: u'root'")
191 def test_parse_file(self
):
193 out
= self
.Outputter()
194 parser
= expat
.ParserCreate(namespace_separator
='!')
195 parser
.returns_unicode
= 1
196 for name
in self
.handler_names
:
197 setattr(parser
, name
, getattr(out
, name
))
198 file = StringIO
.StringIO(data
)
200 parser
.ParseFile(file)
203 self
.assertEqual(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
204 self
.assertEqual(op
[1], "Comment: u' comment data '")
205 self
.assertEqual(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
206 self
.assertEqual(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
207 self
.assertEqual(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
208 self
.assertEqual(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
209 self
.assertEqual(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
210 self
.assertEqual(op
[7], "Character data: u'Contents of subelements'")
211 self
.assertEqual(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
212 self
.assertEqual(op
[9], "End of NS decl: u'myns'")
213 self
.assertEqual(op
[10], "Start element: u'sub2' {}")
214 self
.assertEqual(op
[11], 'Start of CDATA section')
215 self
.assertEqual(op
[12], "Character data: u'contents of CDATA section'")
216 self
.assertEqual(op
[13], 'End of CDATA section')
217 self
.assertEqual(op
[14], "End element: u'sub2'")
218 self
.assertEqual(op
[15], "External entity ref: (None, u'entity.file', None)")
219 self
.assertEqual(op
[16], "End element: u'root'")
221 # Issue 4877: expat.ParseFile causes segfault on a closed file.
222 fp
= open(test_support
.TESTFN
, 'wb')
225 parser
= expat
.ParserCreate()
226 with self
.assertRaises(ValueError):
229 test_support
.unlink(test_support
.TESTFN
)
232 class NamespaceSeparatorTest(unittest
.TestCase
):
233 def test_legal(self
):
234 # Tests that make sure we get errors when the namespace_separator value
235 # is illegal, and that we don't for good values:
237 expat
.ParserCreate(namespace_separator
=None)
238 expat
.ParserCreate(namespace_separator
=' ')
240 def test_illegal(self
):
242 expat
.ParserCreate(namespace_separator
=42)
245 self
.assertEqual(str(e
),
246 'ParserCreate() argument 2 must be string or None, not int')
249 expat
.ParserCreate(namespace_separator
='too long')
251 except ValueError, e
:
252 self
.assertEqual(str(e
),
253 'namespace_separator must be at most one character, omitted, or None')
255 def test_zero_length(self
):
256 # ParserCreate() needs to accept a namespace_separator of zero length
257 # to satisfy the requirements of RDF applications that are required
258 # to simply glue together the namespace URI and the localname. Though
259 # considered a wart of the RDF specifications, it needs to be supported.
261 # See XML-SIG mailing list thread starting with
262 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
264 expat
.ParserCreate(namespace_separator
='') # too short
267 class InterningTest(unittest
.TestCase
):
269 # Test the interning machinery.
270 p
= expat
.ParserCreate()
272 def collector(name
, *args
):
274 p
.StartElementHandler
= collector
275 p
.EndElementHandler
= collector
276 p
.Parse("<e> <e/> <e></e> </e>", 1)
278 self
.assertEqual(len(L
), 6)
280 # L should have the same string repeated over and over.
281 self
.assertTrue(tag
is entry
)
284 class BufferTextTest(unittest
.TestCase
):
287 self
.parser
= expat
.ParserCreate()
288 self
.parser
.buffer_text
= 1
289 self
.parser
.CharacterDataHandler
= self
.CharacterDataHandler
def check(self, expected, label):
    """Assert that self.stuff equals *expected*, reporting *label* on failure."""
    # The failure message carries the label, the collected items, and the
    # expected items passed through unicode().
    message = ("%s\nstuff = %r\nexpected = %r"
               % (label, self.stuff, map(unicode, expected)))
    self.assertEqual(self.stuff, expected, message)
def CharacterDataHandler(self, text):
    """Append the received character data, unchanged, to self.stuff."""
    collected = self.stuff
    collected.append(text)
299 def StartElementHandler(self
, name
, attrs
):
300 self
.stuff
.append("<%s>" % name
)
301 bt
= attrs
.get("buffer-text")
303 self
.parser
.buffer_text
= 1
305 self
.parser
.buffer_text
= 0
def EndElementHandler(self, name):
    """Append a closing-tag marker for element *name* to self.stuff."""
    closing_tag = "</%s>" % name
    self.stuff.append(closing_tag)
def CommentHandler(self, data):
    """Append a comment marker wrapping *data* to self.stuff."""
    comment_markup = "<!--%s-->" % data
    self.stuff.append(comment_markup)
def setHandlers(self, handlers=()):
    """Install the named handler methods of this object on self.parser.

    handlers -- iterable of attribute names; for each one, the attribute
                of that name on self is assigned to the same-named
                attribute of self.parser.  Defaults to no handlers.
    """
    # The default is an immutable empty tuple rather than a list literal,
    # so no mutable object is shared between calls.
    for name in handlers:
        setattr(self.parser, name, getattr(self, name))
317 def test_default_to_disabled(self
):
318 parser
= expat
.ParserCreate()
319 self
.assertFalse(parser
.buffer_text
)
321 def test_buffering_enabled(self
):
322 # Make sure buffering is turned on
323 self
.assertTrue(self
.parser
.buffer_text
)
324 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
325 self
.assertEqual(self
.stuff
, ['123'],
326 "buffered text not properly collapsed")
329 # XXX This test exposes more detail of Expat's text chunking than we
330 # XXX like, but it tests what we need to concisely.
331 self
.setHandlers(["StartElementHandler"])
332 self
.parser
.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
333 self
.assertEqual(self
.stuff
,
334 ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
335 "buffering control not reacting as expected")
338 self
.parser
.Parse("<a>1<b/><2><c/> \n 3</a>", 1)
339 self
.assertEqual(self
.stuff
, ["1<2> \n 3"],
340 "buffered text not properly collapsed")
343 self
.setHandlers(["StartElementHandler"])
344 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
345 self
.assertEqual(self
.stuff
, ["<a>", "1", "<b>", "2", "<c>", "3"],
346 "buffered text not properly split")
349 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
350 self
.parser
.CharacterDataHandler
= None
351 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
352 self
.assertEqual(self
.stuff
,
353 ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
356 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
357 self
.parser
.Parse("<a>1<b></b>2<c/>3</a>", 1)
358 self
.assertEqual(self
.stuff
,
359 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
362 self
.setHandlers(["CommentHandler", "EndElementHandler",
363 "StartElementHandler"])
364 self
.parser
.Parse("<a>1<b/>2<c></c>345</a> ", 1)
365 self
.assertEqual(self
.stuff
,
366 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
367 "buffered text not properly split")
370 self
.setHandlers(["CommentHandler", "EndElementHandler",
371 "StartElementHandler"])
372 self
.parser
.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
373 self
.assertEqual(self
.stuff
,
374 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
375 "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
376 "buffered text not properly split")
379 # Test handling of exception from callback:
380 class HandlerExceptionTest(unittest
.TestCase
):
def StartElementHandler(self, name, attrs):
    """Always raise RuntimeError carrying the element name; attrs is ignored."""
    failure = RuntimeError(name)
    raise failure
385 parser
= expat
.ParserCreate()
386 parser
.StartElementHandler
= self
.StartElementHandler
388 parser
.Parse("<a><b><c/></b></a>", 1)
390 except RuntimeError, e
:
391 self
.assertEqual(e
.args
[0], 'a',
392 "Expected RuntimeError for element 'a', but" + \
393 " found %r" % e
.args
[0])
396 # Test Current* members:
397 class PositionTest(unittest
.TestCase
):
398 def StartElementHandler(self
, name
, attrs
):
401 def EndElementHandler(self
, name
):
404 def check_pos(self
, event
):
406 self
.parser
.CurrentByteIndex
,
407 self
.parser
.CurrentLineNumber
,
408 self
.parser
.CurrentColumnNumber
)
409 self
.assertTrue(self
.upto
< len(self
.expected_list
),
410 'too many parser events')
411 expected
= self
.expected_list
[self
.upto
]
412 self
.assertEqual(pos
, expected
,
413 'Expected position %s, got position %s' %(pos
, expected
))
417 self
.parser
= expat
.ParserCreate()
418 self
.parser
.StartElementHandler
= self
.StartElementHandler
419 self
.parser
.EndElementHandler
= self
.EndElementHandler
421 self
.expected_list
= [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
422 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
424 xml
= '<a>\n <b>\n <c/>\n </b>\n</a>'
425 self
.parser
.Parse(xml
, 1)
428 class sf1296433Test(unittest
.TestCase
):
429 def test_parse_only_xml_data(self
):
430 # http://python.org/sf/1296433
432 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
433 # this one doesn't crash
434 #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
436 class SpecificException(Exception):
440 raise SpecificException
442 parser
= expat
.ParserCreate()
443 parser
.CharacterDataHandler
= handler
445 self
.assertRaises(Exception, parser
.Parse
, xml
)
447 class ChardataBufferTest(unittest
.TestCase
):
449 test setting of chardata buffer size
452 def test_1025_bytes(self
):
453 self
.assertEqual(self
.small_buffer_test(1025), 2)
455 def test_1000_bytes(self
):
456 self
.assertEqual(self
.small_buffer_test(1000), 1)
458 def test_wrong_size(self
):
459 parser
= expat
.ParserCreate()
460 parser
.buffer_text
= 1
462 parser
.buffer_size
= size
464 self
.assertRaises(TypeError, f
, sys
.maxint
+1)
465 self
.assertRaises(ValueError, f
, -1)
466 self
.assertRaises(ValueError, f
, 0)
468 def test_unchanged_size(self
):
469 xml1
= ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
470 xml2
= 'a'*512 + '</s>'
471 parser
= expat
.ParserCreate()
472 parser
.CharacterDataHandler
= self
.counting_handler
473 parser
.buffer_size
= 512
474 parser
.buffer_text
= 1
476 # Feed 512 bytes of character data: the handler should be called
480 self
.assertEqual(self
.n
, 1)
482 # Reassign to buffer_size, but assign the same size.
483 parser
.buffer_size
= parser
.buffer_size
484 self
.assertEqual(self
.n
, 1)
486 # Try parsing rest of the document
488 self
.assertEqual(self
.n
, 2)
491 def test_disabling_buffer(self
):
492 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
494 xml3
= "%s</a>" % ('c' * 1024)
495 parser
= expat
.ParserCreate()
496 parser
.CharacterDataHandler
= self
.counting_handler
497 parser
.buffer_text
= 1
498 parser
.buffer_size
= 1024
499 self
.assertEqual(parser
.buffer_size
, 1024)
501 # Parse one chunk of XML
503 parser
.Parse(xml1
, 0)
504 self
.assertEqual(parser
.buffer_size
, 1024)
505 self
.assertEqual(self
.n
, 1)
507 # Turn off buffering and parse the next chunk.
508 parser
.buffer_text
= 0
509 self
.assertFalse(parser
.buffer_text
)
510 self
.assertEqual(parser
.buffer_size
, 1024)
512 parser
.Parse(xml2
, 0)
513 self
.assertEqual(self
.n
, 11)
515 parser
.buffer_text
= 1
516 self
.assertTrue(parser
.buffer_text
)
517 self
.assertEqual(parser
.buffer_size
, 1024)
518 parser
.Parse(xml3
, 1)
519 self
.assertEqual(self
.n
, 12)
def make_document(self, bytes):
    """Return an XML document whose <tag> element holds *bytes* 'a' characters."""
    payload = bytes * 'a'
    return "<?xml version='1.0'?><tag>%s</tag>" % payload
526 def counting_handler(self
, text
):
529 def small_buffer_test(self
, buffer_len
):
530 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len
)
531 parser
= expat
.ParserCreate()
532 parser
.CharacterDataHandler
= self
.counting_handler
533 parser
.buffer_size
= 1024
534 parser
.buffer_text
= 1
540 def test_change_size_1(self
):
541 xml1
= "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
542 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
543 parser
= expat
.ParserCreate()
544 parser
.CharacterDataHandler
= self
.counting_handler
545 parser
.buffer_text
= 1
546 parser
.buffer_size
= 1024
547 self
.assertEqual(parser
.buffer_size
, 1024)
550 parser
.Parse(xml1
, 0)
551 parser
.buffer_size
*= 2
552 self
.assertEqual(parser
.buffer_size
, 2048)
553 parser
.Parse(xml2
, 1)
554 self
.assertEqual(self
.n
, 2)
556 def test_change_size_2(self
):
557 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
558 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
559 parser
= expat
.ParserCreate()
560 parser
.CharacterDataHandler
= self
.counting_handler
561 parser
.buffer_text
= 1
562 parser
.buffer_size
= 2048
563 self
.assertEqual(parser
.buffer_size
, 2048)
566 parser
.Parse(xml1
, 0)
567 parser
.buffer_size
//= 2
568 self
.assertEqual(parser
.buffer_size
, 1024)
569 parser
.Parse(xml2
, 1)
570 self
.assertEqual(self
.n
, 4)
572 class MalformedInputText(unittest
.TestCase
):
575 parser
= expat
.ParserCreate()
577 parser
.Parse(xml
, True)
579 except expat
.ExpatError
as e
:
580 self
.assertEqual(str(e
), 'unclosed token: line 2, column 0')
583 xml
= "<?xml version\xc2\x85='1.0'?>\r\n"
584 parser
= expat
.ParserCreate()
586 parser
.Parse(xml
, True)
588 except expat
.ExpatError
as e
:
589 self
.assertEqual(str(e
), 'XML declaration not well-formed: line 1, column 14')
592 run_unittest(SetAttributeTest
,
594 NamespaceSeparatorTest
,
597 HandlerExceptionTest
,
603 if __name__
== "__main__":