]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_sgmllib.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / test / test_sgmllib.py
CommitLineData
4710c53d 1import pprint\r
2import re\r
3import unittest\r
4from test import test_support\r
5sgmllib = test_support.import_module('sgmllib', deprecated=True)\r
6\r
7\r
class EventCollector(sgmllib.SGMLParser):
    """SGMLParser subclass that records each parser callback as a tuple.

    Every handler below appends a tuple to ``self.events``.  The first
    item names the kind of event ("starttag", "endtag", "comment",
    "charref", "data", "decl", "entityref", "pi" or "unknown decl") and
    the remaining items are the arguments the handler received.
    """

    def __init__(self):
        # The event log and its bound ``append`` shortcut are created
        # before the base-class initializer is called.
        self.events = []
        self.append = self.events.append
        sgmllib.SGMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        The merged list replaces ``self.events``.  ``self.append`` is
        rebound to the new list as well, so events recorded after this
        call end up in ``self.events`` instead of in the discarded list.
        """
        # Normalize the list of events so that buffer artefacts don't
        # separate runs of contiguous characters.
        merged = []
        prev_kind = None
        for event in self.events:
            kind = event[0]
            if kind == prev_kind == "data":
                # Fold this chunk of text into the previous data event.
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
            prev_kind = kind
        self.events = merged
        # Keep the shortcut pointing at the live list; it was bound to
        # the original list object in __init__.
        self.append = merged.append
        return merged

    # structure markup

    def unknown_starttag(self, tag, attrs):
        """Record a start tag together with its attribute list."""
        self.append(("starttag", tag, attrs))

    def unknown_endtag(self, tag):
        """Record an end tag."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record a comment."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a character reference without converting it."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a chunk of character data."""
        self.append(("data", data))

    def handle_decl(self, decl):
        """Record a declaration."""
        self.append(("decl", decl))

    def handle_entityref(self, data):
        """Record an entity reference without converting it."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a processing instruction."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record a declaration reported through unknown_decl()."""
        self.append(("unknown decl", decl))
60\r
61\r
class CDATAEventCollector(EventCollector):
    """Event collector with a dedicated start handler for <cdata> tags."""

    def start_cdata(self, attrs):
        """Record the <cdata> start tag, then call setliteral()."""
        event = ("starttag", "cdata", attrs)
        self.append(event)
        # test_cdata_content expects everything up to </cdata> to be
        # reported as plain data once this has been called.
        self.setliteral()
66\r
67\r
class HTMLEntityCollector(EventCollector):
    """Event collector that also logs calls to the conversion hooks.

    Each convert_* override appends a ``(kind, "convert", argument)``
    tuple before delegating to the inherited implementation.
    """

    # Matches "&name", "&#digits" or "&#x..." followed by an optional
    # semicolon.
    entity_or_charref = re.compile(
        '(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
        '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')

    def convert_charref(self, name):
        """Log the call; delegate unless *name* starts with "x"."""
        self.append(("charref", "convert", name))
        if name[0] == "x":
            # References starting with "x" are only logged; no
            # replacement is produced for them.
            return None
        return EventCollector.convert_charref(self, name)

    def convert_codepoint(self, codepoint):
        """Log the call and invoke the inherited implementation.

        The inherited result is not returned, so this method always
        returns None; test_convert_overrides expects no "data" event
        after the codepoint conversion.
        """
        self.append(("codepoint", "convert", codepoint))
        EventCollector.convert_codepoint(self, codepoint)

    def convert_entityref(self, name):
        """Log the call and return the inherited conversion result."""
        self.append(("entityref", "convert", name))
        return EventCollector.convert_entityref(self, name)

    # The two handlers below record that they were called, then pass
    # the call along to the default SGMLParser implementation so that
    # its actions can be recorded as well.

    def handle_charref(self, data):
        """Record the character reference, then run the default handler."""
        self.append(("charref", data))
        sgmllib.SGMLParser.handle_charref(self, data)

    def handle_entityref(self, data):
        """Record the entity reference, then run the default handler."""
        self.append(("entityref", data))
        sgmllib.SGMLParser.handle_entityref(self, data)
97\r
98\r
class SGMLParserTestCase(unittest.TestCase):
    """Tests that feed markup to a collector and compare recorded events.

    ``collector`` is the parser class instantiated by get_events();
    individual tests may assign a different class to ``self.collector``.
    """

    collector = EventCollector

    def get_events(self, source):
        """Parse *source* with a fresh collector and return its events.

        *source* is iterated and every item is fed separately, so a
        plain string is fed one character at a time while a list of
        strings is fed chunk by chunk.  Exceptions raised by the parser
        propagate to the caller.
        """
        parser = self.collector()
        for s in source:
            parser.feed(s)
        parser.close()
        return parser.get_events()

    def check_events(self, source, expected_events):
        """Fail the test unless parsing *source* yields *expected_events*."""
        events = self.get_events(source)
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def check_parse_error(self, source):
        """Fail the test unless parsing *source* raises SGMLParseError.

        This helper always uses a plain EventCollector (not
        ``self.collector``) and feeds *source* in a single call.
        """
        parser = EventCollector()
        try:
            parser.feed(source)
            parser.close()
        except sgmllib.SGMLParseError:
            pass
        else:
            self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                      % (source, pprint.pformat(parser.get_events())))

    def test_doctype_decl_internal(self):
        inside = """\
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
 SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
 <!ELEMENT html - O EMPTY>
 <!ATTLIST html
 version CDATA #IMPLIED
 profile CDATA 'DublinCore'>
 <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
 <!ENTITY myEntity 'internal parsed entity'>
 <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
 <!ENTITY % paramEntity 'name|name|name'>
 %paramEntity;
 <!-- comment -->
]"""
        self.check_events(["<!%s>" % inside], [
            ("decl", inside),
            ])

    def test_doctype_decl_external(self):
        inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
        self.check_events("<!%s>" % inside, [
            ("decl", inside),
            ])

    def test_underscore_in_attrname(self):
        # SF bug #436621
        """Make sure attribute names with underscores are accepted"""
        self.check_events("<a has_under _under>", [
            ("starttag", "a", [("has_under", "has_under"),
                               ("_under", "_under")]),
            ])

    def test_underscore_in_tagname(self):
        # SF bug #436621
        """Make sure tag names with underscores are accepted"""
        self.check_events("<has_under></has_under>", [
            ("starttag", "has_under", []),
            ("endtag", "has_under"),
            ])

    def test_quotes_in_unquoted_attrs(self):
        # SF bug #436621
        """Be sure quotes in unquoted attributes are made part of the value"""
        self.check_events("<a href=foo'bar\"baz>", [
            ("starttag", "a", [("href", "foo'bar\"baz")]),
            ])

    def test_xhtml_empty_tag(self):
        """Handling of XHTML-style empty start tags"""
        self.check_events("<br />text<i></i>", [
            ("starttag", "br", []),
            ("data", "text"),
            ("starttag", "i", []),
            ("endtag", "i"),
            ])

    def test_processing_instruction_only(self):
        self.check_events("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def test_bad_nesting(self):
        self.check_events("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self.check_events("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self.check_events("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_attr_syntax(self):
        # The same attributes written with different separators must
        # all produce the same start-tag event.
        output = [
          ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
          ]
        self.check_events("""<a b='v' c="v" d=v e>""", output)
        self.check_events("""<a b = 'v' c = "v" d = v e>""", output)
        self.check_events("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self.check_events("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self.check_events("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                          [("starttag", "a", [("b", "xxx\n\txxx"),
                                              ("c", "yyy\t\nyyy"),
                                              ("d", "\txyz\n")])
                           ])
        self.check_events("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])
        # URL construction stuff from RFC 1808:
        safe = "$-_.+"
        extra = "!*'(),"
        reserved = ";/?:@&="
        url = "http://example.com:8080/path/to/file?%s%s%s" % (
            safe, extra, reserved)
        self.check_events("""<e a=%s>""" % url, [
            ("starttag", "e", [("a", url)]),
            ])
        # Regression test for SF patch #669683.
        self.check_events("<e a=rgb(1,2,3)>", [
            ("starttag", "e", [("a", "rgb(1,2,3)")]),
            ])

    def test_attr_values_entities(self):
        """Substitution of entities and charrefs in attribute values"""
        # SF bug #1452246
        self.check_events("""<a b=&lt; c=&lt;&gt; d=&lt-&gt; e='&lt; '
 f="&xxx;" g='&#32;&#33;' h='&#500;'
 i='x?a=b&c=d;'
 j='&amp;#42;' k='&#38;#42;'>""",
            [("starttag", "a", [("b", "<"),
                                ("c", "<>"),
                                ("d", "&lt->"),
                                ("e", "< "),
                                ("f", "&xxx;"),
                                ("g", " !"),
                                ("h", "&#500;"),
                                ("i", "x?a=b&c=d;"),
                                ("j", "&#42;"),
                                ("k", "&#42;"),
                                ])])

    def test_convert_overrides(self):
        # This checks that the character and entity reference
        # conversion helpers are called at the documented times.  No
        # attempt is made to really change what the parser accepts.
        #
        self.collector = HTMLEntityCollector
        self.check_events(('<a title="&ldquo;test&#x201d;">foo</a>'
                           '&foobar;&#42;'), [
            ('entityref', 'convert', 'ldquo'),
            ('charref', 'convert', 'x201d'),
            ('starttag', 'a', [('title', '&ldquo;test&#x201d;')]),
            ('data', 'foo'),
            ('endtag', 'a'),
            ('entityref', 'foobar'),
            ('entityref', 'convert', 'foobar'),
            ('charref', '42'),
            ('charref', 'convert', '42'),
            ('codepoint', 'convert', 42),
            ])

    def test_attr_funky_names(self):
        self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_attr_value_ip6_url(self):
        # http://www.python.org/sf/853506
        self.check_events(("<a href='http://[1080::8:800:200C:417A]/'>"
                           "<a href=http://[1080::8:800:200C:417A]/>"), [
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ])

    def test_weird_starttags(self):
        self.check_events("<a<a>", [
            ("starttag", "a", []),
            ("starttag", "a", []),
            ])
        self.check_events("</a<a>", [
            ("endtag", "a"),
            ("starttag", "a", []),
            ])

    def test_declaration_junk_chars(self):
        self.check_parse_error("<!DOCTYPE foo $ >")

    def test_get_starttag_text(self):
        s = """<foobar \n one="1"\ttwo=2 >"""
        self.check_events(s, [
            ("starttag", "foobar", [("one", "1"), ("two", "2")]),
            ])

    def test_cdata_content(self):
        s = ("<cdata> <!-- not a comment --> &not-an-entity-ref; </cdata>"
             "<notcdata> <!-- comment --> </notcdata>")
        self.collector = CDATAEventCollector
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "cdata"),
            ("starttag", "notcdata", []),
            ("data", " "),
            ("comment", " comment "),
            ("data", " "),
            ("endtag", "notcdata"),
            ])
        s = """<cdata> <not a='start tag'> </cdata>"""
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "cdata"),
            ])

    def test_illegal_declarations(self):
        s = 'abc<!spacer type="block" height="25">def'
        self.check_events(s, [
            ("data", "abc"),
            ("unknown decl", 'spacer type="block" height="25"'),
            ("data", "def"),
            ])

    def test_enumerated_attr_type(self):
        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
        self.check_events(s, [
            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
            ])

    def test_read_chunks(self):
        # SF bug #1541697, this caused sgml parser to hang
        # Just verify this code doesn't cause a hang.
        CHUNK = 1024  # increasing this to 8212 makes the problem go away

        # The with-block closes the input file even if feed() raises.
        with open(test_support.findfile('sgml_input.html')) as f:
            fp = sgmllib.SGMLParser()
            while 1:
                data = f.read(CHUNK)
                fp.feed(data)
                # A short read means the end of the file was reached.
                if len(data) != CHUNK:
                    break

    def test_only_decode_ascii(self):
        # SF bug #1651995, make sure non-ascii character references are not decoded
        s = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
        self.check_events(s, [
            ('starttag', 'signs',
             [('exclamation', '!'), ('copyright', '&#169'),
              ('quoteleft', '&#8216;')]),
            ])

    # XXX These tests have been disabled by prefixing their names with
    # an underscore.  The first two exercise outstanding bugs in the
    # sgmllib module, and the third exhibits questionable behavior
    # that needs to be carefully considered before changing it.

    def _test_starttag_end_boundary(self):
        self.check_events("<a b='<'>", [("starttag", "a", [("b", "<")])])
        self.check_events("<a b='>'>", [("starttag", "a", [("b", ">")])])

    def _test_buffer_artefacts(self):
        output = [("starttag", "a", [("b", "<")])]
        self.check_events(["<a b='<'>"], output)
        self.check_events(["<a ", "b='<'>"], output)
        self.check_events(["<a b", "='<'>"], output)
        self.check_events(["<a b=", "'<'>"], output)
        self.check_events(["<a b='<", "'>"], output)
        self.check_events(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self.check_events(["<a b='>'>"], output)
        self.check_events(["<a ", "b='>'>"], output)
        self.check_events(["<a b", "='>'>"], output)
        self.check_events(["<a b=", "'>'>"], output)
        self.check_events(["<a b='>", "'>"], output)
        self.check_events(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self.check_events(["", "<!--abc-->"], output)
        self.check_events(["<", "!--abc-->"], output)
        self.check_events(["<!", "--abc-->"], output)
        self.check_events(["<!-", "-abc-->"], output)
        self.check_events(["<!--", "abc-->"], output)
        self.check_events(["<!--a", "bc-->"], output)
        self.check_events(["<!--ab", "c-->"], output)
        self.check_events(["<!--abc", "-->"], output)
        self.check_events(["<!--abc-", "->"], output)
        self.check_events(["<!--abc--", ">"], output)
        self.check_events(["<!--abc-->", ""], output)

    def _test_starttag_junk_chars(self):
        self.check_parse_error("<")
        self.check_parse_error("<>")
        self.check_parse_error("</$>")
        self.check_parse_error("</")
        self.check_parse_error("</a")
        self.check_parse_error("<$")
        self.check_parse_error("<$>")
        self.check_parse_error("<!")
        self.check_parse_error("<a $>")
        self.check_parse_error("<a")
        self.check_parse_error("<a foo='bar'")
        self.check_parse_error("<a foo='bar")
        self.check_parse_error("<a foo='>'")
        self.check_parse_error("<a foo='>")
        self.check_parse_error("<a foo=>")
432\r
433\r
def test_main():
    """Run SGMLParserTestCase through test_support.run_unittest()."""
    test_support.run_unittest(SGMLParserTestCase)


# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()