[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / test / test_htmlparser.py

"""Tests for HTMLParser.py."""\r
\r
import HTMLParser\r
import pprint\r
import unittest\r
from test import test_support\r
\r
\r
class EventCollector(HTMLParser.HTMLParser):
    """Parser subclass that records every handler callback as a tuple.

    Each handler appends ``(event_name, arg, ...)`` to ``self.events`` so
    that a test can compare the recorded sequence with an expected list.
    """

    def __init__(self):
        # The event list and its bound ``append`` are set up before the
        # base initializer runs; every handler below goes through
        # ``self.append``.
        self.events = []
        self.append = self.events.append
        HTMLParser.HTMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Buffer artefacts can split one run of contiguous characters over
        several handle_data() calls, so consecutive "data" events are
        concatenated into a single one.

        The list is normalized in place: ``self.append`` is bound to this
        very list in __init__, so rebinding ``self.events`` to a new list
        would make later events land in the discarded one.
        """
        merged = []
        for event in self.events:
            # merged[-1] is always the (possibly merged) previous event, so
            # checking its kind is the same as tracking the previous type.
            if event[0] == "data" and merged and merged[-1][0] == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
        self.events[:] = merged
        return self.events

    # structure markup

    def handle_starttag(self, tag, attrs):
        self.append(("starttag", tag, attrs))

    def handle_startendtag(self, tag, attrs):
        self.append(("startendtag", tag, attrs))

    def handle_endtag(self, tag):
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        self.append(("comment", data))

    def handle_charref(self, data):
        self.append(("charref", data))

    def handle_data(self, data):
        self.append(("data", data))

    def handle_decl(self, data):
        self.append(("decl", data))

    def handle_entityref(self, data):
        self.append(("entityref", data))

    def handle_pi(self, data):
        self.append(("pi", data))

    def unknown_decl(self, decl):
        self.append(("unknown decl", decl))
\r
\r
class EventCollectorExtra(EventCollector):
    """EventCollector that also records the raw text of each start tag."""

    def handle_starttag(self, tag, attrs):
        # Record the regular "starttag" event first, then a
        # "starttag_text" event carrying what get_starttag_text() reports
        # for that same tag.
        EventCollector.handle_starttag(self, tag, attrs)
        self.append(("starttag_text", self.get_starttag_text()))
\r
\r
class TestCaseBase(unittest.TestCase):
    """Shared helpers for feeding markup to a parser and checking the result."""

    def _run_check(self, source, expected_events, collector=EventCollector):
        """Feed *source* to a fresh *collector* and compare its events.

        *source* is iterated and every item is passed to feed() on its
        own: a list of strings is fed chunk by chunk, and a plain string
        is consequently fed one character at a time.  After close(), the
        normalized events must equal *expected_events*; otherwise the
        test fails with both lists pretty-printed.
        """
        parser = collector()
        for chunk in source:
            parser.feed(chunk)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def _run_check_extra(self, source, events):
        """Like _run_check(), but collect with EventCollectorExtra."""
        self._run_check(source, events, EventCollectorExtra)

    def _parse_error(self, source):
        """Assert that parsing *source* in one piece raises HTMLParseError.

        A plain HTMLParser is used; the error may come from either feed()
        or close().
        """
        with self.assertRaises(HTMLParser.HTMLParseError):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
\r
\r
class HTMLParserTestCase(TestCaseBase):
    """Event-level checks of HTMLParser.

    Each test feeds markup through _run_check()/_run_check_extra() and
    pins the exact list of events, or uses _parse_error() to require an
    HTMLParseError.
    """

    def test_processing_instruction_only(self):
        # The text between "<?" and ">" is reported verbatim, including a
        # trailing "?" when the instruction is closed XML-style.
        self._run_check("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])
        self._run_check("<?processing instruction ?>", [
            ("pi", "processing instruction ?"),
            ])

    def test_simple_html(self):
        # One document mixing a declaration, tags, entity and character
        # references, comments and character data.
        self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b--><!>
</Html>
""", [
    ("data", "\n"),
    ("decl", "DOCTYPE html PUBLIC 'foo'"),
    ("data", "\n"),
    ("starttag", "html", []),
    ("entityref", "entity"),
    ("charref", "32"),
    ("data", "\n"),
    ("comment", "comment1a\n-></foo><bar>&lt;<?pi?></foo<bar\ncomment1b"),
    ("data", "\n"),
    ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
    ("data", "sample\ntext\n"),
    ("charref", "x201C"),
    ("data", "\n"),
    ("comment", "comment2a-- --comment2b"),
    ("data", "\n"),
    ("endtag", "html"),
    ("data", "\n"),
    ])

    def test_unclosed_entityref(self):
        # An entity reference without the closing ";" is still reported.
        self._run_check("&entityref foo", [
            ("entityref", "entityref"),
            ("data", " foo"),
            ])

    def test_doctype_decl(self):
        # A DOCTYPE with an internal subset must come back as one "decl"
        # event holding the whole text between "<!" and the final ">".
        inside = """\
DOCTYPE html [
  <!ELEMENT html - O EMPTY>
  <!ATTLIST html
      version CDATA #IMPLIED
      profile CDATA 'DublinCore'>
  <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
  <!ENTITY myEntity 'internal parsed entity'>
  <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
  <!ENTITY % paramEntity 'name|name|name'>
  %paramEntity;
  <!-- comment -->
]"""
        self._run_check("<!%s>" % inside, [
            ("decl", inside),
            ])

    def test_bad_nesting(self):
        # Strangely, this *is* supposed to test that overlapping
        # elements are allowed.  HTMLParser is more geared toward
        # lexing the input than parsing the structure.
        self._run_check("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        # "&" that does not start a reference stays part of the data.
        self._run_check("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        # "<" and ">" that do not form a tag stay part of the data.
        self._run_check("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_attr_syntax(self):
        # The same attributes written with different quoting and with
        # spaces, newlines or tabs around "=" must parse identically.
        output = [
          ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
          ]
        self._run_check("""<a b='v' c="v" d=v e>""", output)
        self._run_check("""<a  b = 'v' c = "v" d = v e>""", output)
        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        # Whitespace inside quoted values is preserved; empty values are
        # reported as empty strings.
        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                        [("starttag", "a", [("b", "xxx\n\txxx"),
                                            ("c", "yyy\t\nyyy"),
                                            ("d", "\txyz\n")])
                         ])
        self._run_check("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])
        # Regression test for SF patch #669683.
        self._run_check("<e a=rgb(1,2,3)>", [
            ("starttag", "e", [("a", "rgb(1,2,3)")]),
            ])
        # Regression test for SF bug #921657.
        self._run_check("<a href=mailto:xyz@example.com>", [
            ("starttag", "a", [("href", "mailto:xyz@example.com")]),
            ])

    def test_attr_nonascii(self):
        # see issue 7311
        self._run_check(u"<img src=/foo/bar.png alt=\u4e2d\u6587>", [
            ("starttag", "img", [("src", "/foo/bar.png"),
                                 ("alt", u"\u4e2d\u6587")]),
            ])
        self._run_check(u"<a title='\u30c6\u30b9\u30c8' "
                        u"href='\u30c6\u30b9\u30c8.html'>", [
            ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                               ("href", u"\u30c6\u30b9\u30c8.html")]),
            ])
        self._run_check(u'<a title="\u30c6\u30b9\u30c8" '
                        u'href="\u30c6\u30b9\u30c8.html">', [
            ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                               ("href", u"\u30c6\u30b9\u30c8.html")]),
            ])

    def test_attr_entity_replacement(self):
        # Entity references inside an attribute value are replaced.
        self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
            ("starttag", "a", [("b", "&><\"'")]),
            ])

    def test_attr_funky_names(self):
        # Attribute names may contain ".", ":" and "-".
        self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_illegal_declarations(self):
        self._parse_error('<!spacer type="block" height="25">')

    def test_starttag_end_boundary(self):
        # A "<" or ">" inside a quoted value must not end the tag.
        self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
        self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

    def test_buffer_artefacts(self):
        # The same markup split at different positions across feed()
        # calls must always produce the same events.
        output = [("starttag", "a", [("b", "<")])]
        self._run_check(["<a b='<'>"], output)
        self._run_check(["<a ", "b='<'>"], output)
        self._run_check(["<a b", "='<'>"], output)
        self._run_check(["<a b=", "'<'>"], output)
        self._run_check(["<a b='<", "'>"], output)
        self._run_check(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self._run_check(["<a b='>'>"], output)
        self._run_check(["<a ", "b='>'>"], output)
        self._run_check(["<a b", "='>'>"], output)
        self._run_check(["<a b=", "'>'>"], output)
        self._run_check(["<a b='>", "'>"], output)
        self._run_check(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self._run_check(["", "<!--abc-->"], output)
        self._run_check(["<", "!--abc-->"], output)
        self._run_check(["<!", "--abc-->"], output)
        self._run_check(["<!-", "-abc-->"], output)
        self._run_check(["<!--", "abc-->"], output)
        self._run_check(["<!--a", "bc-->"], output)
        self._run_check(["<!--ab", "c-->"], output)
        self._run_check(["<!--abc", "-->"], output)
        self._run_check(["<!--abc-", "->"], output)
        self._run_check(["<!--abc--", ">"], output)
        self._run_check(["<!--abc-->", ""], output)

    def test_starttag_junk_chars(self):
        # Malformed or truncated tags must raise HTMLParseError.
        self._parse_error("</>")
        self._parse_error("</$>")
        self._parse_error("</")
        self._parse_error("</a")
        self._parse_error("<a<a>")
        self._parse_error("</a<a>")
        self._parse_error("<!")
        self._parse_error("<a $>")
        self._parse_error("<a")
        self._parse_error("<a foo='bar'")
        self._parse_error("<a foo='bar")
        self._parse_error("<a foo='>'")
        self._parse_error("<a foo='>")
        self._parse_error("<a foo=>")

    def test_declaration_junk_chars(self):
        self._parse_error("<!DOCTYPE foo $ >")

    def test_startendtag(self):
        # "<p/>" is one "startendtag" event; "<p></p>" is a start/end pair.
        self._run_check("<p/>", [
            ("startendtag", "p", []),
            ])
        self._run_check("<p></p>", [
            ("starttag", "p", []),
            ("endtag", "p"),
            ])
        self._run_check("<p><img src='foo' /></p>", [
            ("starttag", "p", []),
            ("startendtag", "img", [("src", "foo")]),
            ("endtag", "p"),
            ])

    def test_get_starttag_text(self):
        # get_starttag_text() must return the tag exactly as written,
        # internal whitespace included.
        s = """<foo:bar   \n   one="1"\ttwo=2   >"""
        self._run_check_extra(s, [
            ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
            ("starttag_text", s)])

    def test_cdata_content(self):
        # Inside <script>, markup-looking text is reported as plain data.
        s = """<script> <!-- not a comment --> &not-an-entity-ref; </script>"""
        self._run_check(s, [
            ("starttag", "script", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "script"),
            ])
        s = """<script> <not a='start tag'> </script>"""
        self._run_check(s, [
            ("starttag", "script", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "script"),
            ])

    def test_entityrefs_in_attributes(self):
        # Known entities and numeric references are replaced inside
        # attribute values; an unknown entity is left as written.
        self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
                ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
                ])

    def test_malformatted_charref(self):
        # "&#bad;" is not a valid character reference and stays data.
        self._run_check("<p>&#bad;</p>", [
            ("starttag", "p", []),
            ("data", "&#bad;"),
            ("endtag", "p"),
        ])

    def test_unescape_function(self):
        # unescape() leaves a malformed reference alone and decodes a
        # zero-padded decimal one.
        parser = HTMLParser.HTMLParser()
        self.assertEqual(parser.unescape('&#bad;'), '&#bad;')
        self.assertEqual(parser.unescape('&#0038;'), '&')
\r
\r
def test_main():
    """Run HTMLParserTestCase through test_support.run_unittest()."""
    test_support.run_unittest(HTMLParserTestCase)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()
Commit	Line	Data
4710c53d	1	"""Tests for HTMLParser.py."""\r
	2	\r
	3	import HTMLParser\r
	4	import pprint\r
	5	import unittest\r
	6	from test import test_support\r
	7	\r
	8	\r
	9	class EventCollector(HTMLParser.HTMLParser):\r
	10	\r
	11	def __init__(self):\r
	12	self.events = []\r
	13	self.append = self.events.append\r
	14	HTMLParser.HTMLParser.__init__(self)\r
	15	\r
	16	def get_events(self):\r
	17	# Normalize the list of events so that buffer artefacts don't\r
	18	# separate runs of contiguous characters.\r
	19	L = []\r
	20	prevtype = None\r
	21	for event in self.events:\r
	22	type = event[0]\r
	23	if type == prevtype == "data":\r
	24	L[-1] = ("data", L[-1][1] + event[1])\r
	25	else:\r
	26	L.append(event)\r
	27	prevtype = type\r
	28	self.events = L\r
	29	return L\r
	30	\r
	31	# structure markup\r
	32	\r
	33	def handle_starttag(self, tag, attrs):\r
	34	self.append(("starttag", tag, attrs))\r
	35	\r
	36	def handle_startendtag(self, tag, attrs):\r
	37	self.append(("startendtag", tag, attrs))\r
	38	\r
	39	def handle_endtag(self, tag):\r
	40	self.append(("endtag", tag))\r
	41	\r
	42	# all other markup\r
	43	\r
	44	def handle_comment(self, data):\r
	45	self.append(("comment", data))\r
	46	\r
	47	def handle_charref(self, data):\r
	48	self.append(("charref", data))\r
	49	\r
	50	def handle_data(self, data):\r
	51	self.append(("data", data))\r
	52	\r
	53	def handle_decl(self, data):\r
	54	self.append(("decl", data))\r
	55	\r
	56	def handle_entityref(self, data):\r
	57	self.append(("entityref", data))\r
	58	\r
	59	def handle_pi(self, data):\r
	60	self.append(("pi", data))\r
	61	\r
	62	def unknown_decl(self, decl):\r
	63	self.append(("unknown decl", decl))\r
	64	\r
65	\r
66	class EventCollectorExtra(EventCollector):\r
67	\r
68	def handle_starttag(self, tag, attrs):\r
69	EventCollector.handle_starttag(self, tag, attrs)\r
70	self.append(("starttag_text", self.get_starttag_text()))\r
71	\r
72	\r
73	class TestCaseBase(unittest.TestCase):\r
74	\r
75	def _run_check(self, source, expected_events, collector=EventCollector):\r
76	parser = collector()\r
77	for s in source:\r
78	parser.feed(s)\r
79	parser.close()\r
80	events = parser.get_events()\r
81	if events != expected_events:\r
82	self.fail("received events did not match expected events\n"\r
83	"Expected:\n" + pprint.pformat(expected_events) +\r
84	"\nReceived:\n" + pprint.pformat(events))\r
85	\r
86	def _run_check_extra(self, source, events):\r
87	self._run_check(source, events, EventCollectorExtra)\r
88	\r
89	def _parse_error(self, source):\r
90	def parse(source=source):\r
91	parser = HTMLParser.HTMLParser()\r
92	parser.feed(source)\r
93	parser.close()\r
94	self.assertRaises(HTMLParser.HTMLParseError, parse)\r
95	\r
96	\r
97	class HTMLParserTestCase(TestCaseBase):\r
98	\r
99	def test_processing_instruction_only(self):\r
100	self._run_check("<?processing instruction>", [\r
101	("pi", "processing instruction"),\r
102	])\r
103	self._run_check("<?processing instruction ?>", [\r
104	("pi", "processing instruction ?"),\r
105	])\r
106	\r
107	def test_simple_html(self):\r
108	self._run_check("""\r
109	<!DOCTYPE html PUBLIC 'foo'>\r
110	<HTML>&entity; \r
111	<!--comment1a\r
112	-></foo><bar><<?pi?></foo<bar\r
113	comment1b-->\r
114	<Img sRc='Bar' isMAP>sample\r
115	text\r
116	“\r
117	<!--comment2a-- --comment2b--><!>\r
118	</Html>\r
119	""", [\r
120	("data", "\n"),\r
121	("decl", "DOCTYPE html PUBLIC 'foo'"),\r
122	("data", "\n"),\r
123	("starttag", "html", []),\r
124	("entityref", "entity"),\r
125	("charref", "32"),\r
126	("data", "\n"),\r
127	("comment", "comment1a\n-></foo><bar><<?pi?></foo<bar\ncomment1b"),\r
128	("data", "\n"),\r
129	("starttag", "img", [("src", "Bar"), ("ismap", None)]),\r
130	("data", "sample\ntext\n"),\r
131	("charref", "x201C"),\r
132	("data", "\n"),\r
133	("comment", "comment2a-- --comment2b"),\r
134	("data", "\n"),\r
135	("endtag", "html"),\r
136	("data", "\n"),\r
137	])\r
138	\r
139	def test_unclosed_entityref(self):\r
140	self._run_check("&entityref foo", [\r
141	("entityref", "entityref"),\r
142	("data", " foo"),\r
143	])\r
144	\r
145	def test_doctype_decl(self):\r
146	inside = """\\r
147	DOCTYPE html [\r
148	<!ELEMENT html - O EMPTY>\r
149	<!ATTLIST html\r
150	version CDATA #IMPLIED\r
151	profile CDATA 'DublinCore'>\r
152	<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>\r
153	<!ENTITY myEntity 'internal parsed entity'>\r
154	<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>\r
155	<!ENTITY % paramEntity 'name\|name\|name'>\r
156	%paramEntity;\r
157	<!-- comment -->\r
158	]"""\r
159	self._run_check("<!%s>" % inside, [\r
160	("decl", inside),\r
161	])\r
162	\r
163	def test_bad_nesting(self):\r
164	# Strangely, this is supposed to test that overlapping\r
165	# elements are allowed. HTMLParser is more geared toward\r
166	# lexing the input that parsing the structure.\r
167	self._run_check("<a><b></a></b>", [\r
168	("starttag", "a", []),\r
169	("starttag", "b", []),\r
170	("endtag", "a"),\r
171	("endtag", "b"),\r
172	])\r
173	\r
174	def test_bare_ampersands(self):\r
175	self._run_check("this text & contains & ampersands &", [\r
176	("data", "this text & contains & ampersands &"),\r
177	])\r
178	\r
179	def test_bare_pointy_brackets(self):\r
180	self._run_check("this < text > contains < bare>pointy< brackets", [\r
181	("data", "this < text > contains < bare>pointy< brackets"),\r
182	])\r
183	\r
184	def test_attr_syntax(self):\r
185	output = [\r
186	("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])\r
187	]\r
188	self._run_check("""<a b='v' c="v" d=v e>""", output)\r
189	self._run_check("""<a b = 'v' c = "v" d = v e>""", output)\r
190	self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)\r
191	self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)\r
192	\r
193	def test_attr_values(self):\r
194	self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",\r
195	[("starttag", "a", [("b", "xxx\n\txxx"),\r
196	("c", "yyy\t\nyyy"),\r
197	("d", "\txyz\n")])\r
198	])\r
199	self._run_check("""<a b='' c="">""", [\r
200	("starttag", "a", [("b", ""), ("c", "")]),\r
201	])\r
202	# Regression test for SF patch #669683.\r
203	self._run_check("<e a=rgb(1,2,3)>", [\r
204	("starttag", "e", [("a", "rgb(1,2,3)")]),\r
205	])\r
206	# Regression test for SF bug #921657.\r
207	self._run_check("<a href=mailto:xyz@example.com>", [\r
208	("starttag", "a", [("href", "mailto:xyz@example.com")]),\r
209	])\r
210	\r
211	def test_attr_nonascii(self):\r
212	# see issue 7311\r
213	self._run_check(u"<img src=/foo/bar.png alt=\u4e2d\u6587>", [\r
214	("starttag", "img", [("src", "/foo/bar.png"),\r
215	("alt", u"\u4e2d\u6587")]),\r
216	])\r
217	self._run_check(u"<a title='\u30c6\u30b9\u30c8' "\r
218	u"href='\u30c6\u30b9\u30c8.html'>", [\r
219	("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),\r
220	("href", u"\u30c6\u30b9\u30c8.html")]),\r
221	])\r
222	self._run_check(u'<a title="\u30c6\u30b9\u30c8" '\r
223	u'href="\u30c6\u30b9\u30c8.html">', [\r
224	("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),\r
225	("href", u"\u30c6\u30b9\u30c8.html")]),\r
226	])\r
227	\r
228	def test_attr_entity_replacement(self):\r
229	self._run_check("""<a b='&><"''>""", [\r
230	("starttag", "a", [("b", "&><\"'")]),\r
231	])\r
232	\r
233	def test_attr_funky_names(self):\r
234	self._run_check("""<a a.b='v' c:d=v e-f=v>""", [\r
235	("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),\r
236	])\r
237	\r
238	def test_illegal_declarations(self):\r
239	self._parse_error('<!spacer type="block" height="25">')\r
240	\r
241	def test_starttag_end_boundary(self):\r
242	self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])\r
243	self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])\r
244	\r
245	def test_buffer_artefacts(self):\r
246	output = [("starttag", "a", [("b", "<")])]\r
247	self._run_check(["<a b='<'>"], output)\r
248	self._run_check(["<a ", "b='<'>"], output)\r
249	self._run_check(["<a b", "='<'>"], output)\r
250	self._run_check(["<a b=", "'<'>"], output)\r
251	self._run_check(["<a b='<", "'>"], output)\r
252	self._run_check(["<a b='<'", ">"], output)\r
253	\r
254	output = [("starttag", "a", [("b", ">")])]\r
255	self._run_check(["<a b='>'>"], output)\r
256	self._run_check(["<a ", "b='>'>"], output)\r
257	self._run_check(["<a b", "='>'>"], output)\r
258	self._run_check(["<a b=", "'>'>"], output)\r
259	self._run_check(["<a b='>", "'>"], output)\r
260	self._run_check(["<a b='>'", ">"], output)\r
261	\r
262	output = [("comment", "abc")]\r
263	self._run_check(["", "<!--abc-->"], output)\r
264	self._run_check(["<", "!--abc-->"], output)\r
265	self._run_check(["<!", "--abc-->"], output)\r
266	self._run_check(["<!-", "-abc-->"], output)\r
267	self._run_check(["<!--", "abc-->"], output)\r
268	self._run_check(["<!--a", "bc-->"], output)\r
269	self._run_check(["<!--ab", "c-->"], output)\r
270	self._run_check(["<!--abc", "-->"], output)\r
271	self._run_check(["<!--abc-", "->"], output)\r
272	self._run_check(["<!--abc--", ">"], output)\r
273	self._run_check(["<!--abc-->", ""], output)\r
274	\r
275	def test_starttag_junk_chars(self):\r
276	self._parse_error("</>")\r
277	self._parse_error("</$>")\r
278	self._parse_error("</")\r
279	self._parse_error("</a")\r
280	self._parse_error("<a<a>")\r
281	self._parse_error("</a<a>")\r
282	self._parse_error("<!")\r
283	self._parse_error("<a $>")\r
284	self._parse_error("<a")\r
285	self._parse_error("<a foo='bar'")\r
286	self._parse_error("<a foo='bar")\r
287	self._parse_error("<a foo='>'")\r
288	self._parse_error("<a foo='>")\r
289	self._parse_error("<a foo=>")\r
290	\r
291	def test_declaration_junk_chars(self):\r
292	self._parse_error("<!DOCTYPE foo $ >")\r
293	\r
294	def test_startendtag(self):\r
295	self._run_check("<p/>", [\r
296	("startendtag", "p", []),\r
297	])\r
298	self._run_check("<p></p>", [\r
299	("starttag", "p", []),\r
300	("endtag", "p"),\r
301	])\r
302	self._run_check("<p><img src='foo' /></p>", [\r
303	("starttag", "p", []),\r
304	("startendtag", "img", [("src", "foo")]),\r
305	("endtag", "p"),\r
306	])\r
307	\r
308	def test_get_starttag_text(self):\r
309	s = """<foo:bar \n one="1"\ttwo=2 >"""\r
310	self._run_check_extra(s, [\r
311	("starttag", "foo:bar", [("one", "1"), ("two", "2")]),\r
312	("starttag_text", s)])\r
313	\r
314	def test_cdata_content(self):\r
315	s = """<script> <!-- not a comment --> &not-an-entity-ref; </script>"""\r
316	self._run_check(s, [\r
317	("starttag", "script", []),\r
318	("data", " <!-- not a comment --> &not-an-entity-ref; "),\r
319	("endtag", "script"),\r
320	])\r
321	s = """<script> <not a='start tag'> </script>"""\r
322	self._run_check(s, [\r
323	("starttag", "script", []),\r
324	("data", " <not a='start tag'> "),\r
325	("endtag", "script"),\r
326	])\r
327	\r
328	def test_entityrefs_in_attributes(self):\r
329	self._run_check("<html foo='€&aa&unsupported;'>", [\r
330	("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])\r
331	])\r
332	\r
333	def test_malformatted_charref(self):\r
334	self._run_check("<p>&#bad;</p>", [\r
335	("starttag", "p", []),\r
336	("data", "&#bad;"),\r
337	("endtag", "p"),\r
338	])\r
339	\r
340	def test_unescape_function(self):\r
341	parser = HTMLParser.HTMLParser()\r
342	self.assertEqual(parser.unescape('&#bad;'),'&#bad;')\r
343	self.assertEqual(parser.unescape('&'),'&')\r
344	\r
345	\r
346	def test_main():\r
347	test_support.run_unittest(HTMLParserTestCase)\r
348	\r
349	\r
350	if __name__ == "__main__":\r
351	test_main()\r