]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_multibytecodec.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / test / test_multibytecodec.py
CommitLineData
#!/usr/bin/env python
#
# test_multibytecodec.py
#   Unit test for multibytecodec itself
#

from test import test_support
from test.test_support import TESTFN
import codecs
import os
import StringIO
import sys
import unittest
import _multibytecodec

# Codec names iterated by the per-encoding tests.  The comment above each
# group is the label carried over from the original listing.
ALL_CJKENCODINGS = [
    # _codecs_cn
    'gb2312', 'gbk', 'gb18030', 'hz',
    # _codecs_hk
    'big5hkscs',
    # _codecs_jp
    'cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
    'euc_jis_2004', 'shift_jis_2004',
    # _codecs_kr
    'cp949', 'euc_kr', 'johab',
    # _codecs_tw
    'big5', 'cp950',
    # _codecs_iso2022
    'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
    'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr',
]

class Test_MultibyteCodec(unittest.TestCase):
    # NOTE: test methods carry comments rather than docstrings so that
    # unittest keeps reporting them by method name.

    def test_nullcoding(self):
        # Empty input must map to empty output in both directions for
        # every codec in ALL_CJKENCODINGS.
        for enc in ALL_CJKENCODINGS:
            self.assertEqual(''.decode(enc), u'')
            self.assertEqual(unicode('', enc), u'')
            self.assertEqual(u''.encode(enc), '')

    def test_str_decode(self):
        # Despite the name this calls encode() on a byte string; plain
        # ASCII bytes must come back unchanged from every codec.
        for enc in ALL_CJKENCODINGS:
            self.assertEqual('abcd'.encode(enc), 'abcd')

    def test_errorcallback_longindex(self):
        # An error handler that asks decoding to resume at position
        # sys.maxint + 1 must produce IndexError.
        dec = codecs.getdecoder('euc-kr')

        def myreplace(exc):
            return (u'', sys.maxint + 1)

        codecs.register_error('test.cjktest', myreplace)
        self.assertRaises(IndexError, dec,
                          'apple\x92ham\x93spam', 'test.cjktest')

    def test_codingspec(self):
        # Source text made of nothing but a coding declaration must
        # execute without error for every codec.
        for enc in ALL_CJKENCODINGS:
            code = '# coding: {}\n'.format(enc)
            exec(code)

    def test_init_segfault(self):
        # bug #3305: this used to segfault
        self.assertRaises(AttributeError,
                          _multibytecodec.MultibyteStreamReader, None)
        self.assertRaises(AttributeError,
                          _multibytecodec.MultibyteStreamWriter, None)


class Test_IncrementalEncoder(unittest.TestCase):
    # NOTE: test methods carry comments rather than docstrings so that
    # unittest keeps reporting them by method name.

    def test_stateless(self):
        # cp949 encoder isn't stateful at all.
        encoder = codecs.getincrementalencoder('cp949')()
        self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
                         '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
        self.assertEqual(encoder.reset(), None)
        self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
                         '\xa1\xd9\xa1\xad\xa1\xd9')
        self.assertEqual(encoder.reset(), None)
        self.assertEqual(encoder.encode(u'', True), '')
        self.assertEqual(encoder.encode(u'', False), '')
        self.assertEqual(encoder.reset(), None)

    def test_stateful(self):
        # jisx0213 encoder is stateful for a few codepoints. eg)
        #   U+00E6 => A9DC
        #   U+00E6 U+0300 => ABC4
        #   U+0300 => ABDC

        encoder = codecs.getincrementalencoder('jisx0213')()
        self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
        # U+00E6 alone is held back until the next call decides the pair.
        self.assertEqual(encoder.encode(u'\u00e6'), '')
        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
        self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')

        self.assertEqual(encoder.reset(), None)
        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')

        # A final call with empty input flushes the pending U+00E6 once.
        self.assertEqual(encoder.encode(u'\u00e6'), '')
        self.assertEqual(encoder.encode('', True), '\xa9\xdc')
        self.assertEqual(encoder.encode('', True), '')

    def test_stateful_keep_buffer(self):
        # The pending U+00E6 must still be buffered after an encode()
        # call that raised UnicodeEncodeError.
        encoder = codecs.getincrementalencoder('jisx0213')()
        self.assertEqual(encoder.encode(u'\u00e6'), '')
        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
        self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
        self.assertEqual(encoder.reset(), None)
        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
        self.assertEqual(encoder.encode(u'\u00e6'), '')
        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
        self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')

    def test_issue5640(self):
        # With 'backslashreplace', an unencodable character is escaped
        # and the following encode() call still works.
        encoder = codecs.getincrementalencoder('shift-jis')('backslashreplace')
        self.assertEqual(encoder.encode(u'\xff'), b'\\xff')
        self.assertEqual(encoder.encode(u'\n'), b'\n')

class Test_IncrementalDecoder(unittest.TestCase):
    # NOTE: test methods carry comments rather than docstrings so that
    # unittest keeps reporting them by method name.

    def test_dbcs(self):
        # cp949 decoder is simple with only 1 or 2 bytes sequences.
        decoder = codecs.getincrementaldecoder('cp949')()
        # The trailing '\xbd' is an incomplete pair; it is completed by
        # the '\xe3' that starts the next chunk.
        self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
                         u'\ud30c\uc774')
        self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
                         u'\uc36c \ub9c8\uc744')
        self.assertEqual(decoder.decode(''), u'')

    def test_dbcs_keep_buffer(self):
        # The buffered lead byte must still be there after a final
        # decode() call that raised UnicodeDecodeError.
        decoder = codecs.getincrementaldecoder('cp949')()
        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
        self.assertEqual(decoder.decode('\xcc'), u'\uc774')

        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
        self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
        self.assertEqual(decoder.decode('\xcc'), u'\uc774')

    def test_iso2022(self):
        # Escape sequences and two-byte characters are fed split across
        # successive decode() calls.
        decoder = codecs.getincrementaldecoder('iso2022-jp')()
        ESC = '\x1b'
        self.assertEqual(decoder.decode(ESC + '('), u'')
        self.assertEqual(decoder.decode('B', True), u'')
        self.assertEqual(decoder.decode(ESC + '$'), u'')
        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
        self.assertEqual(decoder.decode('@$@'), u'\u4e16')
        self.assertEqual(decoder.decode('$', True), u'\u4e16')
        self.assertEqual(decoder.reset(), None)
        # After reset() the same bytes decode as plain ASCII.
        self.assertEqual(decoder.decode('@$'), u'@$')
        self.assertEqual(decoder.decode(ESC + '$'), u'')
        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
        self.assertEqual(decoder.decode('B@$'), u'\u4e16')

class Test_StreamReader(unittest.TestCase):
    def test_bug1728403(self):
        # Reading a file that holds only the single byte '\xa1' through
        # the cp949 stream reader must raise UnicodeDecodeError.
        try:
            # Close the writer explicitly so the byte is on disk before
            # the file is reopened; do not rely on garbage collection.
            with open(TESTFN, 'w') as f:
                f.write('\xa1')
            with codecs.open(TESTFN, encoding='cp949') as f:
                self.assertRaises(UnicodeDecodeError, f.read, 2)
        finally:
            # test_support.unlink() tolerates a file that was never
            # created, so a failure above is not masked by cleanup.
            test_support.unlink(TESTFN)

class Test_StreamWriter(unittest.TestCase):
    # The surrogate-pair tests are only defined when a non-BMP character
    # occupies two code units (UCS2 build); a UCS4 build defines neither.
    if len(u'\U00012345') == 2: # UCS2
        def test_gb18030(self):
            s = StringIO.StringIO()
            c = codecs.getwriter('gb18030')(s)
            c.write(u'123')
            self.assertEqual(s.getvalue(), '123')
            c.write(u'\U00012345')
            self.assertEqual(s.getvalue(), '123\x907\x959')
            # A lone high surrogate produces no output yet ...
            c.write(u'\U00012345'[0])
            self.assertEqual(s.getvalue(), '123\x907\x959')
            # ... until the matching low surrogate arrives.
            c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
            c.write(u'\U00012345'[0])
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
            # reset() with a dangling high surrogate is an error and
            # must not emit anything.
            self.assertRaises(UnicodeError, c.reset)
            self.assertEqual(s.getvalue(),
                    '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')

        def test_utf_8(self):
            s = StringIO.StringIO()
            c = codecs.getwriter('utf-8')(s)
            c.write(u'123')
            self.assertEqual(s.getvalue(), '123')
            c.write(u'\U00012345')
            self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')

            # Python utf-8 codec can't buffer surrogate pairs yet.
            if 0:
                c.write(u'\U00012345'[0])
                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
                c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac')
                c.write(u'\U00012345'[0])
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac')
                c.reset()
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
                c.write(u'\U00012345'[1])
                self.assertEqual(s.getvalue(),
                    '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                    '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')

    # NOTE(review): placed at class level, i.e. independent of the
    # UCS2/UCS4 switch above; the original indentation was lost, confirm.
    def test_streamwriter_strwrite(self):
        # Writing a byte string (not unicode) through the stream writer
        # must pass ASCII content through unchanged.
        s = StringIO.StringIO()
        wr = codecs.getwriter('gb18030')(s)
        wr.write('abcd')
        self.assertEqual(s.getvalue(), 'abcd')

class Test_ISO2022(unittest.TestCase):
    def test_g2(self):
        # The byte string contains the escape sequences ESC . A and
        # ESC N; the character following ESC N must decode to U+00E9.
        iso2022jp2 = '\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
        uni = u':hu4:unit\xe9 de famille'
        self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)

    def test_iso2022_jp_g0(self):
        # Encoded output must not contain the byte 0x0E ...
        self.assertNotIn('\x0e', u'\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
        # ... and must stay 7-bit for these two codecs.
        for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
            e = u'\u3406'.encode(encoding)
            self.assertFalse(any(ch >= '\x80' for ch in e))

    def test_bug1572832(self):
        if sys.maxunicode >= 0x10000:
            myunichr = unichr
        else:
            # Narrow build: spell the code point as a surrogate pair.
            # 0xD7C0 + (x >> 10) equals 0xD800 + ((x - 0x10000) >> 10).
            def myunichr(x):
                return unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF))

        for x in xrange(0x10000, 0x110000):
            # Any ISO 2022 codec will cause the segfault
            myunichr(x).encode('iso_2022_jp', 'ignore')

class TestStateful(unittest.TestCase):
    """Encoder checks for a stateful codec, driven by class attributes.

    Subclasses override the four attributes below to run the same three
    tests against another encoding.
    """

    # Text to encode.
    text = u'\u4E16\u4E16'
    # Codec name handed to the encoding functions.
    encoding = 'iso-2022-jp'
    # Bytes expected when the encoder is never told the input is final.
    expected = b'\x1b$B@$@$'
    # Bytes expected from a complete encode.
    expected_reset = b'\x1b$B@$@$\x1b(B'

    def test_encode(self):
        # One-shot encoding yields the complete form.
        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)

    def test_incrementalencoder(self):
        # Character-by-character encoding without final=True yields
        # the shorter form.
        encoder = codecs.getincrementalencoder(self.encoding)()
        output = b''.join(
            encoder.encode(char)
            for char in self.text)
        self.assertEqual(output, self.expected)

    def test_incrementalencoder_final(self):
        # Passing final=True with the last character yields the
        # complete form.
        encoder = codecs.getincrementalencoder(self.encoding)()
        last_index = len(self.text) - 1
        output = b''.join(
            encoder.encode(char, index == last_index)
            for index, char in enumerate(self.text))
        self.assertEqual(output, self.expected_reset)

class TestHZStateful(TestStateful):
    """Runs the TestStateful checks against the 'hz' codec."""

    text = u'\u804a\u804a'
    encoding = 'hz'
    # Without a final flush the output ends after the encoded characters.
    expected = b'~{ADAD'
    # The complete form additionally ends with '~}'.
    expected_reset = b'~{ADAD~}'

def test_main():
    # Hand this module's name to test_support.run_unittest() so that it
    # runs the test cases defined here.
    test_support.run_unittest(__name__)

# Run the tests when the file is executed as a script.
if __name__ == "__main__":
    test_main()