# Extracted from: mirror_edk2.git (git.proxmox.com), blob
#   AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_multibytecodec.py
#
# test_multibytecodec.py
#   Unit test for multibytecodec itself
#
import codecs
import os
import StringIO
import sys
import unittest
from test import test_support
from test.test_support import TESTFN

import _multibytecodec
# Names of the CJK multibyte codecs that the codec-wide tests iterate over.
# NOTE(review): the extraction this file was recovered from dropped some
# original lines inside this list (the embedded line numbers skip 16 and 23),
# so entries may be missing here -- confirm against the upstream file.
ALL_CJKENCODINGS = [
    # Chinese (GB family and HZ)
    'gb2312', 'gbk', 'gb18030', 'hz',
    # Japanese
    'cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
    'euc_jis_2004', 'shift_jis_2004',
    # Korean
    'cp949', 'euc_kr', 'johab',
    # ISO-2022 family
    'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
    'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr',
]
29 class Test_MultibyteCodec(unittest
.TestCase
):
31 def test_nullcoding(self
):
32 for enc
in ALL_CJKENCODINGS
:
33 self
.assertEqual(''.decode(enc
), u
'')
34 self
.assertEqual(unicode('', enc
), u
'')
35 self
.assertEqual(u
''.encode(enc
), '')
37 def test_str_decode(self
):
38 for enc
in ALL_CJKENCODINGS
:
39 self
.assertEqual('abcd'.encode(enc
), 'abcd')
41 def test_errorcallback_longindex(self
):
42 dec
= codecs
.getdecoder('euc-kr')
43 myreplace
= lambda exc
: (u
'', sys
.maxint
+1)
44 codecs
.register_error('test.cjktest', myreplace
)
45 self
.assertRaises(IndexError, dec
,
46 'apple\x92ham\x93spam', 'test.cjktest')
48 def test_codingspec(self
):
49 for enc
in ALL_CJKENCODINGS
:
50 code
= '# coding: {}\n'.format(enc
)
53 def test_init_segfault(self
):
54 # bug #3305: this used to segfault
55 self
.assertRaises(AttributeError,
56 _multibytecodec
.MultibyteStreamReader
, None)
57 self
.assertRaises(AttributeError,
58 _multibytecodec
.MultibyteStreamWriter
, None)
61 class Test_IncrementalEncoder(unittest
.TestCase
):
63 def test_stateless(self
):
64 # cp949 encoder isn't stateful at all.
65 encoder
= codecs
.getincrementalencoder('cp949')()
66 self
.assertEqual(encoder
.encode(u
'\ud30c\uc774\uc36c \ub9c8\uc744'),
67 '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
68 self
.assertEqual(encoder
.reset(), None)
69 self
.assertEqual(encoder
.encode(u
'\u2606\u223c\u2606', True),
70 '\xa1\xd9\xa1\xad\xa1\xd9')
71 self
.assertEqual(encoder
.reset(), None)
72 self
.assertEqual(encoder
.encode(u
'', True), '')
73 self
.assertEqual(encoder
.encode(u
'', False), '')
74 self
.assertEqual(encoder
.reset(), None)
76 def test_stateful(self
):
77 # jisx0213 encoder is stateful for a few codepoints. eg)
79 # U+00E6 U+0300 => ABC4
82 encoder
= codecs
.getincrementalencoder('jisx0213')()
83 self
.assertEqual(encoder
.encode(u
'\u00e6\u0300'), '\xab\xc4')
84 self
.assertEqual(encoder
.encode(u
'\u00e6'), '')
85 self
.assertEqual(encoder
.encode(u
'\u0300'), '\xab\xc4')
86 self
.assertEqual(encoder
.encode(u
'\u00e6', True), '\xa9\xdc')
88 self
.assertEqual(encoder
.reset(), None)
89 self
.assertEqual(encoder
.encode(u
'\u0300'), '\xab\xdc')
91 self
.assertEqual(encoder
.encode(u
'\u00e6'), '')
92 self
.assertEqual(encoder
.encode('', True), '\xa9\xdc')
93 self
.assertEqual(encoder
.encode('', True), '')
95 def test_stateful_keep_buffer(self
):
96 encoder
= codecs
.getincrementalencoder('jisx0213')()
97 self
.assertEqual(encoder
.encode(u
'\u00e6'), '')
98 self
.assertRaises(UnicodeEncodeError, encoder
.encode
, u
'\u0123')
99 self
.assertEqual(encoder
.encode(u
'\u0300\u00e6'), '\xab\xc4')
100 self
.assertRaises(UnicodeEncodeError, encoder
.encode
, u
'\u0123')
101 self
.assertEqual(encoder
.reset(), None)
102 self
.assertEqual(encoder
.encode(u
'\u0300'), '\xab\xdc')
103 self
.assertEqual(encoder
.encode(u
'\u00e6'), '')
104 self
.assertRaises(UnicodeEncodeError, encoder
.encode
, u
'\u0123')
105 self
.assertEqual(encoder
.encode(u
'', True), '\xa9\xdc')
107 def test_issue5640(self
):
108 encoder
= codecs
.getincrementalencoder('shift-jis')('backslashreplace')
109 self
.assertEqual(encoder
.encode(u
'\xff'), b
'\\xff')
110 self
.assertEqual(encoder
.encode(u
'\n'), b
'\n')
112 class Test_IncrementalDecoder(unittest
.TestCase
):
115 # cp949 decoder is simple with only 1 or 2 bytes sequences.
116 decoder
= codecs
.getincrementaldecoder('cp949')()
117 self
.assertEqual(decoder
.decode('\xc6\xc4\xc0\xcc\xbd'),
119 self
.assertEqual(decoder
.decode('\xe3 \xb8\xb6\xc0\xbb'),
120 u
'\uc36c \ub9c8\uc744')
121 self
.assertEqual(decoder
.decode(''), u
'')
123 def test_dbcs_keep_buffer(self
):
124 decoder
= codecs
.getincrementaldecoder('cp949')()
125 self
.assertEqual(decoder
.decode('\xc6\xc4\xc0'), u
'\ud30c')
126 self
.assertRaises(UnicodeDecodeError, decoder
.decode
, '', True)
127 self
.assertEqual(decoder
.decode('\xcc'), u
'\uc774')
129 self
.assertEqual(decoder
.decode('\xc6\xc4\xc0'), u
'\ud30c')
130 self
.assertRaises(UnicodeDecodeError, decoder
.decode
, '\xcc\xbd', True)
131 self
.assertEqual(decoder
.decode('\xcc'), u
'\uc774')
133 def test_iso2022(self
):
134 decoder
= codecs
.getincrementaldecoder('iso2022-jp')()
136 self
.assertEqual(decoder
.decode(ESC
+ '('), u
'')
137 self
.assertEqual(decoder
.decode('B', True), u
'')
138 self
.assertEqual(decoder
.decode(ESC
+ '$'), u
'')
139 self
.assertEqual(decoder
.decode('B@$'), u
'\u4e16')
140 self
.assertEqual(decoder
.decode('@$@'), u
'\u4e16')
141 self
.assertEqual(decoder
.decode('$', True), u
'\u4e16')
142 self
.assertEqual(decoder
.reset(), None)
143 self
.assertEqual(decoder
.decode('@$'), u
'@$')
144 self
.assertEqual(decoder
.decode(ESC
+ '$'), u
'')
145 self
.assertRaises(UnicodeDecodeError, decoder
.decode
, '', True)
146 self
.assertEqual(decoder
.decode('B@$'), u
'\u4e16')
class Test_StreamReader(unittest.TestCase):
    def test_bug1728403(self):
        # Reading a file that ends in a lone cp949 lead byte must raise
        # UnicodeDecodeError.
        # NOTE(review): the lines around the three visible statements were
        # lost in extraction; the cleanup below is a reconstruction that
        # closes both handles and removes the temp file -- confirm upstream.
        f = None
        try:
            with open(TESTFN, 'w') as raw:
                raw.write('\xa1')
            f = codecs.open(TESTFN, encoding='cp949')
            self.assertRaises(UnicodeDecodeError, f.read, 2)
        finally:
            if f is not None:
                f.close()
            test_support.unlink(TESTFN)
159 class Test_StreamWriter(unittest
.TestCase
):
160 if len(u
'\U00012345') == 2: # UCS2
161 def test_gb18030(self
):
162 s
= StringIO
.StringIO()
163 c
= codecs
.getwriter('gb18030')(s
)
165 self
.assertEqual(s
.getvalue(), '123')
166 c
.write(u
'\U00012345')
167 self
.assertEqual(s
.getvalue(), '123\x907\x959')
168 c
.write(u
'\U00012345'[0])
169 self
.assertEqual(s
.getvalue(), '123\x907\x959')
170 c
.write(u
'\U00012345'[1] + u
'\U00012345' + u
'\uac00\u00ac')
171 self
.assertEqual(s
.getvalue(),
172 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
173 c
.write(u
'\U00012345'[0])
174 self
.assertEqual(s
.getvalue(),
175 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
176 self
.assertRaises(UnicodeError, c
.reset
)
177 self
.assertEqual(s
.getvalue(),
178 '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
180 def test_utf_8(self
):
181 s
= StringIO
.StringIO()
182 c
= codecs
.getwriter('utf-8')(s
)
184 self
.assertEqual(s
.getvalue(), '123')
185 c
.write(u
'\U00012345')
186 self
.assertEqual(s
.getvalue(), '123\xf0\x92\x8d\x85')
188 # Python utf-8 codec can't buffer surrogate pairs yet.
190 c
.write(u
'\U00012345'[0])
191 self
.assertEqual(s
.getvalue(), '123\xf0\x92\x8d\x85')
192 c
.write(u
'\U00012345'[1] + u
'\U00012345' + u
'\uac00\u00ac')
193 self
.assertEqual(s
.getvalue(),
194 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
195 '\xea\xb0\x80\xc2\xac')
196 c
.write(u
'\U00012345'[0])
197 self
.assertEqual(s
.getvalue(),
198 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
199 '\xea\xb0\x80\xc2\xac')
201 self
.assertEqual(s
.getvalue(),
202 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
203 '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
204 c
.write(u
'\U00012345'[1])
205 self
.assertEqual(s
.getvalue(),
206 '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
207 '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
212 def test_streamwriter_strwrite(self
):
213 s
= StringIO
.StringIO()
214 wr
= codecs
.getwriter('gb18030')(s
)
216 self
.assertEqual(s
.getvalue(), 'abcd')
218 class Test_ISO2022(unittest
.TestCase
):
220 iso2022jp2
= '\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
221 uni
= u
':hu4:unit\xe9 de famille'
222 self
.assertEqual(iso2022jp2
.decode('iso2022-jp-2'), uni
)
224 def test_iso2022_jp_g0(self
):
225 self
.assertNotIn('\x0e', u
'\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
226 for encoding
in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
227 e
= u
'\u3406'.encode(encoding
)
228 self
.assertFalse(filter(lambda x
: x
>= '\x80', e
))
230 def test_bug1572832(self
):
231 if sys
.maxunicode
>= 0x10000:
234 myunichr
= lambda x
: unichr(0xD7C0+(x
>>10)) + unichr(0xDC00+(x
&0x3FF))
236 for x
in xrange(0x10000, 0x110000):
237 # Any ISO 2022 codec will cause the segfault
238 myunichr(x
).encode('iso_2022_jp', 'ignore')
class TestStateful(unittest.TestCase):
    # Compares one-shot and incremental encoding for a stateful codec.
    # Subclasses override the four class attributes below.
    text = u'\u4E16\u4E16'
    encoding = 'iso-2022-jp'
    # Output of the incremental encoder when no final flush is requested.
    expected = b'\x1b$B@$@$'
    # Output once the encoder has been flushed.
    expected_reset = b'\x1b$B@$@$\x1b(B'

    def test_encode(self):
        # One-shot encoding produces the flushed form.
        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)

    def test_incrementalencoder(self):
        # Feeding characters one at a time without final=True yields the
        # unflushed form.
        encoder = codecs.getincrementalencoder(self.encoding)()
        output = b''.join(
            encoder.encode(char)
            for char in self.text)
        self.assertEqual(output, self.expected)

    def test_incrementalencoder_final(self):
        # Passing final=True with the last character yields the flushed form.
        encoder = codecs.getincrementalencoder(self.encoding)()
        last_index = len(self.text) - 1
        output = b''.join(
            encoder.encode(char, index == last_index)
            for index, char in enumerate(self.text))
        self.assertEqual(output, self.expected_reset)
class TestHZStateful(TestStateful):
    # Runs the TestStateful checks against the HZ codec.
    text = u'\u804a\u804a'
    # NOTE(review): the `encoding` and `expected` overrides were lost in
    # extraction; without them this class would inherit the ISO-2022-JP
    # values, which cannot match `expected_reset` below. They are
    # reconstructed from the class name and from `expected_reset` minus its
    # closing '~}' -- confirm upstream.
    encoding = 'hz'
    expected = b'~{ADAD'
    expected_reset = b'~{ADAD~}'
# NOTE(review): the `def` line and the guarded call were lost in extraction;
# the function name is a reconstruction -- confirm upstream.
def test_main():
    # Run every TestCase defined in this module.
    test_support.run_unittest(__name__)

if __name__ == "__main__":
    test_main()