# Extracted from the git.proxmox.com gitweb blob view of mirror_edk2.git:
# AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_multibytecodec_support.py
#
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
12 from httplib
import HTTPException
13 from test
import test_support
14 from StringIO
import StringIO
# Per-codec settings that the test methods read through ``self``.
# NOTE(review): these look like class-level defaults; the enclosing ``class``
# statement is not visible in this chunk -- confirm the intended scope.

# codec name
encoding = ''
# codec tuple (with 4 elements)
codec = None
# string to test StreamReader
tstring = ''

# must set. codec test tuple
codectests = None
# set if roundtrip is possible with unicode
roundtriptest = 1
# set if this encoding contains whole iso10646 map
has_iso10646 = 0
# string to test xmlcharrefreplace
xmlcharnametest = None
# a unicode codepoint that is not mapped.
unmappedunicode = u'\udeee'
28 if self
.codec
is None:
29 self
.codec
= codecs
.lookup(self
.encoding
)
30 self
.encode
= self
.codec
.encode
31 self
.decode
= self
.codec
.decode
32 self
.reader
= self
.codec
.streamreader
33 self
.writer
= self
.codec
.streamwriter
34 self
.incrementalencoder
= self
.codec
.incrementalencoder
35 self
.incrementaldecoder
= self
.codec
.incrementaldecoder
37 def test_chunkcoding(self
):
38 for native
, utf8
in zip(*[StringIO(f
).readlines()
39 for f
in self
.tstring
]):
40 u
= self
.decode(native
)[0]
41 self
.assertEqual(u
, utf8
.decode('utf-8'))
42 if self
.roundtriptest
:
43 self
.assertEqual(native
, self
.encode(u
)[0])
45 def test_errorhandle(self
):
46 for source
, scheme
, expected
in self
.codectests
:
47 if type(source
) == type(''):
52 result
= func(source
, scheme
)[0]
53 self
.assertEqual(result
, expected
)
55 self
.assertRaises(UnicodeError, func
, source
, scheme
)
57 def test_xmlcharrefreplace(self
):
61 s
= u
"\u0b13\u0b23\u0b60 nd eggs"
63 self
.encode(s
, "xmlcharrefreplace")[0],
64 "&#2835;&#2851;&#2912; nd eggs"
67 def test_customreplace_encode(self
):
71 from htmlentitydefs
import codepoint2name
73 def xmlcharnamereplace(exc
):
74 if not isinstance(exc
, UnicodeEncodeError):
75 raise TypeError("don't know how to handle %r" % exc
)
77 for c
in exc
.object[exc
.start
:exc
.end
]:
78 if ord(c
) in codepoint2name
:
79 l
.append(u
"&%s;" % codepoint2name
[ord(c
)])
81 l
.append(u
"&#%d;" % ord(c
))
82 return (u
"".join(l
), exc
.end
)
84 codecs
.register_error("test.xmlcharnamereplace", xmlcharnamereplace
)
86 if self
.xmlcharnametest
:
87 sin
, sout
= self
.xmlcharnametest
89 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u232a"
90 sout
= "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
91 self
.assertEqual(self
.encode(sin
,
92 "test.xmlcharnamereplace")[0], sout
)
94 def test_callback_wrong_objects(self
):
97 codecs
.register_error("test.cjktest", myreplace
)
99 for ret
in ([1, 2, 3], [], None, object(), 'string', ''):
100 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
103 def test_callback_long_index(self
):
105 return (u
'x', long(exc
.end
))
106 codecs
.register_error("test.cjktest", myreplace
)
107 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
108 'test.cjktest'), ('abcdxefgh', 9))
111 return (u
'x', sys
.maxint
+ 1)
112 codecs
.register_error("test.cjktest", myreplace
)
113 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
116 def test_callback_None_index(self
):
119 codecs
.register_error("test.cjktest", myreplace
)
120 self
.assertRaises(TypeError, self
.encode
, self
.unmappedunicode
,
123 def test_callback_backward_index(self
):
125 if myreplace
.limit
> 0:
127 return (u
'REPLACED', 0)
129 return (u
'TERMINAL', exc
.end
)
131 codecs
.register_error("test.cjktest", myreplace
)
132 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
134 ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
136 def test_callback_forward_index(self
):
138 return (u
'REPLACED', exc
.end
+ 2)
139 codecs
.register_error("test.cjktest", myreplace
)
140 self
.assertEqual(self
.encode(u
'abcd' + self
.unmappedunicode
+ u
'efgh',
141 'test.cjktest'), ('abcdREPLACEDgh', 9))
143 def test_callback_index_outofbound(self
):
145 return (u
'TERM', 100)
146 codecs
.register_error("test.cjktest", myreplace
)
147 self
.assertRaises(IndexError, self
.encode
, self
.unmappedunicode
,
150 def test_incrementalencoder(self
):
151 UTF8Reader
= codecs
.getreader('utf-8')
152 for sizehint
in [None] + range(1, 33) + \
153 [64, 128, 256, 512, 1024]:
154 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
156 encoder
= self
.incrementalencoder()
158 if sizehint
is not None:
159 data
= istream
.read(sizehint
)
161 data
= istream
.read()
165 e
= encoder
.encode(data
)
168 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
170 def test_incrementaldecoder(self
):
171 UTF8Writer
= codecs
.getwriter('utf-8')
172 for sizehint
in [None, -1] + range(1, 33) + \
173 [64, 128, 256, 512, 1024]:
174 istream
= StringIO(self
.tstring
[0])
175 ostream
= UTF8Writer(StringIO())
176 decoder
= self
.incrementaldecoder()
178 data
= istream
.read(sizehint
)
182 u
= decoder
.decode(data
)
185 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
187 def test_incrementalencoder_error_callback(self
):
188 inv
= self
.unmappedunicode
190 e
= self
.incrementalencoder()
191 self
.assertRaises(UnicodeEncodeError, e
.encode
, inv
, True)
194 self
.assertEqual(e
.encode(inv
, True), '')
197 def tempreplace(exc
):
198 return (u
'called', exc
.end
)
199 codecs
.register_error('test.incremental_error_callback', tempreplace
)
200 e
.errors
= 'test.incremental_error_callback'
201 self
.assertEqual(e
.encode(inv
, True), 'called')
205 self
.assertEqual(e
.encode(inv
, True), '')
207 def test_streamreader(self
):
208 UTF8Writer
= codecs
.getwriter('utf-8')
209 for name
in ["read", "readline", "readlines"]:
210 for sizehint
in [None, -1] + range(1, 33) + \
211 [64, 128, 256, 512, 1024]:
212 istream
= self
.reader(StringIO(self
.tstring
[0]))
213 ostream
= UTF8Writer(StringIO())
214 func
= getattr(istream
, name
)
216 data
= func(sizehint
)
219 if name
== "readlines":
220 ostream
.writelines(data
)
224 self
.assertEqual(ostream
.getvalue(), self
.tstring
[1])
226 def test_streamwriter(self
):
227 readfuncs
= ('read', 'readline', 'readlines')
228 UTF8Reader
= codecs
.getreader('utf-8')
229 for name
in readfuncs
:
230 for sizehint
in [None] + range(1, 33) + \
231 [64, 128, 256, 512, 1024]:
232 istream
= UTF8Reader(StringIO(self
.tstring
[1]))
233 ostream
= self
.writer(StringIO())
234 func
= getattr(istream
, name
)
236 if sizehint
is not None:
237 data
= func(sizehint
)
243 if name
== "readlines":
244 ostream
.writelines(data
)
248 self
.assertEqual(ostream
.getvalue(), self
.tstring
[0])
250 class TestBase_Mapping(unittest
.TestCase
):
255 def __init__(self
, *args
, **kw
):
256 unittest
.TestCase
.__init
__(self
, *args
, **kw
)
258 self
.open_mapping_file().close() # test it to report the error early
259 except (IOError, HTTPException
):
260 self
.skipTest("Could not retrieve "+self
.mapfileurl
)
def open_mapping_file(self):
    """Open the mapping resource named by ``self.mapfileurl``.

    Returns whatever ``test_support.open_urlresource`` hands back for
    that URL; callers in this file close it or use it in a ``with``
    statement.
    """
    url = self.mapfileurl
    return test_support.open_urlresource(url)
265 def test_mapping_file(self
):
266 if self
.mapfileurl
.endswith('.xml'):
267 self
._test
_mapping
_file
_ucm
()
269 self
._test
_mapping
_file
_plain
()
271 def _test_mapping_file_plain(self
):
272 _unichr
= lambda c
: eval("u'\\U%08x'" % int(c
, 16))
273 unichrs
= lambda s
: u
''.join(_unichr(c
) for c
in s
.split('+'))
276 with self
.open_mapping_file() as f
:
280 data
= line
.split('#')[0].strip().split()
284 csetval
= eval(data
[0])
286 csetch
= chr(csetval
& 0xff)
287 elif csetval
>= 0x1000000:
288 csetch
= chr(csetval
>> 24) + chr((csetval
>> 16) & 0xff) + \
289 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
290 elif csetval
>= 0x10000:
291 csetch
= chr(csetval
>> 16) + \
292 chr((csetval
>> 8) & 0xff) + chr(csetval
& 0xff)
293 elif csetval
>= 0x100:
294 csetch
= chr(csetval
>> 8) + chr(csetval
& 0xff)
298 unich
= unichrs(data
[1])
299 if unich
== u
'\ufffd' or unich
in urt_wa
:
301 urt_wa
[unich
] = csetch
303 self
._testpoint
(csetch
, unich
)
305 def _test_mapping_file_ucm(self
):
306 with self
.open_mapping_file() as f
:
308 uc
= re
.findall('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>', ucmdata
)
309 for uni
, coded
in uc
:
310 unich
= unichr(int(uni
, 16))
311 codech
= ''.join(chr(int(c
, 16)) for c
in coded
.split())
312 self
._testpoint
(codech
, unich
)
314 def test_mapping_supplemental(self
):
315 for mapping
in self
.supmaps
:
316 self
._testpoint
(*mapping
)
318 def _testpoint(self
, csetch
, unich
):
319 if (csetch
, unich
) not in self
.pass_enctest
:
321 self
.assertEqual(unich
.encode(self
.encoding
), csetch
)
322 except UnicodeError, exc
:
323 self
.fail('Encoding failed while testing %s -> %s: %s' % (
324 repr(unich
), repr(csetch
), exc
.reason
))
325 if (csetch
, unich
) not in self
.pass_dectest
:
327 self
.assertEqual(csetch
.decode(self
.encoding
), unich
)
328 except UnicodeError, exc
:
329 self
.fail('Decoding failed while testing %s -> %s: %s' % (
330 repr(csetch
), repr(unich
), exc
.reason
))
332 def load_teststring(name
):
333 dir = os
.path
.join(os
.path
.dirname(__file__
), 'cjkencodings')
334 with
open(os
.path
.join(dir, name
+ '.txt'), 'rb') as f
:
336 with
open(os
.path
.join(dir, name
+ '-utf8.txt'), 'rb') as f
: