# git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_unicodedata.py
# AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python...
# [mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / test / test_unicodedata.py
""" Test script for the unicodedata module.

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
8
9 import sys
10 import unittest
11 import hashlib
12 import subprocess
13 import test.test_support
14
# Codec used to turn the joined unicode results into bytes before hashing.
encoding = 'utf-8'
16
17
18 ### Run tests
19
class UnicodeMethodsTest(unittest.TestCase):
    """Checksum regression test for the unicode string methods.

    Every code point below 0x10000 is run through the predicate and case
    mapping methods of the unicode type; the results are folded into one
    SHA-1 digest which is compared against a recorded value.
    """

    # update this, if the database changes
    expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf'

    def test_method_checksum(self):
        digest = hashlib.sha1()
        for codepoint in range(0x10000):
            ch = unichr(codepoint)

            # The character followed by a fixed suffix, for the
            # multi-character variants of each check.
            ch_abc = ch + u'abc'
            ch_ABC = ch + u'ABC'
            ch_123 = ch + u'123'
            ch_ws = ch + u' \t'

            # Predicates (single char)
            single_predicates = (
                ch.isalnum(),
                ch.isalpha(),
                ch.isdecimal(),
                ch.isdigit(),
                ch.islower(),
                ch.isnumeric(),
                ch.isspace(),
                ch.istitle(),
                ch.isupper(),
            )

            # Predicates (multiple chars)
            multi_predicates = (
                ch_abc.isalnum(),
                ch_abc.isalpha(),
                ch_123.isdecimal(),
                ch_123.isdigit(),
                ch_abc.islower(),
                ch_123.isnumeric(),
                ch_ws.isspace(),
                ch_abc.istitle(),
                ch_ABC.isupper(),
            )

            # Each boolean result indexes u"01" to become a one-char flag.
            pieces = [u"01"[flag] for flag in single_predicates]
            pieces.extend(u"01"[flag] for flag in multi_predicates)

            # Mappings (single char)
            pieces.extend((ch.lower(), ch.upper(), ch.title()))

            # Mappings (multiple chars)
            pieces.extend((
                ch_abc.lower(),
                ch_ABC.upper(),
                ch_abc.title(),
                ch_ABC.title(),
            ))

            digest.update(u''.join(pieces).encode(encoding))

        self.assertEqual(digest.hexdigest(), self.expectedchecksum)
67
class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import happens per test on purpose: if unicodedata is not
        # available this raises an ImportError here, while the test cases
        # that do not derive from this class will still be run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the reference taken in setUp.
        delattr(self, 'db')
78
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the lookup functions of the unicodedata module.

    The module under test is reached through ``self.db``, which is set up
    by ``UnicodeDatabaseTest.setUp``.
    """

    # update this, if the database changes
    expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6'

    def test_function_checksum(self):
        # Fold the properties of every code point below 0x10000 into one
        # SHA-1 digest and compare it with the recorded value.
        h = hashlib.sha1()

        for i in range(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # Characters without a digit value yield the supplied default.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)
        self.assertEqual(self.db.digit(u'\U00020000', None), None)

        # Missing argument, multi-character argument, and a character with
        # no digit value and no default.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        # Characters without a numeric value yield the supplied default.
        self.assertEqual(self.db.numeric(u'A', None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
        self.assertEqual(self.db.numeric(u'\ua627'), 7.0)
        self.assertEqual(self.db.numeric(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        # Characters without a decimal value yield the supplied default.
        self.assertEqual(self.db.decimal(u'A', None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)
        self.assertEqual(self.db.decimal(u'\U00020000', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')
        self.assertEqual(self.db.category(u'\U00020000'), 'Lo')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')
        self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'), '')
        self.assertEqual(self.db.decomposition(u'\u00bc'),
                         '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
        self.assertEqual(self.db.mirrored(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)
        self.assertEqual(self.db.combining(u'\U00020000'), 0)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_pr29(self):
        # http://www.unicode.org/review/pr-29.html
        # See issues #1054943 and #10254.
        #
        # Each sample must come back unchanged from NFC normalization.
        # The final fragment has to be a unicode literal (u'...'): written
        # as 'u\u0938...' it is a byte string holding a literal "u" and
        # unprocessed backslash sequences, so the intended text would never
        # be exercised.
        composed = (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161",
                    u'Li\u030dt-s\u1e73\u0301',
                    u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
                    + u'\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
                    u'\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
                    + u'\u0938\u094d\u0924\u093e\u0928')
        for text in composed:
            self.assertEqual(self.db.normalize('NFC', text), text)

    def test_issue10254(self):
        # Crash reported in #10254
        a = u'C\u0338' * 20 + u'C\u0327'
        b = u'C\u0338' * 20 + u'\xC7'
        self.assertEqual(self.db.normalize('NFC', a), b)

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are
        # all rejected.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
        self.assertEqual(eaw(u'\U00020000'), 'W')
219
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Regression tests for individual unicodedata / unicode-type issues.

    The unicodedata module is reached through ``self.db``, which is set up
    by ``UnicodeDatabaseTest.setUp``.
    """

    def test_failed_import_during_compiling(self):
        # Issue 4367
        # Decoding \N escapes requires the unicodedata module. If it can't be
        # imported, we shouldn't segfault.

        # This program should raise a SyntaxError in the eval.
        # The \N fragments are raw byte strings so that the backslash is
        # passed through literally to the child / compared literally.
        code = ("import sys;"
                "sys.modules['unicodedata'] = None;"
                r"""eval("u'\N{SOFT HYPHEN}'")""")
        args = [sys.executable, "-c", code]
        # We use a subprocess because the unicodedata module may already have
        # been loaded in this process.
        popen = subprocess.Popen(args, stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting for the child; calling
        # wait() first can deadlock if the child fills the pipe buffer.
        _, stderr = popen.communicate()
        self.assertEqual(popen.returncode, 1)
        error = (r"SyntaxError: (unicode error) \N escapes not supported "
                 "(can't load unicodedata module)")
        self.assertIn(error, stderr)

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.decimal(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertGreaterEqual(count, 10)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            dec = self.db.digit(c, -1)
            if dec != -1:
                self.assertEqual(dec, self.db.numeric(c))
                count += 1
        # should have tested at least the ASCII digits
        self.assertGreaterEqual(count, 10)

    def test_bug_1704793(self):
        self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')

    def test_ucd_510(self):
        # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
        self.assertTrue(self.db.mirrored(u"\u0f3a"))
        self.assertFalse(self.db.ucd_3_2_0.mirrored(u"\u0f3a"))
        # Also, we now have two ways of representing
        # the upper-case mapping: as delta, or as absolute value
        self.assertEqual(u"a".upper(), u'A')
        self.assertEqual(u"\u1d79".upper(), u'\ua77d')
        self.assertEqual(u".".upper(), u".")

    def test_bug_5828(self):
        self.assertEqual(u"\u1d79".lower(), u"\u1d79")
        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
        self.assertEqual(
            [
                c for c in xrange(sys.maxunicode + 1)
                if u"\x00" in unichr(c).lower() + unichr(c).upper() + unichr(c).title()
            ],
            [0]
        )

    def test_bug_4971(self):
        # LETTER DZ WITH CARON: DZ, Dz, dz
        self.assertEqual(u"\u01c4".title(), u"\u01c5")
        self.assertEqual(u"\u01c5".title(), u"\u01c5")
        self.assertEqual(u"\u01c6".title(), u"\u01c5")

    def test_linebreak_7643(self):
        # Code points that splitlines() is expected to treat as line
        # boundaries; every other code point below 0x10000 must not split.
        linebreaks = (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
                      0x1c, 0x1d, 0x1e, 0x2028, 0x2029)
        for i in range(0x10000):
            lines = (unichr(i) + u'A').splitlines()
            if i in linebreaks:
                self.assertEqual(len(lines), 2,
                                 r"\u%.4x should be a linebreak" % i)
            else:
                self.assertEqual(len(lines), 1,
                                 r"\u%.4x should not be a linebreak" % i)
308
def test_main():
    """Hand the module's concrete test case classes to test_support."""
    cases = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*cases)


if __name__ == "__main__":
    test_main()