AppPkg/Applications/Python/Python-2.7.2/Lib/test/test_unicodedata.py

   1 """ Test script for the unicodedata module.
   2
   3     Written by Marc-Andre Lemburg (mal@lemburg.com).
   4
   5     (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
   6
   7 """
   8
   9 import sys
  10 import unittest
  11 import hashlib
  12 import subprocess
  13 import test.test_support
  14
  15 encoding = 'utf-8'
  16
  17
  18 ### Run tests
  19
  20 class UnicodeMethodsTest(unittest.TestCase):
  21
  22     # update this, if the database changes
  23     expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf'
  24
  25     def test_method_checksum(self):
  26         h = hashlib.sha1()
  27         for i in range(0x10000):
  28             char = unichr(i)
  29             data = [
  30                 # Predicates (single char)
  31                 u"01"[char.isalnum()],
  32                 u"01"[char.isalpha()],
  33                 u"01"[char.isdecimal()],
  34                 u"01"[char.isdigit()],
  35                 u"01"[char.islower()],
  36                 u"01"[char.isnumeric()],
  37                 u"01"[char.isspace()],
  38                 u"01"[char.istitle()],
  39                 u"01"[char.isupper()],
  40
  41                 # Predicates (multiple chars)
  42                 u"01"[(char + u'abc').isalnum()],
  43                 u"01"[(char + u'abc').isalpha()],
  44                 u"01"[(char + u'123').isdecimal()],
  45                 u"01"[(char + u'123').isdigit()],
  46                 u"01"[(char + u'abc').islower()],
  47                 u"01"[(char + u'123').isnumeric()],
  48                 u"01"[(char + u' \t').isspace()],
  49                 u"01"[(char + u'abc').istitle()],
  50                 u"01"[(char + u'ABC').isupper()],
  51
  52                 # Mappings (single char)
  53                 char.lower(),
  54                 char.upper(),
  55                 char.title(),
  56
  57                 # Mappings (multiple chars)
  58                 (char + u'abc').lower(),
  59                 (char + u'ABC').upper(),
  60                 (char + u'abc').title(),
  61                 (char + u'ABC').title(),
  62
  63                 ]
  64             h.update(u''.join(data).encode(encoding))
  65         result = h.hexdigest()
  66         self.assertEqual(result, self.expectedchecksum)
  67
  68 class UnicodeDatabaseTest(unittest.TestCase):
  69
  70     def setUp(self):
  71         # In case unicodedata is not available, this will raise an ImportError,
  72         # but the other test cases will still be run
  73         import unicodedata
  74         self.db = unicodedata
  75
  76     def tearDown(self):
  77         del self.db
  78
  79 class UnicodeFunctionsTest(UnicodeDatabaseTest):
  80
  81     # update this, if the database changes
  82     expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6'
  83
  84     def test_function_checksum(self):
  85         data = []
  86         h = hashlib.sha1()
  87
  88         for i in range(0x10000):
  89             char = unichr(i)
  90             data = [
  91                 # Properties
  92                 str(self.db.digit(char, -1)),
  93                 str(self.db.numeric(char, -1)),
  94                 str(self.db.decimal(char, -1)),
  95                 self.db.category(char),
  96                 self.db.bidirectional(char),
  97                 self.db.decomposition(char),
  98                 str(self.db.mirrored(char)),
  99                 str(self.db.combining(char)),
 100             ]
 101             h.update(''.join(data))
 102         result = h.hexdigest()
 103         self.assertEqual(result, self.expectedchecksum)
 104
 105     def test_digit(self):
 106         self.assertEqual(self.db.digit(u'A', None), None)
 107         self.assertEqual(self.db.digit(u'9'), 9)
 108         self.assertEqual(self.db.digit(u'\u215b', None), None)
 109         self.assertEqual(self.db.digit(u'\u2468'), 9)
 110         self.assertEqual(self.db.digit(u'\U00020000', None), None)
 111
 112         self.assertRaises(TypeError, self.db.digit)
 113         self.assertRaises(TypeError, self.db.digit, u'xx')
 114         self.assertRaises(ValueError, self.db.digit, u'x')
 115
 116     def test_numeric(self):
 117         self.assertEqual(self.db.numeric(u'A',None), None)
 118         self.assertEqual(self.db.numeric(u'9'), 9)
 119         self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
 120         self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
 121         self.assertEqual(self.db.numeric(u'\ua627'), 7.0)
 122         self.assertEqual(self.db.numeric(u'\U00020000', None), None)
 123
 124         self.assertRaises(TypeError, self.db.numeric)
 125         self.assertRaises(TypeError, self.db.numeric, u'xx')
 126         self.assertRaises(ValueError, self.db.numeric, u'x')
 127
 128     def test_decimal(self):
 129         self.assertEqual(self.db.decimal(u'A',None), None)
 130         self.assertEqual(self.db.decimal(u'9'), 9)
 131         self.assertEqual(self.db.decimal(u'\u215b', None), None)
 132         self.assertEqual(self.db.decimal(u'\u2468', None), None)
 133         self.assertEqual(self.db.decimal(u'\U00020000', None), None)
 134
 135         self.assertRaises(TypeError, self.db.decimal)
 136         self.assertRaises(TypeError, self.db.decimal, u'xx')
 137         self.assertRaises(ValueError, self.db.decimal, u'x')
 138
 139     def test_category(self):
 140         self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
 141         self.assertEqual(self.db.category(u'a'), 'Ll')
 142         self.assertEqual(self.db.category(u'A'), 'Lu')
 143         self.assertEqual(self.db.category(u'\U00020000'), 'Lo')
 144
 145         self.assertRaises(TypeError, self.db.category)
 146         self.assertRaises(TypeError, self.db.category, u'xx')
 147
 148     def test_bidirectional(self):
 149         self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
 150         self.assertEqual(self.db.bidirectional(u' '), 'WS')
 151         self.assertEqual(self.db.bidirectional(u'A'), 'L')
 152         self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')
 153
 154         self.assertRaises(TypeError, self.db.bidirectional)
 155         self.assertRaises(TypeError, self.db.bidirectional, u'xx')
 156
 157     def test_decomposition(self):
 158         self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
 159         self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')
 160
 161         self.assertRaises(TypeError, self.db.decomposition)
 162         self.assertRaises(TypeError, self.db.decomposition, u'xx')
 163
 164     def test_mirrored(self):
 165         self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
 166         self.assertEqual(self.db.mirrored(u'a'), 0)
 167         self.assertEqual(self.db.mirrored(u'\u2201'), 1)
 168         self.assertEqual(self.db.mirrored(u'\U00020000'), 0)
 169
 170         self.assertRaises(TypeError, self.db.mirrored)
 171         self.assertRaises(TypeError, self.db.mirrored, u'xx')
 172
 173     def test_combining(self):
 174         self.assertEqual(self.db.combining(u'\uFFFE'), 0)
 175         self.assertEqual(self.db.combining(u'a'), 0)
 176         self.assertEqual(self.db.combining(u'\u20e1'), 230)
 177         self.assertEqual(self.db.combining(u'\U00020000'), 0)
 178
 179         self.assertRaises(TypeError, self.db.combining)
 180         self.assertRaises(TypeError, self.db.combining, u'xx')
 181
 182     def test_normalize(self):
 183         self.assertRaises(TypeError, self.db.normalize)
 184         self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
 185         self.assertEqual(self.db.normalize('NFKC', u''), u'')
 186         # The rest can be found in test_normalization.py
 187         # which requires an external file.
 188
 189     def test_pr29(self):
 190         # http://www.unicode.org/review/pr-29.html
 191         # See issues #1054943 and #10254.
 192         composed = (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161",
 193                     u'Li\u030dt-s\u1e73\u0301',
 194                     u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
 195                     + u'\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
 196                     u'\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
 197                     + 'u\u0938\u094d\u0924\u093e\u0928')
 198         for text in composed:
 199             self.assertEqual(self.db.normalize('NFC', text), text)
 200
 201     def test_issue10254(self):
 202         # Crash reported in #10254
 203         a = u'C\u0338' * 20  + u'C\u0327'
 204         b = u'C\u0338' * 20  + u'\xC7'
 205         self.assertEqual(self.db.normalize('NFC', a), b)
 206
 207     def test_east_asian_width(self):
 208         eaw = self.db.east_asian_width
 209         self.assertRaises(TypeError, eaw, 'a')
 210         self.assertRaises(TypeError, eaw, u'')
 211         self.assertRaises(TypeError, eaw, u'ra')
 212         self.assertEqual(eaw(u'\x1e'), 'N')
 213         self.assertEqual(eaw(u'\x20'), 'Na')
 214         self.assertEqual(eaw(u'\uC894'), 'W')
 215         self.assertEqual(eaw(u'\uFF66'), 'H')
 216         self.assertEqual(eaw(u'\uFF1F'), 'F')
 217         self.assertEqual(eaw(u'\u2010'), 'A')
 218         self.assertEqual(eaw(u'\U00020000'), 'W')
 219
 220 class UnicodeMiscTest(UnicodeDatabaseTest):
 221
 222     def test_failed_import_during_compiling(self):
 223         # Issue 4367
 224         # Decoding \N escapes requires the unicodedata module. If it can't be
 225         # imported, we shouldn't segfault.
 226
 227         # This program should raise a SyntaxError in the eval.
 228         code = "import sys;" \
 229             "sys.modules['unicodedata'] = None;" \
 230             """eval("u'\N{SOFT HYPHEN}'")"""
 231         args = [sys.executable, "-c", code]
 232         # We use a subprocess because the unicodedata module may already have
 233         # been loaded in this process.
 234         popen = subprocess.Popen(args, stderr=subprocess.PIPE)
 235         popen.wait()
 236         self.assertEqual(popen.returncode, 1)
 237         error = "SyntaxError: (unicode error) \N escapes not supported " \
 238             "(can't load unicodedata module)"
 239         self.assertIn(error, popen.stderr.read())
 240
 241     def test_decimal_numeric_consistent(self):
 242         # Test that decimal and numeric are consistent,
 243         # i.e. if a character has a decimal value,
 244         # its numeric value should be the same.
 245         count = 0
 246         for i in xrange(0x10000):
 247             c = unichr(i)
 248             dec = self.db.decimal(c, -1)
 249             if dec != -1:
 250                 self.assertEqual(dec, self.db.numeric(c))
 251                 count += 1
 252         self.assertTrue(count >= 10) # should have tested at least the ASCII digits
 253
 254     def test_digit_numeric_consistent(self):
 255         # Test that digit and numeric are consistent,
 256         # i.e. if a character has a digit value,
 257         # its numeric value should be the same.
 258         count = 0
 259         for i in xrange(0x10000):
 260             c = unichr(i)
 261             dec = self.db.digit(c, -1)
 262             if dec != -1:
 263                 self.assertEqual(dec, self.db.numeric(c))
 264                 count += 1
 265         self.assertTrue(count >= 10) # should have tested at least the ASCII digits
 266
 267     def test_bug_1704793(self):
 268         self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
 269
 270     def test_ucd_510(self):
 271         import unicodedata
 272         # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
 273         self.assertTrue(unicodedata.mirrored(u"\u0f3a"))
 274         self.assertTrue(not unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
 275         # Also, we now have two ways of representing
 276         # the upper-case mapping: as delta, or as absolute value
 277         self.assertTrue(u"a".upper()==u'A')
 278         self.assertTrue(u"\u1d79".upper()==u'\ua77d')
 279         self.assertTrue(u".".upper()==u".")
 280
 281     def test_bug_5828(self):
 282         self.assertEqual(u"\u1d79".lower(), u"\u1d79")
 283         # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
 284         self.assertEqual(
 285             [
 286                 c for c in range(sys.maxunicode+1)
 287                 if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
 288             ],
 289             [0]
 290         )
 291
 292     def test_bug_4971(self):
 293         # LETTER DZ WITH CARON: DZ, Dz, dz
 294         self.assertEqual(u"\u01c4".title(), u"\u01c5")
 295         self.assertEqual(u"\u01c5".title(), u"\u01c5")
 296         self.assertEqual(u"\u01c6".title(), u"\u01c5")
 297
 298     def test_linebreak_7643(self):
 299         for i in range(0x10000):
 300             lines = (unichr(i) + u'A').splitlines()
 301             if i in (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
 302                      0x1c, 0x1d, 0x1e, 0x2028, 0x2029):
 303                 self.assertEqual(len(lines), 2,
 304                                  r"\u%.4x should be a linebreak" % i)
 305             else:
 306                 self.assertEqual(len(lines), 1,
 307                                  r"\u%.4x should not be a linebreak" % i)
 308
 309 def test_main():
 310     test.test_support.run_unittest(
 311         UnicodeMiscTest,
 312         UnicodeMethodsTest,
 313         UnicodeFunctionsTest
 314     )
 315
 316 if __name__ == "__main__":
 317     test_main()