src/vendor/unicode-normalization/scripts/unicode_gen_normtests.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   4 # file at the top-level directory of this distribution and at
   5 # http://rust-lang.org/COPYRIGHT.
   6 #
   7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
  10 # option. This file may not be copied, modified, or distributed
  11 # except according to those terms.
  12
  13 # This script uses the following Unicode tables:
  14 # - NormalizationTest.txt
  15 #
# Since this should not require frequent updates, we just store this
# out-of-line and check the generated testdata.rs file into git.
  18
  19 import unicode, re, os, fileinput
  20
  21 def load_test_data(f):
  22     outls = []
  23     testRe = re.compile("^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")
  24
  25     unicode.fetch(f)
  26     for line in fileinput.input(os.path.basename(f)):
  27         # comment and header lines start with # and @ respectively
  28         if len(line) < 1 or line[0:1] == '#' or line[0:1] == '@':
  29             continue
  30
  31         m = testRe.match(line)
  32         groups = []
  33         if not m:
  34             print "error: no match on line where test was expected: %s" % line
  35             continue
  36
  37         has_surrogates = False
  38         for i in range(1, 6):
  39             group = []
  40             chs = m.group(i).split()
  41             for ch in chs:
  42                 intch = int(ch,16)
  43                 if unicode.is_surrogate(intch):
  44                     has_surrogates = True
  45                     break
  46                 group.append(intch)
  47
  48             if has_surrogates:
  49                 break
  50             groups.append(group)
  51
  52         if has_surrogates:
  53             continue
  54         outls.append(groups)
  55
  56     return outls
  57
  58 def showfun(gs):
  59     outstr = '('
  60     gfirst = True
  61     for g in gs:
  62         if not gfirst:
  63             outstr += ','
  64         gfirst = False
  65
  66         outstr += '"'
  67         for ch in g:
  68             outstr += "\\u{%x}" % ch
  69         outstr += '"'
  70     outstr += ')'
  71     return outstr
  72
  73 if __name__ == "__main__":
  74     d = load_test_data("NormalizationTest.txt")
  75     ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
  76     with open("testdata.rs", "w") as nf:
  77         nf.write(unicode.preamble)
  78         nf.write("\n")
  79         nf.write("    // official Unicode test data\n")
  80         nf.write("    // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n")
  81         unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)