]> git.proxmox.com Git - rustc.git/blob - src/vendor/unicode-normalization/scripts/unicode_gen_normtests.py
New upstream version 1.23.0+dfsg1
[rustc.git] / src / vendor / unicode-normalization / scripts / unicode_gen_normtests.py
1 #!/usr/bin/env python
2 #
3 # Copyright 2015 The Rust Project Developers. See the COPYRIGHT
4 # file at the top-level directory of this distribution and at
5 # http://rust-lang.org/COPYRIGHT.
6 #
7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10 # option. This file may not be copied, modified, or distributed
11 # except according to those terms.
12
13 # This script uses the following Unicode tables:
14 # - NormalizationTest.txt
15 #
16 # Since this should not require frequent updates, we just store this
17 # out-of-line and check the generated testdata.rs file into git.
18
19 import unicode, re, os, fileinput
20
def _parse_fields(fields):
    """Convert the hex code point fields of one test line to integer lists.

    Each field is a whitespace-separated run of hexadecimal code points.
    Returns a list with one list of ints per field, or None as soon as a
    code point is reported as a surrogate by unicode.is_surrogate (fields
    after that point are not parsed).
    """
    groups = []
    for field in fields:
        group = []
        for ch in field.split():
            intch = int(ch, 16)
            if unicode.is_surrogate(intch):
                return None
            group.append(intch)
        groups.append(group)
    return groups


def load_test_data(f):
    """Load the normalization test records from the data file named by f.

    Calls unicode.fetch(f) first (project helper; presumably makes the file
    available locally -- confirm against unicode.py), then reads the file
    named os.path.basename(f) from the current directory.

    Returns a list of records; each record is a list of five lists of
    integer code points, one per semicolon-separated column of the line.
    Lines containing a surrogate code point are dropped. Lines that do not
    match the expected layout are reported on stdout and skipped.
    """
    # Raw string: `\s` must reach the regex engine as-is rather than rely on
    # Python passing an unknown string escape through unchanged.
    test_re = re.compile(r"^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    unicode.fetch(f)

    outls = []
    with open(os.path.basename(f)) as infile:
        for line in infile:
            # Lines keep their trailing newline, so blank lines must be
            # detected by stripping; comment and header lines start with
            # # and @ respectively.
            if not line.strip() or line[0] in "#@":
                continue

            m = test_re.match(line)
            if not m:
                # Parenthesised single-argument form prints identically on
                # Python 2 and is valid syntax on Python 3.
                print("error: no match on line where test was expected: %s" % line)
                continue

            groups = _parse_fields(m.groups())
            if groups is None:
                # The line contains a surrogate; skip the whole record.
                continue
            outls.append(groups)

    return outls
57
def showfun(gs):
    """Format one test record as a parenthesised tuple of string literals.

    gs is an iterable of code point sequences (integers). Each sequence
    becomes a double-quoted literal made of \\u{hex} escapes (lowercase hex,
    no padding); the literals are joined with commas and wrapped in
    parentheses. An empty gs yields "()", an empty sequence yields "".
    """
    literals = ['"' + "".join("\\u{%x}" % ch for ch in g) + '"' for g in gs]
    return "(" + ",".join(literals) + ")"
72
if __name__ == "__main__":
    # Parse the test records, then write them to testdata.rs.
    test_rows = load_test_data("NormalizationTest.txt")
    # Type string handed to unicode.emit_table for the generated table.
    table_type = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
    with open("testdata.rs", "w") as out:
        # Header: the shared preamble followed by a pointer to the data source.
        out.writelines([
            unicode.preamble,
            "\n",
            " // official Unicode test data\n",
            " // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n",
        ])
        unicode.emit_table(out, "TEST_NORM", test_rows, table_type, True, showfun)