# Source: git.proxmox.com Git - rustc.git/blob - src/etc/char_private.py
# New upstream version 1.15.0+dfsg1
# [rustc.git] / src / etc / char_private.py
#!/usr/bin/env python
#
# Copyright 2011-2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode tables:
# - UnicodeData.txt
15
16
17 from collections import namedtuple
18 import csv
19 import os
20 import subprocess
21
# Exclusive upper bound on code point values; get_codepoints() uses it as
# the end of the range it fills in after the last record of the data file.
NUM_CODEPOINTS = 0x110000
23
def to_ranges(iter):
    """Collapse a stream of integers into half-open ``(start, end)`` runs.

    Consecutive values (each exactly one more than the previous) are merged
    into a single run.  A run is always cut at ``0x10000`` and ``0x20000``,
    so no yielded range ever straddles either of those two values.

    Yields tuples ``(start, end)`` where ``end`` is one past the last value
    of the run.  An empty input yields nothing.
    """
    start = end = None
    for value in iter:
        # A value continues the open run only if it is the very next integer
        # and is not one of the two forced split points.
        continues_run = (
            start is not None
            and value == end
            and value not in (0x10000, 0x20000)
        )
        if continues_run:
            end += 1
            continue

        # Otherwise close the run in progress (if any) and open a new one.
        if start is not None:
            yield (start, end)
        start, end = value, value + 1

    if start is not None:
        yield (start, end)
35
def get_escaped(codepoints):
    """Yield the ``value`` of every code point whose class is in the escaped set.

    ``codepoints`` is an iterable of objects with ``value`` and ``class_``
    attributes.  A falsy ``class_`` (``None`` or empty) is treated as ``"Cn"``.
    A code point is selected when its class is one of
    ``Cc Cf Cs Co Cn Zl Zp Zs``; the ASCII space character is skipped even
    when its class is in that list.
    """
    escaped_classes = "Cc Cf Cs Co Cn Zl Zp Zs".split()
    space = ord(' ')
    for cp in codepoints:
        category = cp.class_ or "Cn"
        if category not in escaped_classes:
            continue
        if cp.value == space:
            continue
        yield cp.value
40
def get_file(f):
    """Open the local copy of ``f``, fetching it with ``curl`` if it is missing.

    ``f`` is a URL (or path); only its final path component is used as the
    local file name, looked up relative to the current working directory.
    If that file does not exist, ``curl -O f`` is run to download it and the
    open is retried.

    Returns the open file object (text mode, read-only); closing it is the
    caller's responsibility.  Raises ``subprocess.CalledProcessError`` if
    ``curl`` exits with a non-zero status.
    """
    local_name = os.path.basename(f)
    try:
        handle = open(local_name)
    except FileNotFoundError:
        # No cached copy yet: download into the current directory, then retry.
        subprocess.run(["curl", "-O", f], check=True)
        handle = open(local_name)
    return handle
47
# One code point: its integer value and its class string (None when the
# code point has no record in the data file).
Codepoint = namedtuple('Codepoint', 'value class_')


def get_codepoints(f):
    """Yield a ``Codepoint`` for every code point from 0 up to ``NUM_CODEPOINTS``.

    ``f`` is an iterable of ``;``-separated text lines whose first three
    fields are the code point in hexadecimal, its name, and its class (the
    layout of ``UnicodeData.txt``).  Records are expected in ascending code
    point order.

    * A record is yielded with its own class.
    * A pair of records whose names end in ``First>`` and ``Last>`` describes
      a whole range; every code point in between is yielded with the class of
      the ``First>`` record.
    * Code points with no record (gaps between records, before the first
      record, and after the last one) are yielded with class ``None``.

    Blank lines in the input are ignored.

    Raises ``ValueError`` if a ``First>`` record is not immediately followed
    by a ``Last>`` record.
    """
    r = csv.reader(f, delimiter=";")
    # Start one below zero so that code point 0 is still reported (with class
    # None) when the data does not begin at U+0000 or is empty.
    prev_codepoint = -1
    class_first = None
    for row in r:
        if not row:
            # csv.reader produces an empty list for a blank line.
            continue

        codepoint = int(row[0], 16)
        name = row[1]
        class_ = row[2]

        if class_first is not None:
            if not name.endswith("Last>"):
                raise ValueError("Missing Last after First")

        # Fill the gap since the previous record.  Inside a First/Last pair
        # class_first holds the range's class; otherwise it is None, which
        # marks the code points as having no record.
        for c in range(prev_codepoint + 1, codepoint):
            yield Codepoint(c, class_first)

        class_first = None
        if name.endswith("First>"):
            class_first = class_

        yield Codepoint(codepoint, class_)
        prev_codepoint = codepoint

    if class_first is not None:
        raise ValueError("Missing Last after First")

    # Everything after the last record has no entry in the data.
    for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
        yield Codepoint(c, None)
78
def main():
    """Generate the Rust ``is_printable`` module and print it to stdout.

    Reads ``UnicodeData.txt`` (obtained through ``get_file``), collects the
    ranges of code points selected by ``get_escaped`` and sorts them into
    five tables:

    * ``singletons0`` / ``normal0`` -- ranges starting below ``0x10000``;
    * ``singletons1`` / ``normal1`` -- ranges starting in
      ``0x10000..0x1FFFF``, stored with the ``0x10000`` bit cleared so that
      they fit the ``u16`` tables of the generated code;
    * ``extra`` -- ranges starting at ``0x20000`` or above, emitted as
      explicit ``if`` range checks instead of table entries.

    Ranges of one or two code points are stored as individual singleton
    values; longer ranges are stored as ``(start, length)`` pairs.
    """
    CUTOFF = 0x10000
    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    # get_codepoints() is a lazy generator, so the data file has to stay open
    # until the loop below has consumed it; the with-block then closes it.
    with get_file("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt") as unicode_data:
        codepoints = get_codepoints(unicode_data)

        for a, b in to_ranges(get_escaped(codepoints)):
            if a >= 2 * CUTOFF:
                # Must be ">=": a short range starting exactly at 0x20000
                # would otherwise be stored in singletons0, whose u16
                # entries cannot hold it.
                extra.append((a, b - a))
            elif a == b - 1:
                if a & CUTOFF:
                    singletons1.append(a & ~CUTOFF)
                else:
                    singletons0.append(a)
            elif a == b - 2:
                if a & CUTOFF:
                    singletons1.append(a & ~CUTOFF)
                    singletons1.append((a + 1) & ~CUTOFF)
                else:
                    singletons0.append(a)
                    singletons0.append(a + 1)
            elif a & CUTOFF:
                normal1.append((a & ~CUTOFF, b - a))
            else:
                normal0.append((a, b - a))

    print("""\
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "src/etc/char_private.py",
//       do not edit directly!

use slice::SliceExt;

fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
    for &s in singletons {
        if x == s {
            return false;
        } else if x < s {
            break;
        }
    }
    for w in normal.chunks(2) {
        let start = w[0];
        let len = w[1];
        let difference = (x as i32) - (start as i32);
        if 0 <= difference {
            if difference < len as i32 {
                return false;
            }
        } else {
            break;
        }
    }
    true
}

pub fn is_printable(x: char) -> bool {
    let x = x as u32;
    let lower = x as u16;
    if x < 0x10000 {
        check(lower, SINGLETONS0, NORMAL0)
    } else if x < 0x20000 {
        check(lower, SINGLETONS1, NORMAL1)
    } else {\
""")
    # One explicit range check per entry that does not fit the u16 tables.
    for a, b in extra:
        print("        if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
        print("            return false;")
        print("        }")
    print("""\
        true
    }
}\
""")
    print()
    print("const SINGLETONS0: &'static [u16] = &[")
    for s in singletons0:
        print("    0x{:x},".format(s))
    print("];")
    print("const SINGLETONS1: &'static [u16] = &[")
    for s in singletons1:
        print("    0x{:x},".format(s))
    print("];")
    print("const NORMAL0: &'static [u16] = &[")
    for a, b in normal0:
        print("    0x{:x}, 0x{:x},".format(a, b))
    print("];")
    print("const NORMAL1: &'static [u16] = &[")
    for a, b in normal1:
        print("    0x{:x}, 0x{:x},".format(a, b))
    print("];")
188
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()