]>
git.proxmox.com Git - rustc.git/blob - src/etc/char_private.py
3 # Copyright 2011-2016 The Rust Project Developers. See the COPYRIGHT
4 # file at the top-level directory of this distribution and at
5 # http://rust-lang.org/COPYRIGHT.
7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10 # option. This file may not be copied, modified, or distributed
11 # except according to those terms.
13 # This script uses the following Unicode tables:
17 from collections
import namedtuple
22 NUM_CODEPOINTS
=0x110000
27 if current
is None or i
!= current
[1] or i
in (0x10000, 0x20000):
28 if current
is not None:
33 if current
is not None:
36 def get_escaped(codepoints
):
38 if (c
.class_
or "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and c
.value
!= ord(' '):
43 return open(os
.path
.basename(f
))
44 except FileNotFoundError
:
45 subprocess
.run(["curl", "-O", f
], check
=True)
46 return open(os
.path
.basename(f
))
48 Codepoint
= namedtuple('Codepoint', 'value class_')
50 def get_codepoints(f
):
51 r
= csv
.reader(f
, delimiter
=";")
55 codepoint
= int(row
[0], 16)
59 if class_first
is not None:
60 if not name
.endswith("Last>"):
61 raise ValueError("Missing Last after First")
63 for c
in range(prev_codepoint
+ 1, codepoint
):
64 yield Codepoint(c
, class_first
)
67 if name
.endswith("First>"):
70 yield Codepoint(codepoint
, class_
)
71 prev_codepoint
= codepoint
73 if class_first
!= None:
74 raise ValueError("Missing Last after First")
76 for c
in range(prev_codepoint
+ 1, NUM_CODEPOINTS
):
77 yield Codepoint(c
, None)
79 def compress_singletons(singletons
):
80 uppers
= [] # (upper, # items in lowers)
86 if len(uppers
) == 0 or uppers
[-1][0] != upper
:
87 uppers
.append((upper
, 1))
89 upper
, count
= uppers
[-1]
90 uppers
[-1] = upper
, count
+ 1
95 def compress_normal(normal
):
96 # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
97 # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
98 compressed
= [] # [truelen, (truelenaux), falselen, (falselenaux)]
101 for start
, count
in normal
:
102 truelen
= start
- prev_start
104 prev_start
= start
+ count
106 assert truelen
< 0x8000 and falselen
< 0x8000
109 entry
.append(0x80 |
(truelen
>> 8))
110 entry
.append(truelen
& 0xff)
112 entry
.append(truelen
& 0x7f)
114 entry
.append(0x80 |
(falselen
>> 8))
115 entry
.append(falselen
& 0xff)
117 entry
.append(falselen
& 0x7f)
119 compressed
.append(entry
)
123 def print_singletons(uppers
, lowers
, uppersname
, lowersname
):
124 print("const {}: &'static [(u8, u8)] = &[".format(uppersname
))
126 print(" ({:#04x}, {}),".format(u
, c
))
128 print("const {}: &'static [u8] = &[".format(lowersname
))
129 for i
in range(0, len(lowers
), 8):
130 print(" {}".format(" ".join("{:#04x},".format(l
) for l
in lowers
[i
:i
+8])))
133 def print_normal(normal
, normalname
):
134 print("const {}: &'static [u8] = &[".format(normalname
))
136 print(" {}".format(" ".join("{:#04x},".format(i
) for i
in v
)))
140 file = get_file("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
142 codepoints
= get_codepoints(file)
151 for a
, b
in to_ranges(get_escaped(codepoints
)):
153 extra
.append((a
, b
- a
))
156 singletons1
.append(a
& ~CUTOFF
)
158 singletons0
.append(a
)
161 singletons1
.append(a
& ~CUTOFF
)
162 singletons1
.append((a
+ 1) & ~CUTOFF
)
164 singletons0
.append(a
)
165 singletons0
.append(a
+ 1)
168 extra
.append((a
, b
- a
))
170 normal1
.append((a
& ~CUTOFF
, b
- a
))
172 normal0
.append((a
, b
- a
))
174 singletons0u
, singletons0l
= compress_singletons(singletons0
)
175 singletons1u
, singletons1l
= compress_singletons(singletons1
)
176 normal0
= compress_normal(normal0
)
177 normal1
= compress_normal(normal1
)
180 // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
181 // file at the top-level directory of this distribution and at
182 // http://rust-lang.org/COPYRIGHT.
184 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
185 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
186 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
187 // option. This file may not be copied, modified, or distributed
188 // except according to those terms.
190 // NOTE: The following code was generated by "src/etc/char_private.py",
191 // do not edit directly!
193 fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8],
194 normal: &[u8]) -> bool {
195 let xupper = (x >> 8) as u8;
196 let mut lowerstart = 0;
197 for &(upper, lowercount) in singletonuppers {
198 let lowerend = lowerstart + lowercount as usize;
200 for &lower in &singletonlowers[lowerstart..lowerend] {
201 if lower == x as u8 {
205 } else if xupper < upper {
208 lowerstart = lowerend;
211 let mut x = x as i32;
212 let mut normal = normal.iter().cloned();
213 let mut current = true;
214 while let Some(v) = normal.next() {
215 let len = if v & 0x80 != 0 {
216 ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
229 pub(crate) fn is_printable(x: char) -> bool {
231 let lower = x as u16;
233 check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
234 } else if x < 0x20000 {
235 check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
239 print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a
, a
+ b
))
240 print(" return false;")
248 print_singletons(singletons0u
, singletons0l
, 'SINGLETONS0U', 'SINGLETONS0L')
249 print_singletons(singletons1u
, singletons1l
, 'SINGLETONS1U', 'SINGLETONS1L')
250 print_normal(normal0
, 'NORMAL0')
251 print_normal(normal1
, 'NORMAL1')
253 if __name__
== '__main__':