]>
git.proxmox.com Git - rustc.git/blob - src/tools/unicode-table-generator/src/cascading_map.rs
2 use crate::raw_emitter
::RawEmitter
;
3 use std
::collections
::HashMap
;
4 use std
::fmt
::Write
as _
;
8 pub fn emit_cascading_map(&mut self, ranges
: &[Range
<u32>]) -> bool
{
9 let mut map
: [u8; 256] = [
10 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23 .flat_map(|r
| (r
.start
..r
.end
).into_iter().collect
::<Vec
<u32>>())
24 .collect
::<Vec
<u32>>();
26 println
!("there are {} points", points
.len());
28 // how many distinct ranges need to be counted?
29 let mut codepoints_by_high_bytes
= HashMap
::<usize, Vec
<u32>>::new();
31 // assert that there is no whitespace over the 0x3000 range.
32 assert
!(point
<= 0x3000, "the highest unicode whitespace value has changed");
33 let high_bytes
= point
as usize >> 8;
34 let codepoints
= codepoints_by_high_bytes
.entry(high_bytes
).or_insert_with(Vec
::new
);
35 codepoints
.push(point
);
38 let mut bit_for_high_byte
= 1u8;
39 let mut arms
= Vec
::<String
>::new();
41 let mut high_bytes
: Vec
<usize> =
42 codepoints_by_high_bytes
.keys().map(|k
| k
.clone()).collect();
44 for high_byte
in high_bytes
{
45 let codepoints
= codepoints_by_high_bytes
.get_mut(&high_byte
).unwrap();
46 if codepoints
.len() == 1 {
47 let ch
= codepoints
.pop().unwrap();
48 arms
.push(format
!("{} => c as u32 == {:#04x}", high_byte
, ch
));
51 // more than 1 codepoint in this arm
52 for codepoint
in codepoints
{
53 map
[(*codepoint
& 0xff) as usize] |= bit_for_high_byte
;
56 "{} => WHITESPACE_MAP[c as usize & 0xff] & {} != 0",
57 high_byte
, bit_for_high_byte
59 bit_for_high_byte
<<= 1;
62 writeln
!(&mut self.file
, "static WHITESPACE_MAP: [u8; 256] = [{}];", fmt_list(map
.iter()))
64 self.bytes_used
+= 256;
66 writeln
!(&mut self.file
, "#[inline]").unwrap();
67 writeln
!(&mut self.file
, "pub fn lookup(c: char) -> bool {{").unwrap();
68 writeln
!(&mut self.file
, " match c as u32 >> 8 {{").unwrap();
70 writeln
!(&mut self.file
, " {},", arm
).unwrap();
72 writeln
!(&mut self.file
, " _ => false,").unwrap();
73 writeln
!(&mut self.file
, " }}").unwrap();
74 writeln
!(&mut self.file
, "}}").unwrap();