]> git.proxmox.com Git - rustc.git/blob - src/tools/unicode-table-generator/src/cascading_map.rs
New upstream version 1.65.0+dfsg1
[rustc.git] / src / tools / unicode-table-generator / src / cascading_map.rs
1 use crate::fmt_list;
2 use crate::raw_emitter::RawEmitter;
3 use std::collections::HashMap;
4 use std::fmt::Write as _;
5 use std::ops::Range;
6
7 impl RawEmitter {
8 pub fn emit_cascading_map(&mut self, ranges: &[Range<u32>]) -> bool {
9 let mut map: [u8; 256] = [
10 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19 ];
20
21 let points = ranges
22 .iter()
23 .flat_map(|r| (r.start..r.end).into_iter().collect::<Vec<u32>>())
24 .collect::<Vec<u32>>();
25
26 println!("there are {} points", points.len());
27
28 // how many distinct ranges need to be counted?
29 let mut codepoints_by_high_bytes = HashMap::<usize, Vec<u32>>::new();
30 for point in points {
31 // assert that there is no whitespace over the 0x3000 range.
32 assert!(point <= 0x3000, "the highest unicode whitespace value has changed");
33 let high_bytes = point as usize >> 8;
34 let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_insert_with(Vec::new);
35 codepoints.push(point);
36 }
37
38 let mut bit_for_high_byte = 1u8;
39 let mut arms = Vec::<String>::new();
40
41 let mut high_bytes: Vec<usize> =
42 codepoints_by_high_bytes.keys().map(|k| k.clone()).collect();
43 high_bytes.sort();
44 for high_byte in high_bytes {
45 let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
46 if codepoints.len() == 1 {
47 let ch = codepoints.pop().unwrap();
48 arms.push(format!("{} => c as u32 == {:#04x}", high_byte, ch));
49 continue;
50 }
51 // more than 1 codepoint in this arm
52 for codepoint in codepoints {
53 map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
54 }
55 arms.push(format!(
56 "{} => WHITESPACE_MAP[c as usize & 0xff] & {} != 0",
57 high_byte, bit_for_high_byte
58 ));
59 bit_for_high_byte <<= 1;
60 }
61
62 writeln!(&mut self.file, "static WHITESPACE_MAP: [u8; 256] = [{}];", fmt_list(map.iter()))
63 .unwrap();
64 self.bytes_used += 256;
65
66 writeln!(&mut self.file, "#[inline]").unwrap();
67 writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
68 writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap();
69 for arm in arms {
70 writeln!(&mut self.file, " {},", arm).unwrap();
71 }
72 writeln!(&mut self.file, " _ => false,").unwrap();
73 writeln!(&mut self.file, " }}").unwrap();
74 writeln!(&mut self.file, "}}").unwrap();
75
76 true
77 }
78 }