]> git.proxmox.com Git - rustc.git/blame - src/libcore/unicode/bool_trie.rs
New upstream version 1.41.1+dfsg1
[rustc.git] / src / libcore / unicode / bool_trie.rs
CommitLineData
2c00a5a8
XL
1/// BoolTrie is a trie for representing a set of Unicode codepoints. It is
2/// implemented with postfix compression (sharing of identical child nodes),
3/// which gives both compact size and fast lookup.
4///
5/// The space of Unicode codepoints is divided into 3 subareas, each
6/// represented by a trie with different depth. In the first (0..0x800), there
7/// is no trie structure at all; each u64 entry corresponds to a bitvector
8/// effectively holding 64 bool values.
9///
10/// In the second (0x800..0x10000), each child of the root node represents a
11/// 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
12/// the trie stores an 8-bit index into a shared table of leaf values. This
13/// exploits the fact that in reasonable sets, many such leaves can be shared.
14///
15/// In the third (0x10000..0x110000), each child of the root node represents a
16/// 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
17/// of a child tree. Each of these 64 bytes represents an index into the table
18/// of shared 64-bit leaf values. This exploits the sparse structure in the
19/// non-BMP range of most Unicode sets.
20pub struct BoolTrie {
21 // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
60c5eb7d 22 pub r1: [u64; 32], // leaves
2c00a5a8
XL
23
24 // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
25 pub r2: [u8; 992], // first level
60c5eb7d 26 pub r3: &'static [u64], // leaves
2c00a5a8
XL
27
28 // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
60c5eb7d
XL
29 pub r4: [u8; 256], // first level
30 pub r5: &'static [u8], // second level
31 pub r6: &'static [u64], // leaves
2c00a5a8
XL
32}
33impl BoolTrie {
34 pub fn lookup(&self, c: char) -> bool {
83c7162d 35 let c = c as u32;
2c00a5a8 36 if c < 0x800 {
83c7162d 37 trie_range_leaf(c, self.r1[(c >> 6) as usize])
2c00a5a8 38 } else if c < 0x10000 {
83c7162d 39 let child = self.r2[(c >> 6) as usize - 0x20];
2c00a5a8
XL
40 trie_range_leaf(c, self.r3[child as usize])
41 } else {
83c7162d
XL
42 let child = self.r4[(c >> 12) as usize - 0x10];
43 let leaf = self.r5[((child as usize) << 6) + ((c >> 6) as usize & 0x3f)];
2c00a5a8
XL
44 trie_range_leaf(c, self.r6[leaf as usize])
45 }
46 }
47}
48
49pub struct SmallBoolTrie {
50 pub(crate) r1: &'static [u8], // first level
60c5eb7d 51 pub(crate) r2: &'static [u64], // leaves
2c00a5a8
XL
52}
53
54impl SmallBoolTrie {
55 pub fn lookup(&self, c: char) -> bool {
83c7162d
XL
56 let c = c as u32;
57 match self.r1.get((c >> 6) as usize) {
2c00a5a8
XL
58 Some(&child) => trie_range_leaf(c, self.r2[child as usize]),
59 None => false,
60 }
61 }
62}
63
83c7162d 64fn trie_range_leaf(c: u32, bitmap_chunk: u64) -> bool {
2c00a5a8
XL
65 ((bitmap_chunk >> (c & 63)) & 1) != 0
66}