]>
git.proxmox.com Git - rustc.git/blob - src/libcore/unicode/bool_trie.rs
1 /// BoolTrie is a trie for representing a set of Unicode codepoints. It is
2 /// implemented with postfix compression (sharing of identical child nodes),
3 /// which gives both compact size and fast lookup.
5 /// The space of Unicode codepoints is divided into 3 subareas, each
6 /// represented by a trie with different depth. In the first (0..0x800), there
7 /// is no trie structure at all; each u64 entry corresponds to a bitvector
8 /// effectively holding 64 bool values.
10 /// In the second (0x800..0x10000), each child of the root node represents a
11 /// 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
12 /// the trie stores an 8-bit index into a shared table of leaf values. This
13 /// exploits the fact that in reasonable sets, many such leaves can be shared.
15 /// In the third (0x10000..0x110000), each child of the root node represents a
16 /// 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
17 /// of a child tree. Each of these 64 bytes represents an index into the table
18 /// of shared 64-bit leaf values. This exploits the sparse structure in the
19 /// non-BMP range of most Unicode sets.
21 // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
22 pub r1
: [u64; 32], // leaves
24 // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
25 pub r2
: [u8; 992], // first level
26 pub r3
: &'
static [u64], // leaves
28 // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
29 pub r4
: [u8; 256], // first level
30 pub r5
: &'
static [u8], // second level
31 pub r6
: &'
static [u64], // leaves
34 pub fn lookup(&self, c
: char) -> bool
{
37 trie_range_leaf(c
, self.r1
[(c
>> 6) as usize])
38 } else if c
< 0x10000 {
39 let child
= self.r2
[(c
>> 6) as usize - 0x20];
40 trie_range_leaf(c
, self.r3
[child
as usize])
42 let child
= self.r4
[(c
>> 12) as usize - 0x10];
43 let leaf
= self.r5
[((child
as usize) << 6) + ((c
>> 6) as usize & 0x3f)];
44 trie_range_leaf(c
, self.r6
[leaf
as usize])
49 pub struct SmallBoolTrie
{
50 pub(crate) r1
: &'
static [u8], // first level
51 pub(crate) r2
: &'
static [u64], // leaves
55 pub fn lookup(&self, c
: char) -> bool
{
57 match self.r1
.get((c
>> 6) as usize) {
58 Some(&child
) => trie_range_leaf(c
, self.r2
[child
as usize]),
64 fn trie_range_leaf(c
: u32, bitmap_chunk
: u64) -> bool
{
65 ((bitmap_chunk
>> (c
& 63)) & 1) != 0