]> git.proxmox.com Git - rustc.git/blob - src/tools/unicode-table-generator/src/range_search.rs
New upstream version 1.65.0+dfsg1
[rustc.git] / src / tools / unicode-table-generator / src / range_search.rs
1 #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
2 #[inline(always)]
3 const fn bitset_search<
4 const N: usize,
5 const CHUNK_SIZE: usize,
6 const N1: usize,
7 const CANONICAL: usize,
8 const CANONICALIZED: usize,
9 >(
10 needle: u32,
11 chunk_idx_map: &[u8; N],
12 bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
13 bitset_canonical: &[u64; CANONICAL],
14 bitset_canonicalized: &[(u8, u8); CANONICALIZED],
15 ) -> bool {
16 let bucket_idx = (needle / 64) as usize;
17 let chunk_map_idx = bucket_idx / CHUNK_SIZE;
18 let chunk_piece = bucket_idx % CHUNK_SIZE;
19 // FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
20 // feature stabilizes.
21 let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
22 chunk_idx_map[chunk_map_idx]
23 } else {
24 return false;
25 };
26 let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
27 // FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
28 // feature stabilizes.
29 let word = if idx < bitset_canonical.len() {
30 bitset_canonical[idx]
31 } else {
32 let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
33 let mut word = bitset_canonical[real_idx as usize];
34 let should_invert = mapping & (1 << 6) != 0;
35 if should_invert {
36 word = !word;
37 }
38 // Lower 6 bits
39 let quantity = mapping & ((1 << 6) - 1);
40 if mapping & (1 << 7) != 0 {
41 // shift
42 word >>= quantity as u64;
43 } else {
44 word = word.rotate_left(quantity as u32);
45 }
46 word
47 };
48 (word & (1 << (needle % 64) as u64)) != 0
49 }
50
51 fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
52 short_offset_run_header & ((1 << 21) - 1)
53 }
54
55 fn decode_length(short_offset_run_header: u32) -> usize {
56 (short_offset_run_header >> 21) as usize
57 }
58
59 #[inline(always)]
60 fn skip_search<const SOR: usize, const OFFSETS: usize>(
61 needle: u32,
62 short_offset_runs: &[u32; SOR],
63 offsets: &[u8; OFFSETS],
64 ) -> bool {
65 // Note that this *cannot* be past the end of the array, as the last
66 // element is greater than std::char::MAX (the largest possible needle).
67 //
68 // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
69 // location cannot be past it, so Err(idx) != length either.
70 //
71 // This means that we can avoid bounds checking for the accesses below, too.
72 let last_idx =
73 match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
74 Ok(idx) => idx + 1,
75 Err(idx) => idx,
76 };
77
78 let mut offset_idx = decode_length(short_offset_runs[last_idx]);
79 let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
80 decode_length(*next) - offset_idx
81 } else {
82 offsets.len() - offset_idx
83 };
84 let prev =
85 last_idx.checked_sub(1).map(|prev| decode_prefix_sum(short_offset_runs[prev])).unwrap_or(0);
86
87 let total = needle - prev;
88 let mut prefix_sum = 0;
89 for _ in 0..(length - 1) {
90 let offset = offsets[offset_idx];
91 prefix_sum += offset as u32;
92 if prefix_sum > total {
93 break;
94 }
95 offset_idx += 1;
96 }
97 offset_idx % 2 == 1
98 }