]>
Commit | Line | Data |
---|---|---|
4fa71e05 DM |
1 | //! Helpers to generate a binary search tree stored in an array from a |
2 | //! sorted array. | |
3 | //! | |
4 | //! Specifically, for any given sorted array 'input' permute the | |
5 | //! array so that the following rule holds: | |
6 | //! | |
7 | //! For each array item with index i, the item at 2i+1 is smaller and | |
8 | //! the item 2i+2 is larger. | |
9 | //! | |
10 | //! This structure permits efficient (meaning: O(log(n)) binary | |
11 | //! searches: start with item i=0 (i.e. the root of the BST), compare | |
12 | //! the value with the searched item, if smaller proceed at item | |
13 | //! 2i+1, if larger proceed at item 2i+2, and repeat, until either | |
14 | //! the item is found, or the indexes grow beyond the array size, | |
15 | //! which means the entry does not exist. | |
16 | //! | |
17 | //! Effectively this implements bisection, but instead of jumping | |
18 | //! around wildly in the array during a single search we only search | |
19 | //! with strictly monotonically increasing indexes. | |
20 | //! | |
21 | //! Algorithm is from casync (camakebst.c), simplified and optimized | |
22 | //! for rust. Permutation function originally by L. Bressel, 2017. We | |
23 | //! pass permutation info to user provided callback, which actually | |
24 | //! implements the data copy. | |
25 | //! | |
26 | //! The Wikipedia Artikel for [Binary | |
27 | //! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short | |
28 | //! intro howto store binary trees using an array. | |
48147efd | 29 | |
02491b8f CE |
30 | use std::cmp::Ordering; |
31 | ||
f58f426e | 32 | #[allow(clippy::many_single_char_names)] |
95bd5dfe DM |
33 | fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>( |
34 | copy_func: &mut F, | |
b17d7149 | 35 | // we work on input array input[o..o+n] |
95bd5dfe | 36 | n: usize, |
b17d7149 | 37 | o: usize, |
95bd5dfe DM |
38 | e: usize, |
39 | i: usize, | |
40 | ) { | |
41 | let p = 1 << e; | |
42 | ||
43 | let t = p + (p>>1) - 1; | |
44 | ||
45 | let m = if n > t { | |
46 | // |...........p.............t....n........(2p)| | |
47 | p - 1 | |
48 | } else { | |
49 | // |...........p.....n.......t.............(2p)| | |
50 | p - 1 - (t-n) | |
51 | }; | |
52 | ||
53 | (copy_func)(o+m, i); | |
54 | ||
55 | if m > 0 { | |
0b78833d | 56 | copy_binary_search_tree_inner(copy_func, m, o, e-1, i*2+1); |
95bd5dfe DM |
57 | } |
58 | ||
59 | if (m + 1) < n { | |
60 | copy_binary_search_tree_inner(copy_func, n-m-1, o+m+1, e-1, i*2+2); | |
61 | } | |
62 | } | |
63 | ||
add5861e | 64 | /// This function calls the provided `copy_func()` with the permutation |
4fa71e05 DM |
65 | /// info. |
66 | /// | |
67 | /// ``` | |
7dfa17c7 | 68 | /// # use proxmox_backup::pxar::copy_binary_search_tree; |
4fa71e05 DM |
69 | /// copy_binary_search_tree(5, |src, dest| { |
70 | /// println!("Copy {} to {}", src, dest); | |
71 | /// }); | |
72 | /// ``` | |
73 | /// | |
add5861e | 74 | /// This will produce the following output: |
4fa71e05 | 75 | /// |
6cd28d20 DM |
76 | /// ```no-compile |
77 | /// Copy 3 to 0 | |
78 | /// Copy 1 to 1 | |
79 | /// Copy 0 to 3 | |
80 | /// Copy 2 to 4 | |
81 | /// Copy 4 to 2 | |
82 | /// ``` | |
4fa71e05 | 83 | /// |
add5861e | 84 | /// So this generates the following permutation: `[3,1,4,0,2]`. |
4fa71e05 | 85 | |
95bd5dfe DM |
86 | pub fn copy_binary_search_tree<F: FnMut(usize, usize)>( |
87 | n: usize, | |
88 | mut copy_func: F, | |
89 | ) { | |
90 | if n == 0 { return }; | |
91 | let e = (64 - n.leading_zeros() - 1) as usize; // fast log2(n) | |
0866748d | 92 | |
95bd5dfe DM |
93 | copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0); |
94 | } | |
0b78833d | 95 | |
02491b8f CE |
96 | |
97 | /// This function searches for the index where the comparison by the provided | |
98 | /// `compare()` function returns `Ordering::Equal`. | |
99 | /// The order of the comparison matters (noncommutative) and should be search | |
100 | /// value compared to value at given index as shown in the examples. | |
101 | /// The parameter `skip_multiples` defines the number of matches to ignore while | |
102 | /// searching before returning the index in order to lookup duplicate entries in | |
103 | /// the tree. | |
104 | /// | |
105 | /// ``` | |
106 | /// # use proxmox_backup::pxar::{copy_binary_search_tree, search_binary_tree_by}; | |
107 | /// let mut vals = vec![0,1,2,2,2,3,4,5,6,6,7,8,8,8]; | |
108 | /// | |
109 | /// let clone = vals.clone(); | |
110 | /// copy_binary_search_tree(vals.len(), |s, d| { | |
111 | /// vals[d] = clone[s]; | |
112 | /// }); | |
113 | /// let should_be = vec![5,2,8,1,3,6,8,0,2,2,4,6,7,8]; | |
114 | /// assert_eq!(vals, should_be); | |
115 | /// | |
116 | /// let find = 8; | |
117 | /// let skip_multiples = 0; | |
118 | /// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
119 | /// assert_eq!(idx, Some(2)); | |
120 | /// | |
121 | /// let find = 8; | |
122 | /// let skip_multiples = 1; | |
123 | /// let idx = search_binary_tree_by(2, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
124 | /// assert_eq!(idx, Some(6)); | |
125 | /// | |
126 | /// let find = 8; | |
127 | /// let skip_multiples = 1; | |
128 | /// let idx = search_binary_tree_by(6, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
129 | /// assert_eq!(idx, Some(13)); | |
130 | /// | |
131 | /// let find = 5; | |
132 | /// let skip_multiples = 1; | |
133 | /// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
134 | /// assert!(idx.is_none()); | |
ebfb2df2 CE |
135 | /// |
136 | /// let find = 5; | |
137 | /// let skip_multiples = 0; | |
138 | /// // if start index is equal to the array length, `None` is returned. | |
139 | /// let idx = search_binary_tree_by(vals.len(), vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
140 | /// assert!(idx.is_none()); | |
141 | /// | |
142 | /// let find = 5; | |
143 | /// let skip_multiples = 0; | |
144 | /// // if start index is larger than length, `None` is returned. | |
145 | /// let idx = search_binary_tree_by(vals.len() + 1, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); | |
146 | /// assert!(idx.is_none()); | |
02491b8f CE |
147 | /// ``` |
148 | ||
149 | pub fn search_binary_tree_by<F: Copy + Fn(usize) -> Ordering>( | |
150 | start: usize, | |
151 | size: usize, | |
152 | skip_multiples: usize, | |
153 | compare: F | |
154 | ) -> Option<usize> { | |
48f6d677 | 155 | if start >= size { |
02491b8f CE |
156 | return None; |
157 | } | |
158 | ||
159 | let mut skip = skip_multiples; | |
160 | let cmp = compare(start); | |
161 | if cmp == Ordering::Equal { | |
162 | if skip == 0 { | |
163 | // Found matching hash and want this one | |
164 | return Some(start); | |
165 | } | |
166 | // Found matching hash, but we should skip the first `skip_multiple`, | |
167 | // so continue search with reduced skip count. | |
168 | skip -= 1; | |
169 | } | |
170 | ||
171 | if cmp == Ordering::Less || cmp == Ordering::Equal { | |
172 | let res = search_binary_tree_by(2 * start + 1, size, skip, compare); | |
173 | if res.is_some() { | |
174 | return res; | |
175 | } | |
176 | } | |
177 | ||
178 | if cmp == Ordering::Greater || cmp == Ordering::Equal { | |
179 | let res = search_binary_tree_by(2 * start + 2, size, skip, compare); | |
180 | if res.is_some() { | |
181 | return res; | |
182 | } | |
183 | } | |
184 | ||
185 | None | |
186 | } | |
187 | ||
0b78833d DM |
188 | #[test] |
189 | fn test_binary_search_tree() { | |
190 | ||
191 | fn run_test(len: usize) -> Vec<usize> { | |
192 | ||
193 | const MARKER: usize = 0xfffffff; | |
194 | let mut output = vec![]; | |
8f8d5a42 | 195 | for _i in 0..len { output.push(MARKER); } |
0b78833d DM |
196 | copy_binary_search_tree(len, |s, d| { |
197 | assert!(output[d] == MARKER); | |
198 | output[d] = s; | |
199 | }); | |
200 | if len < 32 { println!("GOT:{}:{:?}", len, output); } | |
201 | for i in 0..len { | |
202 | assert!(output[i] != MARKER); | |
203 | } | |
204 | output | |
205 | } | |
206 | ||
207 | assert!(run_test(0).len() == 0); | |
208 | assert!(run_test(1) == [0]); | |
209 | assert!(run_test(2) == [1,0]); | |
210 | assert!(run_test(3) == [1,0,2]); | |
211 | assert!(run_test(4) == [2,1,3,0]); | |
212 | assert!(run_test(5) == [3,1,4,0,2]); | |
213 | assert!(run_test(6) == [3,1,5,0,2,4]); | |
214 | assert!(run_test(7) == [3,1,5,0,2,4,6]); | |
215 | assert!(run_test(8) == [4,2,6,1,3,5,7,0]); | |
216 | assert!(run_test(9) == [5,3,7,1,4,6,8,0,2]); | |
217 | assert!(run_test(10) == [6,3,8,1,5,7,9,0,2,4]); | |
218 | assert!(run_test(11) == [7,3,9,1,5,8,10,0,2,4,6]); | |
219 | assert!(run_test(12) == [7,3,10,1,5,9,11,0,2,4,6,8]); | |
220 | assert!(run_test(13) == [7,3,11,1,5,9,12,0,2,4,6,8,10]); | |
221 | assert!(run_test(14) == [7,3,11,1,5,9,13,0,2,4,6,8,10,12]); | |
222 | assert!(run_test(15) == [7,3,11,1,5,9,13,0,2,4,6,8,10,12,14]); | |
223 | assert!(run_test(16) == [8,4,12,2,6,10,14,1,3,5,7,9,11,13,15,0]); | |
224 | assert!(run_test(17) == [9,5,13,3,7,11,15,1,4,6,8,10,12,14,16,0,2]); | |
225 | ||
226 | for len in 18..1000 { | |
227 | run_test(len); | |
228 | } | |
229 | } |