]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/binary_search_tree.rs
63a1d3fa43f068d4a18cfb5cc2a1fd87c7df3dc2
[proxmox-backup.git] / src / pxar / binary_search_tree.rs
1 //! Helpers to generate a binary search tree stored in an array from a
2 //! sorted array.
3 //!
4 //! Specifically, for any given sorted array 'input' permute the
5 //! array so that the following rule holds:
6 //!
7 //! For each array item with index i, the item at 2i+1 is smaller and
8 //! the item 2i+2 is larger.
9 //!
10 //! This structure permits efficient (meaning: O(log(n)) binary
11 //! searches: start with item i=0 (i.e. the root of the BST), compare
12 //! the value with the searched item, if smaller proceed at item
13 //! 2i+1, if larger proceed at item 2i+2, and repeat, until either
14 //! the item is found, or the indexes grow beyond the array size,
15 //! which means the entry does not exist.
16 //!
17 //! Effectively this implements bisection, but instead of jumping
18 //! around wildly in the array during a single search we only search
19 //! with strictly monotonically increasing indexes.
20 //!
21 //! Algorithm is from casync (camakebst.c), simplified and optimized
22 //! for rust. Permutation function originally by L. Bressel, 2017. We
23 //! pass permutation info to user provided callback, which actually
24 //! implements the data copy.
25 //!
26 //! The Wikipedia Artikel for [Binary
27 //! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
28 //! intro howto store binary trees using an array.
29
30 use std::cmp::Ordering;
31
32 #[allow(clippy::many_single_char_names)]
33 fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
34 copy_func: &mut F,
35 // we work on input array input[o..o+n]
36 n: usize,
37 o: usize,
38 e: usize,
39 i: usize,
40 ) {
41 let p = 1 << e;
42
43 let t = p + (p>>1) - 1;
44
45 let m = if n > t {
46 // |...........p.............t....n........(2p)|
47 p - 1
48 } else {
49 // |...........p.....n.......t.............(2p)|
50 p - 1 - (t-n)
51 };
52
53 (copy_func)(o+m, i);
54
55 if m > 0 {
56 copy_binary_search_tree_inner(copy_func, m, o, e-1, i*2+1);
57 }
58
59 if (m + 1) < n {
60 copy_binary_search_tree_inner(copy_func, n-m-1, o+m+1, e-1, i*2+2);
61 }
62 }
63
64 /// This function calls the provided `copy_func()` with the permutation
65 /// info.
66 ///
67 /// ```
68 /// # use proxmox_backup::pxar::copy_binary_search_tree;
69 /// copy_binary_search_tree(5, |src, dest| {
70 /// println!("Copy {} to {}", src, dest);
71 /// });
72 /// ```
73 ///
74 /// This will produce the following output:
75 ///
76 /// ```no-compile
77 /// Copy 3 to 0
78 /// Copy 1 to 1
79 /// Copy 0 to 3
80 /// Copy 2 to 4
81 /// Copy 4 to 2
82 /// ```
83 ///
84 /// So this generates the following permutation: `[3,1,4,0,2]`.
85
86 pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
87 n: usize,
88 mut copy_func: F,
89 ) {
90 if n == 0 { return };
91 let e = (64 - n.leading_zeros() - 1) as usize; // fast log2(n)
92
93 copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
94 }
95
96
97 /// This function searches for the index where the comparison by the provided
98 /// `compare()` function returns `Ordering::Equal`.
99 /// The order of the comparison matters (noncommutative) and should be search
100 /// value compared to value at given index as shown in the examples.
101 /// The parameter `skip_multiples` defines the number of matches to ignore while
102 /// searching before returning the index in order to lookup duplicate entries in
103 /// the tree.
104 ///
105 /// ```
106 /// # use proxmox_backup::pxar::{copy_binary_search_tree, search_binary_tree_by};
107 /// let mut vals = vec![0,1,2,2,2,3,4,5,6,6,7,8,8,8];
108 ///
109 /// let clone = vals.clone();
110 /// copy_binary_search_tree(vals.len(), |s, d| {
111 /// vals[d] = clone[s];
112 /// });
113 /// let should_be = vec![5,2,8,1,3,6,8,0,2,2,4,6,7,8];
114 /// assert_eq!(vals, should_be);
115 ///
116 /// let find = 8;
117 /// let skip_multiples = 0;
118 /// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
119 /// assert_eq!(idx, Some(2));
120 ///
121 /// let find = 8;
122 /// let skip_multiples = 1;
123 /// let idx = search_binary_tree_by(2, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
124 /// assert_eq!(idx, Some(6));
125 ///
126 /// let find = 8;
127 /// let skip_multiples = 1;
128 /// let idx = search_binary_tree_by(6, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
129 /// assert_eq!(idx, Some(13));
130 ///
131 /// let find = 5;
132 /// let skip_multiples = 1;
133 /// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
134 /// assert!(idx.is_none());
135 ///
136 /// let find = 5;
137 /// let skip_multiples = 0;
138 /// // if start index is equal to the array length, `None` is returned.
139 /// let idx = search_binary_tree_by(vals.len(), vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
140 /// assert!(idx.is_none());
141 ///
142 /// let find = 5;
143 /// let skip_multiples = 0;
144 /// // if start index is larger than length, `None` is returned.
145 /// let idx = search_binary_tree_by(vals.len() + 1, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
146 /// assert!(idx.is_none());
147 /// ```
148
149 pub fn search_binary_tree_by<F: Copy + Fn(usize) -> Ordering>(
150 start: usize,
151 size: usize,
152 skip_multiples: usize,
153 compare: F
154 ) -> Option<usize> {
155 if start >= size {
156 return None;
157 }
158
159 let mut skip = skip_multiples;
160 let cmp = compare(start);
161 if cmp == Ordering::Equal {
162 if skip == 0 {
163 // Found matching hash and want this one
164 return Some(start);
165 }
166 // Found matching hash, but we should skip the first `skip_multiple`,
167 // so continue search with reduced skip count.
168 skip -= 1;
169 }
170
171 if cmp == Ordering::Less || cmp == Ordering::Equal {
172 let res = search_binary_tree_by(2 * start + 1, size, skip, compare);
173 if res.is_some() {
174 return res;
175 }
176 }
177
178 if cmp == Ordering::Greater || cmp == Ordering::Equal {
179 let res = search_binary_tree_by(2 * start + 2, size, skip, compare);
180 if res.is_some() {
181 return res;
182 }
183 }
184
185 None
186 }
187
188 #[test]
189 fn test_binary_search_tree() {
190
191 fn run_test(len: usize) -> Vec<usize> {
192
193 const MARKER: usize = 0xfffffff;
194 let mut output = vec![];
195 for _i in 0..len { output.push(MARKER); }
196 copy_binary_search_tree(len, |s, d| {
197 assert!(output[d] == MARKER);
198 output[d] = s;
199 });
200 if len < 32 { println!("GOT:{}:{:?}", len, output); }
201 for i in 0..len {
202 assert!(output[i] != MARKER);
203 }
204 output
205 }
206
207 assert!(run_test(0).len() == 0);
208 assert!(run_test(1) == [0]);
209 assert!(run_test(2) == [1,0]);
210 assert!(run_test(3) == [1,0,2]);
211 assert!(run_test(4) == [2,1,3,0]);
212 assert!(run_test(5) == [3,1,4,0,2]);
213 assert!(run_test(6) == [3,1,5,0,2,4]);
214 assert!(run_test(7) == [3,1,5,0,2,4,6]);
215 assert!(run_test(8) == [4,2,6,1,3,5,7,0]);
216 assert!(run_test(9) == [5,3,7,1,4,6,8,0,2]);
217 assert!(run_test(10) == [6,3,8,1,5,7,9,0,2,4]);
218 assert!(run_test(11) == [7,3,9,1,5,8,10,0,2,4,6]);
219 assert!(run_test(12) == [7,3,10,1,5,9,11,0,2,4,6,8]);
220 assert!(run_test(13) == [7,3,11,1,5,9,12,0,2,4,6,8,10]);
221 assert!(run_test(14) == [7,3,11,1,5,9,13,0,2,4,6,8,10,12]);
222 assert!(run_test(15) == [7,3,11,1,5,9,13,0,2,4,6,8,10,12,14]);
223 assert!(run_test(16) == [8,4,12,2,6,10,14,1,3,5,7,9,11,13,15,0]);
224 assert!(run_test(17) == [9,5,13,3,7,11,15,1,4,6,8,10,12,14,16,0,2]);
225
226 for len in 18..1000 {
227 run_test(len);
228 }
229 }