]>
Commit | Line | Data |
---|---|---|
fa17b1ce | 1 | use std::collections::HashMap; |
c443f58b | 2 | use std::ops::Range; |
ee53955f | 3 | |
c443f58b WB |
/// Location and identity of a single chunk referenced by an index.
pub struct ChunkReadInfo {
    /// Half-open range covered by the chunk.
    ///
    /// NOTE(review): presumably byte offsets into the indexed data — confirm
    /// against the implementors of `IndexFile::chunk_info`.
    pub range: Range<u64>,
    /// 32-byte digest of the chunk.
    pub digest: [u8; 32],
}

impl ChunkReadInfo {
    /// Length of `range`, i.e. `range.end - range.start`.
    ///
    /// Assumes `range.start <= range.end`; the subtraction is a plain `u64`
    /// subtraction and overflows otherwise.
    #[inline]
    pub fn size(&self) -> u64 {
        let Range { start, end } = &self.range;
        end - start
    }
}
15 | ||
a660978c DM |
16 | /// Trait to get digest list from index files |
17 | /// | |
18 | /// To allow easy iteration over all used chunks. | |
5c1130df | 19 | pub trait IndexFile { |
7bc1d727 WB |
20 | fn index_count(&self) -> usize; |
21 | fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>; | |
a660978c | 22 | fn index_bytes(&self) -> u64; |
fdaab0df | 23 | fn chunk_info(&self, pos: usize) -> Option<ChunkReadInfo>; |
f4bf7dfc | 24 | |
1f82f9b7 | 25 | /// Compute index checksum and size |
2e079b8b | 26 | fn compute_csum(&self) -> ([u8; 32], u64); |
1f82f9b7 | 27 | |
f4bf7dfc DM |
28 | /// Returns most often used chunks |
29 | fn find_most_used_chunks(&self, max: usize) -> HashMap<[u8; 32], usize> { | |
30 | let mut map = HashMap::new(); | |
31 | ||
32 | for pos in 0..self.index_count() { | |
33 | let digest = self.index_digest(pos).unwrap(); | |
34 | ||
35 | let count = map.entry(*digest).or_insert(0); | |
36 | *count += 1; | |
37 | } | |
38 | ||
39 | let mut most_used = Vec::new(); | |
40 | ||
41 | for (digest, count) in map { | |
42 | if count <= 1 { continue; } | |
43 | match most_used.binary_search_by_key(&count, |&(_digest, count)| count) { | |
44 | Ok(p) => most_used.insert(p, (digest, count)), | |
45 | Err(p) => most_used.insert(p, (digest, count)), | |
46 | } | |
47 | ||
48 | if most_used.len() > max { let _ = most_used.pop(); } | |
49 | } | |
50 | ||
51 | let mut map = HashMap::new(); | |
52 | ||
53 | for data in most_used { | |
54 | map.insert(data.0, data.1); | |
55 | } | |
56 | ||
57 | map | |
58 | } | |
7bc1d727 | 59 | } |