]>
Commit | Line | Data |
---|---|---|
0a29b90c FG |
1 | use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter}; |
2 | ||
3 | use gix_hash::ObjectId; | |
4 | ||
5 | use crate::{ | |
6 | loose, | |
7 | store::{handle, handle::SingleOrMultiIndex, types::PackId}, | |
8 | store_impls::dynamic, | |
9 | }; | |
10 | ||
11 | struct EntryForOrdering { | |
12 | pack_offset: u64, | |
13 | entry_index: u32, | |
14 | pack_index: u16, | |
15 | } | |
16 | ||
17 | enum State { | |
18 | Pack { | |
19 | index_iter: IntoIter<handle::IndexLookup>, | |
20 | index: handle::IndexLookup, | |
21 | ordered_entries: Option<Vec<EntryForOrdering>>, | |
22 | entry_index: u32, | |
23 | num_objects: u32, | |
24 | }, | |
25 | Loose { | |
26 | iter: loose::Iter, | |
27 | index: usize, | |
28 | }, | |
29 | Depleted, | |
30 | } | |
31 | ||
32 | /// Define the order in which objects are returned. | |
49aad941 | 33 | #[derive(Default, Debug, Copy, Clone)] |
0a29b90c FG |
34 | pub enum Ordering { |
35 | /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects | |
36 | /// as sorted by their names as well. | |
37 | /// | |
38 | /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible, | |
39 | /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this | |
40 | /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order. | |
49aad941 | 41 | #[default] |
0a29b90c FG |
42 | PackLexicographicalThenLooseLexicographical, |
43 | /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first. | |
44 | /// Then follow loose objects sorted by their names. | |
45 | /// | |
46 | /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping | |
47 | /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches | |
48 | /// are more efficiently used that way. | |
49 | PackAscendingOffsetThenLooseLexicographical, | |
50 | } | |
51 | ||
0a29b90c FG |
52 | /// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an |
53 | /// order that is costly to traverse when querying object information or decoding them. | |
54 | /// | |
55 | /// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off. | |
56 | pub struct AllObjects { | |
57 | state: State, | |
58 | num_objects: usize, | |
59 | loose_dbs: Arc<Vec<loose::Store>>, | |
60 | order: Ordering, | |
61 | } | |
62 | ||
63 | /// Builder | |
64 | impl AllObjects { | |
65 | /// Set the ordering of the objects returned, trading off memory and latency for object query performance. | |
66 | pub fn with_ordering(mut self, order: Ordering) -> Self { | |
67 | self.order = order; | |
68 | self | |
69 | } | |
70 | } | |
71 | ||
72 | impl AllObjects { | |
73 | /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread. | |
74 | pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> { | |
75 | let snapshot = db.load_all_indices()?; | |
76 | ||
77 | let packed_objects = snapshot | |
78 | .indices | |
79 | .iter() | |
80 | .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize)); | |
81 | let mut index_iter = snapshot.indices.into_iter(); | |
82 | let loose_dbs = snapshot.loose_dbs; | |
83 | let order = Default::default(); | |
84 | let state = match index_iter.next() { | |
85 | Some(index) => { | |
86 | let num_objects = index.num_objects(); | |
87 | State::Pack { | |
88 | index_iter, | |
89 | ordered_entries: maybe_sort_entries(&index, order), | |
90 | index, | |
91 | entry_index: 0, | |
92 | num_objects, | |
93 | } | |
94 | } | |
95 | None => { | |
96 | let index = 0; | |
97 | State::Loose { | |
98 | iter: loose_dbs.get(index).expect("at least one loose db").iter(), | |
99 | index, | |
100 | } | |
101 | } | |
102 | }; | |
103 | Ok(AllObjects { | |
104 | state, | |
105 | loose_dbs, | |
106 | num_objects: packed_objects, | |
107 | order, | |
108 | }) | |
109 | } | |
110 | } | |
111 | ||
112 | fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> { | |
113 | let mut order: Vec<_> = match order { | |
114 | Ordering::PackLexicographicalThenLooseLexicographical => return None, | |
115 | Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file { | |
116 | // We know that we cannot have more than u32 entry indices per pack. | |
117 | SingleOrMultiIndex::Single { index, .. } => index | |
118 | .iter() | |
119 | .enumerate() | |
120 | .map(|(idx, e)| EntryForOrdering { | |
121 | pack_offset: e.pack_offset, | |
122 | entry_index: idx as u32, | |
123 | pack_index: 0, | |
124 | }) | |
125 | .collect(), | |
126 | SingleOrMultiIndex::Multi { index, .. } => index | |
127 | .iter() | |
128 | .enumerate() | |
129 | .map(|(idx, e)| EntryForOrdering { | |
130 | pack_offset: e.pack_offset, | |
131 | entry_index: idx as u32, | |
132 | pack_index: { | |
133 | debug_assert!( | |
134 | e.pack_index < PackId::max_packs_in_multi_index(), | |
135 | "this shows the relation between u16 and pack_index (u32) and why this is OK" | |
136 | ); | |
137 | e.pack_index as u16 | |
138 | }, | |
139 | }) | |
140 | .collect(), | |
141 | }, | |
142 | }; | |
143 | order.sort_by(|a, b| { | |
144 | a.pack_index | |
145 | .cmp(&b.pack_index) | |
146 | .then_with(|| a.pack_offset.cmp(&b.pack_offset)) | |
147 | }); | |
148 | Some(order) | |
149 | } | |
150 | ||
151 | impl Iterator for AllObjects { | |
152 | type Item = Result<ObjectId, loose::iter::Error>; | |
153 | ||
154 | fn next(&mut self) -> Option<Self::Item> { | |
155 | match &mut self.state { | |
156 | State::Depleted => None, | |
157 | State::Pack { | |
158 | index_iter, | |
159 | ordered_entries, | |
160 | index, | |
161 | entry_index, | |
162 | num_objects, | |
163 | } => { | |
164 | if *entry_index < *num_objects { | |
165 | let oid = match ordered_entries { | |
166 | Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index), | |
167 | None => index.oid_at_index(*entry_index), | |
168 | } | |
169 | .to_owned(); | |
170 | *entry_index += 1; | |
171 | Some(Ok(oid)) | |
172 | } else { | |
173 | match index_iter.next() { | |
174 | Some(new_index) => { | |
175 | *ordered_entries = maybe_sort_entries(&new_index, self.order); | |
176 | *index = new_index; | |
177 | *entry_index = 0; | |
178 | *num_objects = index.num_objects(); | |
179 | } | |
180 | None => { | |
181 | let index = 0; | |
182 | self.state = State::Loose { | |
183 | iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(), | |
184 | index, | |
185 | } | |
186 | } | |
187 | } | |
188 | self.next() | |
189 | } | |
190 | } | |
191 | State::Loose { iter, index } => match iter.next() { | |
192 | Some(id) => Some(id), | |
193 | None => { | |
194 | *index += 1; | |
781aab86 | 195 | match self.loose_dbs.get(*index).map(loose::Store::iter) { |
0a29b90c FG |
196 | Some(new_iter) => { |
197 | *iter = new_iter; | |
198 | self.next() | |
199 | } | |
200 | None => { | |
201 | self.state = State::Depleted; | |
202 | None | |
203 | } | |
204 | } | |
205 | } | |
206 | }, | |
207 | } | |
208 | } | |
209 | ||
210 | fn size_hint(&self) -> (usize, Option<usize>) { | |
211 | (self.num_objects, None) | |
212 | } | |
213 | } | |
214 | ||
215 | impl<S> super::Handle<S> | |
216 | where | |
217 | S: Deref<Target = super::Store> + Clone, | |
218 | { | |
219 | /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates), | |
220 | /// followed by all loose objects. | |
221 | pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> { | |
222 | AllObjects::new(self.store_ref()) | |
223 | } | |
224 | } | |
225 | ||
226 | impl dynamic::Store { | |
227 | /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store. | |
228 | pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> { | |
229 | AllObjects::new(self) | |
230 | } | |
231 | } |