// vendor/gix-odb/src/store_impls/dynamic/iter.rs (vendored in rustc 1.74.1+dfsg1)
use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter};

use gix_hash::ObjectId;

use crate::{
    loose,
    store::{handle, handle::SingleOrMultiIndex, types::PackId},
    store_impls::dynamic,
};

struct EntryForOrdering {
    pack_offset: u64,
    entry_index: u32,
    pack_index: u16,
}

enum State {
    Pack {
        index_iter: IntoIter<handle::IndexLookup>,
        index: handle::IndexLookup,
        ordered_entries: Option<Vec<EntryForOrdering>>,
        entry_index: u32,
        num_objects: u32,
    },
    Loose {
        iter: loose::Iter,
        index: usize,
    },
    Depleted,
}

/// Define the order in which objects are returned.
#[derive(Default, Debug, Copy, Clone)]
pub enum Ordering {
    /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
    /// as sorted by their names as well.
    ///
    /// This mode uses no extra memory as it's the natural ordering of objects, and is best for obtaining all object ids as quickly as possible,
    /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode objects with this
    /// scheme as cache-hits are unlikely and memory maps are less efficient when loading objects in random order.
    #[default]
    PackLexicographicalThenLooseLexicographical,
    /// Traverse packs first, yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
    /// Then follow loose objects sorted by their names.
    ///
    /// This mode allocates and has to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
    /// memory allocated once per pack. This price is usually worth paying once querying object information is planned, as pack caches
    /// are used more efficiently that way.
    PackAscendingOffsetThenLooseLexicographical,
}

/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
/// order that is costly to traverse when querying object information or decoding them.
///
/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
pub struct AllObjects {
    state: State,
    num_objects: usize,
    loose_dbs: Arc<Vec<loose::Store>>,
    order: Ordering,
}

/// Builder
impl AllObjects {
    /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
    pub fn with_ordering(mut self, order: Ordering) -> Self {
        self.order = order;
        self
    }
}
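
// Hedged usage sketch (not part of the upstream crate): how a caller might pick the
// offset-sorted ordering when object data will be queried or decoded afterwards, so that
// pack caches and memory maps stay effective. The helper name is illustrative only.
#[allow(dead_code)]
fn ids_for_decoding(db: &dynamic::Store) -> Result<AllObjects, crate::store::load_index::Error> {
    // Pay the per-pack sort up-front in exchange for cache-friendly object lookups later.
    Ok(AllObjects::new(db)?.with_ordering(Ordering::PackAscendingOffsetThenLooseLexicographical))
}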

impl AllObjects {
    /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
    pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> {
        let snapshot = db.load_all_indices()?;

        let packed_objects = snapshot
            .indices
            .iter()
            .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize));
        let mut index_iter = snapshot.indices.into_iter();
        let loose_dbs = snapshot.loose_dbs;
        let order = Default::default();
        let state = match index_iter.next() {
            Some(index) => {
                let num_objects = index.num_objects();
                State::Pack {
                    index_iter,
                    ordered_entries: maybe_sort_entries(&index, order),
                    index,
                    entry_index: 0,
                    num_objects,
                }
            }
            None => {
                let index = 0;
                State::Loose {
                    iter: loose_dbs.get(index).expect("at least one loose db").iter(),
                    index,
                }
            }
        };
        Ok(AllObjects {
            state,
            loose_dbs,
            num_objects: packed_objects,
            order,
        })
    }
}

fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> {
    let mut order: Vec<_> = match order {
        Ordering::PackLexicographicalThenLooseLexicographical => return None,
        Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file {
            // We know that we cannot have more than u32 entry indices per pack.
            SingleOrMultiIndex::Single { index, .. } => index
                .iter()
                .enumerate()
                .map(|(idx, e)| EntryForOrdering {
                    pack_offset: e.pack_offset,
                    entry_index: idx as u32,
                    pack_index: 0,
                })
                .collect(),
            SingleOrMultiIndex::Multi { index, .. } => index
                .iter()
                .enumerate()
                .map(|(idx, e)| EntryForOrdering {
                    pack_offset: e.pack_offset,
                    entry_index: idx as u32,
                    pack_index: {
                        debug_assert!(
                            e.pack_index < PackId::max_packs_in_multi_index(),
                            "this shows the relation between u16 and pack_index (u32) and why this is OK"
                        );
                        e.pack_index as u16
                    },
                })
                .collect(),
        },
    };
    order.sort_by(|a, b| {
        a.pack_index
            .cmp(&b.pack_index)
            .then_with(|| a.pack_offset.cmp(&b.pack_offset))
    });
    Some(order)
}

impl Iterator for AllObjects {
    type Item = Result<ObjectId, loose::iter::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.state {
            State::Depleted => None,
            State::Pack {
                index_iter,
                ordered_entries,
                index,
                entry_index,
                num_objects,
            } => {
                if *entry_index < *num_objects {
                    let oid = match ordered_entries {
                        Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index),
                        None => index.oid_at_index(*entry_index),
                    }
                    .to_owned();
                    *entry_index += 1;
                    Some(Ok(oid))
                } else {
                    match index_iter.next() {
                        Some(new_index) => {
                            *ordered_entries = maybe_sort_entries(&new_index, self.order);
                            *index = new_index;
                            *entry_index = 0;
                            *num_objects = index.num_objects();
                        }
                        None => {
                            let index = 0;
                            self.state = State::Loose {
                                iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(),
                                index,
                            }
                        }
                    }
                    self.next()
                }
            }
            State::Loose { iter, index } => match iter.next() {
                Some(id) => Some(id),
                None => {
                    *index += 1;
                    match self.loose_dbs.get(*index).map(loose::Store::iter) {
                        Some(new_iter) => {
                            *iter = new_iter;
                            self.next()
                        }
                        None => {
                            self.state = State::Depleted;
                            None
                        }
                    }
                }
            },
        }
    }

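    // The lower bound counts packed objects only; loose objects are not included in the estimate.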
    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.num_objects, None)
    }
}

impl<S> super::Handle<S>
where
    S: Deref<Target = super::Store> + Clone,
{
    /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
    /// followed by all loose objects.
    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
        AllObjects::new(self.store_ref())
    }
}

impl dynamic::Store {
    /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
        AllObjects::new(self)
    }
}
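
// Hedged end-to-end sketch (not part of the upstream crate): iterate every, possibly
// duplicate, object id of a store and count the results, skipping loose objects whose
// files fail to parse. The function name is illustrative only.
#[allow(dead_code)]
fn count_all_object_ids(store: &dynamic::Store) -> Result<usize, dynamic::load_index::Error> {
    let objects = store.iter()?;
    // `size_hint().0` is only a lower bound (packed objects), so count the actual yields.
    Ok(objects.filter_map(Result::ok).count())
}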