[rustc.git] / vendor / gix-odb / src / store_impls / dynamic / iter.rs

use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter};

use gix_hash::ObjectId;

use crate::{
    loose,
    store::{handle, handle::SingleOrMultiIndex, types::PackId},
    store_impls::dynamic,
};

struct EntryForOrdering {
    pack_offset: u64,
    entry_index: u32,
    pack_index: u16,
}

enum State {
    Pack {
        index_iter: IntoIter<handle::IndexLookup>,
        index: handle::IndexLookup,
        ordered_entries: Option<Vec<EntryForOrdering>>,
        entry_index: u32,
        num_objects: u32,
    },
    Loose {
        iter: loose::Iter,
        index: usize,
    },
    Depleted,
}

/// Define the order in which objects are returned.
#[derive(Default, Debug, Copy, Clone)]
pub enum Ordering {
    /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
    /// as sorted by their names as well.
    ///
    /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible,
    /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this
    /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order.
    #[default]
    PackLexicographicalThenLooseLexicographical,
    /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
    /// Then follow loose objects sorted by their names.
    ///
    /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
    /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches
    /// are more efficiently used that way.
    PackAscendingOffsetThenLooseLexicographical,
}

/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
/// order that is costly to traverse when querying object information or decoding them.
///
/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
pub struct AllObjects {
    state: State,
    num_objects: usize,
    loose_dbs: Arc<Vec<loose::Store>>,
    order: Ordering,
}

/// Builder
impl AllObjects {
    /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
    pub fn with_ordering(mut self, order: Ordering) -> Self {
        self.order = order;
        self
    }
}

impl AllObjects {
    /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
    pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> {
        let snapshot = db.load_all_indices()?;

        let packed_objects = snapshot
            .indices
            .iter()
            .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize));
        let mut index_iter = snapshot.indices.into_iter();
        let loose_dbs = snapshot.loose_dbs;
        let order = Default::default();
        let state = match index_iter.next() {
            Some(index) => {
                let num_objects = index.num_objects();
                State::Pack {
                    index_iter,
                    ordered_entries: maybe_sort_entries(&index, order),
                    index,
                    entry_index: 0,
                    num_objects,
                }
            }
            None => {
                let index = 0;
                State::Loose {
                    iter: loose_dbs.get(index).expect("at least one loose db").iter(),
                    index,
                }
            }
        };
        Ok(AllObjects {
            state,
            loose_dbs,
            num_objects: packed_objects,
            order,
        })
    }
}

fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> {
    let mut order: Vec<_> = match order {
        Ordering::PackLexicographicalThenLooseLexicographical => return None,
        Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file {
            // We know that we cannot have more than u32 entry indices per pack.
            SingleOrMultiIndex::Single { index, .. } => index
                .iter()
                .enumerate()
                .map(|(idx, e)| EntryForOrdering {
                    pack_offset: e.pack_offset,
                    entry_index: idx as u32,
                    pack_index: 0,
                })
                .collect(),
            SingleOrMultiIndex::Multi { index, .. } => index
                .iter()
                .enumerate()
                .map(|(idx, e)| EntryForOrdering {
                    pack_offset: e.pack_offset,
                    entry_index: idx as u32,
                    pack_index: {
                        debug_assert!(
                            e.pack_index < PackId::max_packs_in_multi_index(),
                            "this shows the relation between u16 and pack_index (u32) and why this is OK"
                        );
                        e.pack_index as u16
                    },
                })
                .collect(),
        },
    };
    order.sort_by(|a, b| {
        a.pack_index
            .cmp(&b.pack_index)
            .then_with(|| a.pack_offset.cmp(&b.pack_offset))
    });
    Some(order)
}

impl Iterator for AllObjects {
    type Item = Result<ObjectId, loose::iter::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.state {
            State::Depleted => None,
            State::Pack {
                index_iter,
                ordered_entries,
                index,
                entry_index,
                num_objects,
            } => {
                if *entry_index < *num_objects {
                    let oid = match ordered_entries {
                        Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index),
                        None => index.oid_at_index(*entry_index),
                    }
                    .to_owned();
                    *entry_index += 1;
                    Some(Ok(oid))
                } else {
                    match index_iter.next() {
                        Some(new_index) => {
                            *ordered_entries = maybe_sort_entries(&new_index, self.order);
                            *index = new_index;
                            *entry_index = 0;
                            *num_objects = index.num_objects();
                        }
                        None => {
                            let index = 0;
                            self.state = State::Loose {
                                iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(),
                                index,
                            }
                        }
                    }
                    self.next()
                }
            }
            State::Loose { iter, index } => match iter.next() {
                Some(id) => Some(id),
                None => {
                    *index += 1;
                    match self.loose_dbs.get(*index).map(loose::Store::iter) {
                        Some(new_iter) => {
                            *iter = new_iter;
                            self.next()
                        }
                        None => {
                            self.state = State::Depleted;
                            None
                        }
                    }
                }
            },
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.num_objects, None)
    }
}

impl<S> super::Handle<S>
where
    S: Deref<Target = super::Store> + Clone,
{
    /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
    /// followed by all loose objects.
    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
        AllObjects::new(self.store_ref())
    }
}

impl dynamic::Store {
    /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
    pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
        AllObjects::new(self)
    }
}
Commit	Line	Data
0a29b90c FG	1	use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter};
	2
	3	use gix_hash::ObjectId;
	4
	5	use crate::{
	6	loose,
	7	store::{handle, handle::SingleOrMultiIndex, types::PackId},
	8	store_impls::dynamic,
	9	};
	10
	11	struct EntryForOrdering {
	12	pack_offset: u64,
	13	entry_index: u32,
	14	pack_index: u16,
	15	}
	16
	17	enum State {
	18	Pack {
	19	index_iter: IntoIter<handle::IndexLookup>,
	20	index: handle::IndexLookup,
	21	ordered_entries: Option<Vec<EntryForOrdering>>,
	22	entry_index: u32,
	23	num_objects: u32,
	24	},
	25	Loose {
	26	iter: loose::Iter,
	27	index: usize,
	28	},
	29	Depleted,
	30	}
	31
	32	/// Define the order in which objects are returned.
49aad941	33	#[derive(Default, Debug, Copy, Clone)]
0a29b90c FG	34	pub enum Ordering {
	35	/// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
	36	/// as sorted by their names as well.
	37	///
	38	/// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible,
	39	/// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this
	40	/// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order.
49aad941	41	#[default]
0a29b90c FG	42	PackLexicographicalThenLooseLexicographical,
	43	/// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
	44	/// Then follow loose objects sorted by their names.
	45	///
	46	/// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
	47	/// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches
	48	/// are more efficiently used that way.
	49	PackAscendingOffsetThenLooseLexicographical,
	50	}
	51
0a29b90c FG	52	/// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
	53	/// order that is costly to traverse when querying object information or decoding them.
	54	///
	55	/// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
	56	pub struct AllObjects {
	57	state: State,
	58	num_objects: usize,
	59	loose_dbs: Arc<Vec<loose::Store>>,
	60	order: Ordering,
	61	}
	62
	63	/// Builder
	64	impl AllObjects {
	65	/// Set the ordering of the objects returned, trading off memory and latency for object query performance.
	66	pub fn with_ordering(mut self, order: Ordering) -> Self {
	67	self.order = order;
	68	self
	69	}
	70	}
	71
	72	impl AllObjects {
	73	/// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
	74	pub fn new(db: &dynamic::Store) -> Result<Self, crate::store::load_index::Error> {
	75	let snapshot = db.load_all_indices()?;
	76
	77	let packed_objects = snapshot
	78	.indices
	79	.iter()
	80	.fold(0usize, \|dbc, index\| dbc.saturating_add(index.num_objects() as usize));
	81	let mut index_iter = snapshot.indices.into_iter();
	82	let loose_dbs = snapshot.loose_dbs;
	83	let order = Default::default();
	84	let state = match index_iter.next() {
	85	Some(index) => {
	86	let num_objects = index.num_objects();
	87	State::Pack {
	88	index_iter,
	89	ordered_entries: maybe_sort_entries(&index, order),
	90	index,
	91	entry_index: 0,
	92	num_objects,
	93	}
	94	}
	95	None => {
	96	let index = 0;
	97	State::Loose {
	98	iter: loose_dbs.get(index).expect("at least one loose db").iter(),
	99	index,
	100	}
	101	}
	102	};
	103	Ok(AllObjects {
	104	state,
	105	loose_dbs,
	106	num_objects: packed_objects,
	107	order,
	108	})
	109	}
	110	}
	111
	112	fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option<Vec<EntryForOrdering>> {
	113	let mut order: Vec<_> = match order {
	114	Ordering::PackLexicographicalThenLooseLexicographical => return None,
	115	Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file {
116	// We know that we cannot have more than u32 entry indices per pack.
117	SingleOrMultiIndex::Single { index, .. } => index
118	.iter()
119	.enumerate()
120	.map(\|(idx, e)\| EntryForOrdering {
121	pack_offset: e.pack_offset,
122	entry_index: idx as u32,
123	pack_index: 0,
124	})
125	.collect(),
126	SingleOrMultiIndex::Multi { index, .. } => index
127	.iter()
128	.enumerate()
129	.map(\|(idx, e)\| EntryForOrdering {
130	pack_offset: e.pack_offset,
131	entry_index: idx as u32,
132	pack_index: {
133	debug_assert!(
134	e.pack_index < PackId::max_packs_in_multi_index(),
135	"this shows the relation between u16 and pack_index (u32) and why this is OK"
136	);
137	e.pack_index as u16
138	},
139	})
140	.collect(),
141	},
142	};
143	order.sort_by(\|a, b\| {
144	a.pack_index
145	.cmp(&b.pack_index)
146	.then_with(\|\| a.pack_offset.cmp(&b.pack_offset))
147	});
148	Some(order)
149	}
150
151	impl Iterator for AllObjects {
152	type Item = Result<ObjectId, loose::iter::Error>;
153
154	fn next(&mut self) -> Option<Self::Item> {
155	match &mut self.state {
156	State::Depleted => None,
157	State::Pack {
158	index_iter,
159	ordered_entries,
160	index,
161	entry_index,
162	num_objects,
163	} => {
164	if entry_index < num_objects {
165	let oid = match ordered_entries {
166	Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index),
167	None => index.oid_at_index(*entry_index),
168	}
169	.to_owned();
170	*entry_index += 1;
171	Some(Ok(oid))
172	} else {
173	match index_iter.next() {
174	Some(new_index) => {
175	*ordered_entries = maybe_sort_entries(&new_index, self.order);
176	*index = new_index;
177	*entry_index = 0;
178	*num_objects = index.num_objects();
179	}
180	None => {
181	let index = 0;
182	self.state = State::Loose {
183	iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(),
184	index,
185	}
186	}
187	}
188	self.next()
189	}
190	}
191	State::Loose { iter, index } => match iter.next() {
192	Some(id) => Some(id),
193	None => {
194	*index += 1;
781aab86	195	match self.loose_dbs.get(*index).map(loose::Store::iter) {
0a29b90c FG	196	Some(new_iter) => {
	197	*iter = new_iter;
	198	self.next()
	199	}
	200	None => {
	201	self.state = State::Depleted;
	202	None
	203	}
	204	}
	205	}
	206	},
	207	}
	208	}
	209
	210	fn size_hint(&self) -> (usize, Option<usize>) {
	211	(self.num_objects, None)
	212	}
	213	}
	214
	215	impl<S> super::Handle<S>
	216	where
	217	S: Deref<Target = super::Store> + Clone,
	218	{
	219	/// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
	220	/// followed by all loose objects.
	221	pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
	222	AllObjects::new(self.store_ref())
	223	}
	224	}
	225
	226	impl dynamic::Store {
	227	/// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
	228	pub fn iter(&self) -> Result<AllObjects, dynamic::load_index::Error> {
	229	AllObjects::new(self)
	230	}
	231	}