1 use std
::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter}
;
3 use gix_hash
::ObjectId
;
7 store
::{handle, handle::SingleOrMultiIndex, types::PackId}
,
// NOTE(review): fragmentary extraction — the embedded original line numbers
// jump from 11 to 19, so lines 12-18 (the fields of `EntryForOrdering` and,
// presumably, the start of another type) are missing from this view. The three
// fields below (original lines 19-21) most likely belong to that OTHER type
// (an iteration-state holder), not to `EntryForOrdering` — verify against the
// complete file before editing.
11 struct EntryForOrdering
{
// Iterator over the remaining pack indices still to be traversed — owner type
// not visible here; TODO confirm.
19 index_iter
: IntoIter
<handle
::IndexLookup
>,
// The pack index currently being traversed.
20 index
: handle
::IndexLookup
,
// When `Some`, entries of the current index pre-sorted per the chosen
// `Ordering` (see `maybe_sort_entries`); `None` means natural index order.
21 ordered_entries
: Option
<Vec
<EntryForOrdering
>>,
// NOTE(review): the enum declaration line itself (original line 34, presumably
// `pub enum Ordering {`) is missing from this extraction — only the derive,
// the doc comments, and the two variants survive. The `Default` derive implies
// one variant carries `#[default]` on a missing line; which one is not visible
// here — TODO confirm (doc text suggests the lexicographical mode is the
// memory-free default).
32 /// Define the order in which objects are returned.
33 #[derive(Default, Debug, Copy, Clone)]
35 /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
36 /// as sorted by their names as well.
38 /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible,
39 /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this
40 /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order.
42 PackLexicographicalThenLooseLexicographical
,
43 /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
44 /// Then follow loose objects sorted by their names.
46 /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
47 /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches
48 /// are more efficiently used that way.
49 PackAscendingOffsetThenLooseLexicographical
,
52 /// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
53 /// order that is costly to traverse when querying object information or decoding them.
55 /// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
56 pub struct AllObjects
{
// NOTE(review): original lines 57-58 and 60+ (remaining fields such as the
// iteration state, ordering, and object count used by `size_hint`) are missing
// from this extraction — only `loose_dbs` survives.
// Shared list of loose object databases, consulted once packs are exhausted.
59 loose_dbs
: Arc
<Vec
<loose
::Store
>>,
// NOTE(review): only the signature survives — the method body (original
// lines 67+) is missing from this extraction. Presumably it stores `order`
// on `self` and returns `self` (builder style); verify against the full file.
65 /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
66 pub fn with_ordering(mut self, order
: Ordering
) -> Self {
// NOTE(review): fragmentary — many interior lines (76, 78-79, 85, 87-88,
// 90-97, 99-105, and the match-arm scaffolding) are missing from this
// extraction, including the iterator chain feeding `.fold(...)`, the
// `Some(index)`/`None` arms of the `match`, and the struct literal that
// assembles `Self`. Comments below describe only what the surviving lines show.
73 /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
74 pub fn new(db
: &dynamic
::Store
) -> Result
<Self, crate::store
::load_index
::Error
> {
// Eagerly load every index; `?` propagates the load error to the caller.
75 let snapshot
= db
.load_all_indices()?
;
// Total object count across all packs; `saturating_add` guards against
// overflow on pathological inputs. The iterator being folded comes from a
// missing line — presumably `snapshot.indices.iter()`; TODO confirm.
77 let packed_objects
= snapshot
80 .fold(0usize
, |dbc
, index
| dbc
.saturating_add(index
.num_objects() as usize));
81 let mut index_iter
= snapshot
.indices
.into_iter();
82 let loose_dbs
= snapshot
.loose_dbs
;
// Default `Ordering` (which variant is default is not visible here).
83 let order
= Default
::default();
// Initial state: start with the first pack index if present; the enclosing
// match arms are on missing lines.
84 let state
= match index_iter
.next() {
86 let num_objects
= index
.num_objects();
// Pre-sort entries only when the chosen ordering requires it.
89 ordered_entries
: maybe_sort_entries(&index
, order
),
// `index` here indexes into `loose_dbs` — invariant: at least one loose db exists.
98 iter
: loose_dbs
.get(index
).expect("at least one loose db").iter(),
106 num_objects
: packed_objects
,
// NOTE(review): fragmentary — missing lines include the iterator chain between
// the `Single`/`Multi` match arms and their `.map(...)` calls (presumably
// `.iter().enumerate()`), the `pack_index` field initializers, the
// `debug_assert!`/`assert!` wrapper around the max-packs check (lines 132-133),
// the `.collect()` calls, the primary comparison inside `sort_by` (line 144-145,
// presumably comparing pack ids), and the trailing `Some(order)` return.
// Returns `None` when no sorting is needed, i.e. natural index order.
112 fn maybe_sort_entries(index
: &handle
::IndexLookup
, order
: Ordering
) -> Option
<Vec
<EntryForOrdering
>> {
113 let mut order
: Vec
<_
> = match order
{
// Lexicographical mode needs no pre-sorted buffer at all.
114 Ordering
::PackLexicographicalThenLooseLexicographical
=> return None
,
115 Ordering
::PackAscendingOffsetThenLooseLexicographical
=> match &index
.file
{
116 // We know that we cannot have more than u32 entry indices per pack.
117 SingleOrMultiIndex
::Single { index, .. }
=> index
120 .map(|(idx
, e
)| EntryForOrdering
{
121 pack_offset
: e
.pack_offset
,
// Cast justified by the comment above: entry counts fit in u32.
122 entry_index
: idx
as u32,
126 SingleOrMultiIndex
::Multi { index, .. }
=> index
129 .map(|(idx
, e
)| EntryForOrdering
{
130 pack_offset
: e
.pack_offset
,
131 entry_index
: idx
as u32,
// Assertion condition guarding the pack-index narrowing; the assert
// macro call surrounding it is on missing lines.
134 e
.pack_index
< PackId
::max_packs_in_multi_index(),
135 "this shows the relation between u16 and pack_index (u32) and why this is OK"
// Sort so iteration follows ascending pack offsets; the primary
// comparison key (missing line, presumably the pack id) ties entries
// of the same pack together before offset ordering applies.
143 order
.sort_by(|a
, b
| {
146 .then_with(|| a
.pack_offset
.cmp(&b
.pack_offset
))
// NOTE(review): fragmentary — missing lines include the `State::Pack { .. }`
// match-arm header (so the bindings `entry_index`, `num_objects`,
// `ordered_entries`, `index`, `index_iter` used below come from an unseen
// destructuring), the increment of `entry_index`, the `Some(new_index)` /
// `None` arms switching packs, the loose-db advancement logic, and all
// closing braces. Comments describe only what the surviving lines show.
151 impl Iterator
for AllObjects
{
// Yields object ids; errors can only originate from loose-object iteration.
152 type Item
= Result
<ObjectId
, loose
::iter
::Error
>;
154 fn next(&mut self) -> Option
<Self::Item
> {
155 match &mut self.state
{
156 State
::Depleted
=> None
,
// Still inside the current pack index?
164 if *entry_index
< *num_objects
{
165 let oid
= match ordered_entries
{
// Pre-sorted mode: indirect through the sorted entry list.
166 Some(entries
) => index
.oid_at_index(entries
[*entry_index
as usize].entry_index
),
// Natural mode: direct lookup by position.
167 None
=> index
.oid_at_index(*entry_index
),
// Current pack exhausted — advance to the next index, if any.
173 match index_iter
.next() {
// Re-sort (or clear) the entry buffer for the new pack.
175 *ordered_entries
= maybe_sort_entries(&new_index
, self.order
);
178 *num_objects
= index
.num_objects();
// No more packs: fall through to loose-object iteration.
182 self.state
= State
::Loose
{
183 iter
: self.loose_dbs
.get(index
).expect("at least one loose odb").iter(),
191 State
::Loose { iter, index }
=> match iter
.next() {
192 Some(id
) => Some(id
),
// Current loose db exhausted — try the next one, if it exists;
// the arms consuming this `match` are on missing lines.
195 match self.loose_dbs
.get(*index
).map(|ldb
| ldb
.iter()) {
201 self.state
= State
::Depleted
;
// Lower bound is the precomputed pack-object total; upper bound unknown
// because loose-object counts aren't known ahead of time.
210 fn size_hint(&self) -> (usize, Option
<usize>) {
211 (self.num_objects
, None
)
// NOTE(review): the `where` keyword (original line 216), the impl's opening
// brace, and the closing braces are missing from this extraction.
215 impl<S
> super::Handle
<S
>
217 S
: Deref
<Target
= super::Store
> + Clone
,
219 /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
220 /// followed by all loose objects.
221 pub fn iter(&self) -> Result
<AllObjects
, dynamic
::load_index
::Error
> {
// Delegate to the store-level constructor via the handle's store reference.
222 AllObjects
::new(self.store_ref())
// NOTE(review): closing braces (original lines after 229) are missing from
// this extraction; the body otherwise appears complete.
226 impl dynamic
::Store
{
227 /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
228 pub fn iter(&self) -> Result
<AllObjects
, dynamic
::load_index
::Error
> {
// Same construction as `Handle::iter`, but with the store itself.
229 AllObjects
::new(self)