1 use std
::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter}
;
3 use gix_hash
::ObjectId
;
7 store
::{handle, handle::SingleOrMultiIndex, types::PackId}
,
// NOTE(review): fragmentary extraction — the embedded original line numbers
// jump from 11 to 19, so lines 12-18 (the fields of `EntryForOrdering` and,
// presumably, the start of another type) are missing from this view. The three
// fields below (original lines 19-21) most likely belong to that OTHER type
// (an iteration-state holder), not to `EntryForOrdering` — verify against the
// complete file before editing.
11 struct EntryForOrdering
{
// Iterator over the remaining pack indices still to be traversed — owner type
// not visible here; TODO confirm.
19 index_iter
: IntoIter
<handle
::IndexLookup
>,
// The pack index currently being traversed.
20 index
: handle
::IndexLookup
,
// When `Some`, entries of the current index pre-sorted per the chosen
// `Ordering` (see `maybe_sort_entries`); `None` means natural index order.
21 ordered_entries
: Option
<Vec
<EntryForOrdering
>>,
// NOTE(review): the enum declaration line itself (original line 34, presumably
// `pub enum Ordering {`) is missing from this extraction — only the derive,
// the doc comments, and the two variants survive. The `Default` derive implies
// one variant carries `#[default]` on a missing line; which one is not visible
// here — TODO confirm (doc text suggests the lexicographical mode is the
// memory-free default).
32 /// Define the order in which objects are returned.
33 #[derive(Default, Debug, Copy, Clone)]
35 /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects
36 /// as sorted by their names as well.
38 /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible,
39 /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this
40 /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order.
42 PackLexicographicalThenLooseLexicographical
,
43 /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first.
44 /// Then follow loose objects sorted by their names.
46 /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping
47 /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches
48 /// are more efficiently used that way.
49 PackAscendingOffsetThenLooseLexicographical
,
52 /// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an
53 /// order that is costly to traverse when querying object information or decoding them.
55 /// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off.
56 pub struct AllObjects
{
// NOTE(review): original lines 57-58 and 60+ (remaining fields such as the
// iteration state, ordering, and object count used by `size_hint`) are missing
// from this extraction — only `loose_dbs` survives.
// Shared list of loose object databases, consulted once packs are exhausted.
59 loose_dbs
: Arc
<Vec
<loose
::Store
>>,
// NOTE(review): only the signature survives — the method body (original
// lines 67+) is missing from this extraction. Presumably it stores `order`
// on `self` and returns `self` (builder style); verify against the full file.
65 /// Set the ordering of the objects returned, trading off memory and latency for object query performance.
66 pub fn with_ordering(mut self, order
: Ordering
) -> Self {
// NOTE(review): fragmentary — many interior lines (76, 78-79, 85, 87-88,
// 90-97, 99-105, and the match-arm scaffolding) are missing from this
// extraction, including the iterator chain feeding `.fold(...)`, the
// `Some(index)`/`None` arms of the `match`, and the struct literal that
// assembles `Self`. Comments below describe only what the surviving lines show.
73 /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread.
74 pub fn new(db
: &dynamic
::Store
) -> Result
<Self, crate::store
::load_index
::Error
> {
// Eagerly load every index; `?` propagates the load error to the caller.
75 let snapshot
= db
.load_all_indices()?
;
// Total object count across all packs; `saturating_add` guards against
// overflow on pathological inputs. The iterator being folded comes from a
// missing line — presumably `snapshot.indices.iter()`; TODO confirm.
77 let packed_objects
= snapshot
80 .fold(0usize
, |dbc
, index
| dbc
.saturating_add(index
.num_objects() as usize));
81 let mut index_iter
= snapshot
.indices
.into_iter();
82 let loose_dbs
= snapshot
.loose_dbs
;
// Default `Ordering` (which variant is default is not visible here).
83 let order
= Default
::default();
// Initial state: start with the first pack index if present; the enclosing
// match arms are on missing lines.
84 let state
= match index_iter
.next() {
86 let num_objects
= index
.num_objects();
// Pre-sort entries only when the chosen ordering requires it.
89 ordered_entries
: maybe_sort_entries(&index
, order
),
// `index` here indexes into `loose_dbs` — invariant: at least one loose db exists.
98 iter
: loose_dbs
.get(index
).expect("at least one loose db").iter(),
106 num_objects
: packed_objects
,
// NOTE(review): fragmentary — missing lines include the iterator chain between
// the `Single`/`Multi` match arms and their `.map(...)` calls (presumably
// `.iter().enumerate()`), the `pack_index` field initializers, the
// `debug_assert!`/`assert!` wrapper around the max-packs check (lines 132-133),
// the `.collect()` calls, the primary comparison inside `sort_by` (line 144-145,
// presumably comparing pack ids), and the trailing `Some(order)` return.
// Returns `None` when no sorting is needed, i.e. natural index order.
112 fn maybe_sort_entries(index
: &handle
::IndexLookup
, order
: Ordering
) -> Option
<Vec
<EntryForOrdering
>> {
113 let mut order
: Vec
<_
> = match order
{
// Lexicographical mode needs no pre-sorted buffer at all.
114 Ordering
::PackLexicographicalThenLooseLexicographical
=> return None
,
115 Ordering
::PackAscendingOffsetThenLooseLexicographical
=> match &index
.file
{
116 // We know that we cannot have more than u32 entry indices per pack.
117 SingleOrMultiIndex
::Single { index, .. }
=> index
120 .map(|(idx
, e
)| EntryForOrdering
{
121 pack_offset
: e
.pack_offset
,
// Cast justified by the comment above: entry counts fit in u32.
122 entry_index
: idx
as u32,
126 SingleOrMultiIndex
::Multi { index, .. }
=> index
129 .map(|(idx
, e
)| EntryForOrdering
{
130 pack_offset
: e
.pack_offset
,
131 entry_index
: idx
as u32,
// Assertion condition guarding the pack-index narrowing; the assert
// macro call surrounding it is on missing lines.
134 e
.pack_index
< PackId
::max_packs_in_multi_index(),
135 "this shows the relation between u16 and pack_index (u32) and why this is OK"
// Sort so iteration follows ascending pack offsets; the primary
// comparison key (missing line, presumably the pack id) ties entries
// of the same pack together before offset ordering applies.
143 order
.sort_by(|a
, b
| {
146 .then_with(|| a
.pack_offset
.cmp(&b
.pack_offset
))
// NOTE(review): fragmentary — missing lines include the `State::Pack { .. }`
// match-arm header (so the bindings `entry_index`, `num_objects`,
// `ordered_entries`, `index`, `index_iter` used below come from an unseen
// destructuring), the increment of `entry_index`, the `Some(new_index)` /
// `None` arms switching packs, the loose-db advancement logic, and all
// closing braces. Comments describe only what the surviving lines show.
151 impl Iterator
for AllObjects
{
// Yields object ids; errors can only originate from loose-object iteration.
152 type Item
= Result
<ObjectId
, loose
::iter
::Error
>;
154 fn next(&mut self) -> Option
<Self::Item
> {
155 match &mut self.state
{
156 State
::Depleted
=> None
,
// Still inside the current pack index?
164 if *entry_index
< *num_objects
{
165 let oid
= match ordered_entries
{
// Pre-sorted mode: indirect through the sorted entry list.
166 Some(entries
) => index
.oid_at_index(entries
[*entry_index
as usize].entry_index
),
// Natural mode: direct lookup by position.
167 None
=> index
.oid_at_index(*entry_index
),
// Current pack exhausted — advance to the next index, if any.
173 match index_iter
.next() {
// Re-sort (or clear) the entry buffer for the new pack.
175 *ordered_entries
= maybe_sort_entries(&new_index
, self.order
);
178 *num_objects
= index
.num_objects();
// No more packs: fall through to loose-object iteration.
182 self.state
= State
::Loose
{
183 iter
: self.loose_dbs
.get(index
).expect("at least one loose odb").iter(),
191 State
::Loose { iter, index }
=> match iter
.next() {
192 Some(id
) => Some(id
),
// Current loose db exhausted — try the next one, if it exists;
// the arms consuming this `match` are on missing lines.
195 match self.loose_dbs
.get(*index
).map(|ldb
| ldb
.iter()) {
201 self.state
= State
::Depleted
;
// Lower bound is the precomputed pack-object total; upper bound unknown
// because loose-object counts aren't known ahead of time.
210 fn size_hint(&self) -> (usize, Option
<usize>) {
211 (self.num_objects
, None
)
// NOTE(review): the `where` keyword (original line 216), the impl's opening
// brace, and the closing braces are missing from this extraction.
215 impl<S
> super::Handle
<S
>
217 S
: Deref
<Target
= super::Store
> + Clone
,
219 /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates),
220 /// followed by all loose objects.
221 pub fn iter(&self) -> Result
<AllObjects
, dynamic
::load_index
::Error
> {
// Delegate to the store-level constructor via the handle's store reference.
222 AllObjects
::new(self.store_ref())
// NOTE(review): closing braces (original lines after 229) are missing from
// this extraction; the body otherwise appears complete.
226 impl dynamic
::Store
{
227 /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store.
228 pub fn iter(&self) -> Result
<AllObjects
, dynamic
::load_index
::Error
> {
// Same construction as `Handle::iter`, but with the store itself.
229 AllObjects
::new(self)