]>
Commit | Line | Data |
---|---|---|
74b04a01 XL |
1 | //! A variant of `SortedMap` that preserves insertion order. |
2 | ||
3 | use std::borrow::Borrow; | |
4 | use std::hash::{Hash, Hasher}; | |
5 | use std::iter::FromIterator; | |
6 | ||
7 | use crate::stable_hasher::{HashStable, StableHasher}; | |
8 | use rustc_index::vec::{Idx, IndexVec}; | |
9 | ||
3dfed10e XL |
/// An indexed multi-map that preserves insertion order while permitting both *O*(log *n*) lookup of
/// an item by key and *O*(1) lookup by index.
///
/// Lookup by key is a binary search followed by a linear scan over the run of items sharing that
/// key, so it is only logarithmic when duplicates of a key are rare.
///
/// This data structure is a hybrid of an [`IndexVec`] and a [`SortedMap`]. Like `IndexVec`,
/// `SortedIndexMultiMap` assigns a typed index to each item while preserving insertion order.
/// Like `SortedMap`, `SortedIndexMultiMap` has efficient lookup of items by key. However, this
/// is accomplished by sorting an array of item indices instead of the items themselves.
///
/// Unlike `SortedMap`, this data structure can hold multiple equivalent items at once, so the
/// `get_by_key` method and its variants return an iterator instead of an `Option`. Equivalent
/// items will be yielded in insertion order.
///
/// Unlike a general-purpose map like `BTreeMap` or `HashMap`, `SortedMap` and
/// `SortedIndexMultiMap` require *O*(*n*) time to insert a single item. This is because we may need
/// to insert into the middle of the sorted array. Users should avoid mutating this data structure
/// in-place.
///
/// [`IndexVec`]: ../../rustc_index/vec/struct.IndexVec.html
/// [`SortedMap`]: ../sorted_map/struct.SortedMap.html
#[derive(Clone, Debug)]
pub struct SortedIndexMultiMap<I: Idx, K, V> {
    /// The elements of the map in insertion order.
    items: IndexVec<I, (K, V)>,

    /// Indices of the items in the map, sorted by the item's key. Indices of items with equal
    /// keys stay in insertion order relative to each other.
    idx_sorted_by_item_key: Vec<I>,
}
37 | ||
38 | impl<I: Idx, K: Ord, V> SortedIndexMultiMap<I, K, V> { | |
39 | pub fn new() -> Self { | |
40 | SortedIndexMultiMap { items: IndexVec::new(), idx_sorted_by_item_key: Vec::new() } | |
41 | } | |
42 | ||
43 | pub fn len(&self) -> usize { | |
44 | self.items.len() | |
45 | } | |
46 | ||
47 | pub fn is_empty(&self) -> bool { | |
48 | self.items.is_empty() | |
49 | } | |
50 | ||
51 | /// Returns an iterator over the items in the map in insertion order. | |
52 | pub fn into_iter(self) -> impl DoubleEndedIterator<Item = (K, V)> { | |
53 | self.items.into_iter() | |
54 | } | |
55 | ||
56 | /// Returns an iterator over the items in the map in insertion order along with their indices. | |
57 | pub fn into_iter_enumerated(self) -> impl DoubleEndedIterator<Item = (I, (K, V))> { | |
58 | self.items.into_iter_enumerated() | |
59 | } | |
60 | ||
61 | /// Returns an iterator over the items in the map in insertion order. | |
62 | pub fn iter(&self) -> impl '_ + DoubleEndedIterator<Item = (&K, &V)> { | |
63 | self.items.iter().map(|(ref k, ref v)| (k, v)) | |
64 | } | |
65 | ||
66 | /// Returns an iterator over the items in the map in insertion order along with their indices. | |
67 | pub fn iter_enumerated(&self) -> impl '_ + DoubleEndedIterator<Item = (I, (&K, &V))> { | |
68 | self.items.iter_enumerated().map(|(i, (ref k, ref v))| (i, (k, v))) | |
69 | } | |
70 | ||
71 | /// Returns the item in the map with the given index. | |
72 | pub fn get(&self, idx: I) -> Option<&(K, V)> { | |
73 | self.items.get(idx) | |
74 | } | |
75 | ||
76 | /// Returns an iterator over the items in the map that are equal to `key`. | |
77 | /// | |
78 | /// If there are multiple items that are equivalent to `key`, they will be yielded in | |
79 | /// insertion order. | |
80 | pub fn get_by_key<Q: 'a>(&'a self, key: &Q) -> impl 'a + Iterator<Item = &'a V> | |
81 | where | |
82 | Q: Ord + ?Sized, | |
83 | K: Borrow<Q>, | |
84 | { | |
85 | self.get_by_key_enumerated(key).map(|(_, v)| v) | |
86 | } | |
87 | ||
88 | /// Returns an iterator over the items in the map that are equal to `key` along with their | |
89 | /// indices. | |
90 | /// | |
91 | /// If there are multiple items that are equivalent to `key`, they will be yielded in | |
92 | /// insertion order. | |
93 | pub fn get_by_key_enumerated<Q>(&self, key: &Q) -> impl '_ + Iterator<Item = (I, &V)> | |
94 | where | |
95 | Q: Ord + ?Sized, | |
96 | K: Borrow<Q>, | |
97 | { | |
98 | // FIXME: This should be in the standard library as `equal_range`. See rust-lang/rfcs#2184. | |
99 | match self.binary_search_idx(key) { | |
100 | Err(_) => self.idxs_to_items_enumerated(&[]), | |
101 | ||
102 | Ok(idx) => { | |
103 | let start = self.find_lower_bound(key, idx); | |
104 | let end = self.find_upper_bound(key, idx); | |
105 | self.idxs_to_items_enumerated(&self.idx_sorted_by_item_key[start..end]) | |
106 | } | |
107 | } | |
108 | } | |
109 | ||
110 | fn binary_search_idx<Q>(&self, key: &Q) -> Result<usize, usize> | |
111 | where | |
112 | Q: Ord + ?Sized, | |
113 | K: Borrow<Q>, | |
114 | { | |
115 | self.idx_sorted_by_item_key.binary_search_by(|&idx| self.items[idx].0.borrow().cmp(key)) | |
116 | } | |
117 | ||
118 | /// Returns the index into the `idx_sorted_by_item_key` array of the first item equal to | |
119 | /// `key`. | |
120 | /// | |
121 | /// `initial` must be an index into that same array for an item that is equal to `key`. | |
122 | fn find_lower_bound<Q>(&self, key: &Q, initial: usize) -> usize | |
123 | where | |
124 | Q: Ord + ?Sized, | |
125 | K: Borrow<Q>, | |
126 | { | |
127 | debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key); | |
128 | ||
129 | // FIXME: At present, this uses linear search, meaning lookup is only `O(log n)` if duplicate | |
130 | // entries are rare. It would be better to start with a linear search for the common case but | |
131 | // fall back to an exponential search if many duplicates are found. This applies to | |
132 | // `upper_bound` as well. | |
133 | let mut start = initial; | |
134 | while start != 0 && self.items[self.idx_sorted_by_item_key[start - 1]].0.borrow() == key { | |
135 | start -= 1; | |
136 | } | |
137 | ||
138 | start | |
139 | } | |
140 | ||
141 | /// Returns the index into the `idx_sorted_by_item_key` array of the first item greater than | |
142 | /// `key`, or `self.len()` if no such item exists. | |
143 | /// | |
144 | /// `initial` must be an index into that same array for an item that is equal to `key`. | |
145 | fn find_upper_bound<Q>(&self, key: &Q, initial: usize) -> usize | |
146 | where | |
147 | Q: Ord + ?Sized, | |
148 | K: Borrow<Q>, | |
149 | { | |
150 | debug_assert!(self.items[self.idx_sorted_by_item_key[initial]].0.borrow() == key); | |
151 | ||
152 | // See the FIXME for `find_lower_bound`. | |
153 | let mut end = initial + 1; | |
154 | let len = self.items.len(); | |
155 | while end < len && self.items[self.idx_sorted_by_item_key[end]].0.borrow() == key { | |
156 | end += 1; | |
157 | } | |
158 | ||
159 | end | |
160 | } | |
161 | ||
162 | fn idxs_to_items_enumerated(&'a self, idxs: &'a [I]) -> impl 'a + Iterator<Item = (I, &'a V)> { | |
163 | idxs.iter().map(move |&idx| (idx, &self.items[idx].1)) | |
164 | } | |
165 | } | |
166 | ||
167 | impl<I: Idx, K: Eq, V: Eq> Eq for SortedIndexMultiMap<I, K, V> {} | |
168 | impl<I: Idx, K: PartialEq, V: PartialEq> PartialEq for SortedIndexMultiMap<I, K, V> { | |
169 | fn eq(&self, other: &Self) -> bool { | |
170 | // No need to compare the sorted index. If the items are the same, the index will be too. | |
171 | self.items == other.items | |
172 | } | |
173 | } | |
174 | ||
175 | impl<I: Idx, K, V> Hash for SortedIndexMultiMap<I, K, V> | |
176 | where | |
177 | K: Hash, | |
178 | V: Hash, | |
179 | { | |
180 | fn hash<H: Hasher>(&self, hasher: &mut H) { | |
181 | self.items.hash(hasher) | |
182 | } | |
183 | } | |
184 | impl<I: Idx, K, V, C> HashStable<C> for SortedIndexMultiMap<I, K, V> | |
185 | where | |
186 | K: HashStable<C>, | |
187 | V: HashStable<C>, | |
188 | { | |
189 | fn hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher) { | |
190 | self.items.hash_stable(ctx, hasher) | |
191 | } | |
192 | } | |
193 | ||
194 | impl<I: Idx, K: Ord, V> FromIterator<(K, V)> for SortedIndexMultiMap<I, K, V> { | |
195 | fn from_iter<J>(iter: J) -> Self | |
196 | where | |
197 | J: IntoIterator<Item = (K, V)>, | |
198 | { | |
199 | let items = IndexVec::from_iter(iter); | |
200 | let mut idx_sorted_by_item_key: Vec<_> = items.indices().collect(); | |
201 | ||
202 | // `sort_by_key` is stable, so insertion order is preserved for duplicate items. | |
203 | idx_sorted_by_item_key.sort_by_key(|&idx| &items[idx].0); | |
204 | ||
205 | SortedIndexMultiMap { items, idx_sorted_by_item_key } | |
206 | } | |
207 | } | |
208 | ||
209 | impl<I: Idx, K, V> std::ops::Index<I> for SortedIndexMultiMap<I, K, V> { | |
210 | type Output = V; | |
211 | ||
212 | fn index(&self, idx: I) -> &Self::Output { | |
213 | &self.items[idx].1 | |
214 | } | |
215 | } | |
216 | ||
// The unit tests are declared as an out-of-line `tests` module and are only compiled for test
// builds. The cfg option that the test harness sets is `test` (singular).
#[cfg(test)]
mod tests;