]>
Commit | Line | Data |
---|---|---|
2e11264a EC |
1 | /* |
2 | * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads. | |
3 | * | |
4 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> | |
5 | * | |
6 | * License: GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | * | |
9 | * Assumptions: | |
10 | * - NULL cannot be inserted/removed as a pointer value. | |
11 | * - Trying to insert an already-existing hash-pointer pair is OK. However, | |
12 | * it is not OK to insert into the same hash table different hash-pointer | |
13 | * pairs that have the same pointer value but different hashes. | |
14 | * - Lookups are performed under an RCU read-critical section; removals | |
15 | * must wait for a grace period to elapse before freeing removed objects. | |
16 | * | |
17 | * Features: | |
18 | * - Reads (i.e. lookups and iterators) can be concurrent with other reads. | |
19 | * Lookups that are concurrent with writes to the same bucket will retry | |
20 | * via a seqlock; iterators acquire all bucket locks and therefore can be | |
21 | * concurrent with lookups and are serialized wrt writers. | |
22 | * - Writes (i.e. insertions/removals) can be concurrent with writes to | |
23 | * different buckets; writes to the same bucket are serialized through a lock. | |
24 | * - Optional auto-resizing: the hash table resizes up if the load surpasses | |
25 | * a certain threshold. Resizing is done concurrently with readers; writes | |
26 | * are serialized with the resize operation. | |
27 | * | |
28 | * The key structure is the bucket, which is cacheline-sized. Buckets | |
29 | * contain a few hash values and pointers; the u32 hash values are stored in | |
30 | * full so that resizing is fast. Having this structure instead of directly | |
31 | * chaining items has two advantages: | |
32 | * - Failed lookups fail fast, and touch a minimum number of cache lines. | |
33 | * - Resizing the hash table with concurrent lookups is easy. | |
34 | * | |
35 | * There are two types of buckets: | |
36 | * 1. "head" buckets are the ones allocated in the array of buckets in qht_map. | |
37 | * 2. all "non-head" buckets (i.e. all others) are members of a chain that | |
38 | * starts from a head bucket. | |
39 | * Note that the seqlock and spinlock of a head bucket applies to all buckets | |
40 | * chained to it; these two fields are unused in non-head buckets. | |
41 | * | |
42 | * On removals, we move the last valid item in the chain to the position of the | |
43 | * just-removed entry. This makes lookups slightly faster, since the moment an | |
44 | * invalid entry is found, the (failed) lookup is over. | |
45 | * | |
46 | * Resizing is done by taking all bucket spinlocks (so that no other writers can | |
47 | * race with us) and then copying all entries into a new hash map. Then, the | |
48 | * ht->map pointer is set, and the old map is freed once no RCU readers can see | |
49 | * it anymore. | |
50 | * | |
51 | * Writers check for concurrent resizes by comparing ht->map before and after | |
8cc360b9 | 52 | * acquiring their bucket lock. If they don't match, a resize has occurred |
2e11264a EC |
53 | * while the bucket spinlock was being acquired. |
54 | * | |
55 | * Related Work: | |
56 | * - Idea of cacheline-sized buckets with full hashes taken from: | |
57 | * David, Guerraoui & Trigonakis, "Asynchronized Concurrency: | |
58 | * The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15. | |
59 | * - Why not RCU-based hash tables? They would allow us to get rid of the | |
60 | * seqlock, but resizing would take forever since RCU read critical | |
61 | * sections in QEMU take quite a long time. | |
62 | * More info on relativistic hash tables: | |
63 | * + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash | |
64 | * Tables via Relativistic Programming", USENIX ATC'11. | |
65 | * + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014. | |
66 | * https://lwn.net/Articles/612021/ | |
67 | */ | |
e9abfcb5 | 68 | #include "qemu/osdep.h" |
2e11264a EC |
69 | #include "qemu/qht.h" |
70 | #include "qemu/atomic.h" | |
71 | #include "qemu/rcu.h" | |
5df022cf | 72 | #include "qemu/memalign.h" |
2e11264a EC |
73 | |
74 | //#define QHT_DEBUG | |
75 | ||
76 | /* | |
77 | * We want to avoid false sharing of cache lines. Most systems have 64-byte | |
78 | * cache lines so we go with it for simplicity. | |
79 | * | |
80 | * Note that systems with smaller cache lines will be fine (the struct is | |
81 | * almost 64-bytes); systems with larger cache lines might suffer from | |
82 | * some false sharing. | |
83 | */ | |
84 | #define QHT_BUCKET_ALIGN 64 | |
85 | ||
86 | /* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */ | |
87 | #if HOST_LONG_BITS == 32 | |
88 | #define QHT_BUCKET_ENTRIES 6 | |
89 | #else /* 64-bit */ | |
90 | #define QHT_BUCKET_ENTRIES 4 | |
91 | #endif | |
92 | ||
69d55e9c EC |
/* Selects which callback of struct qht_iter is invoked while iterating. */
enum qht_iter_type {
    QHT_ITER_VOID = 0, /* call f.retvoid on each entry; nothing is removed */
    QHT_ITER_RM = 1,   /* call f.retbool; remove the entry if it returns true */
};
97 | ||
98 | struct qht_iter { | |
99 | union { | |
100 | qht_iter_func_t retvoid; | |
101 | qht_iter_bool_func_t retbool; | |
102 | } f; | |
103 | enum qht_iter_type type; | |
104 | }; | |
105 | ||
fe9959a2 EC |
106 | /* |
107 | * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise | |
108 | * the profiler (QSP) will deadlock. | |
109 | */ | |
110 | static inline void qht_lock(struct qht *ht) | |
111 | { | |
112 | if (ht->mode & QHT_MODE_RAW_MUTEXES) { | |
113 | qemu_mutex_lock__raw(&ht->lock); | |
114 | } else { | |
115 | qemu_mutex_lock(&ht->lock); | |
116 | } | |
117 | } | |
118 | ||
119 | static inline int qht_trylock(struct qht *ht) | |
120 | { | |
121 | if (ht->mode & QHT_MODE_RAW_MUTEXES) { | |
122 | return qemu_mutex_trylock__raw(&(ht)->lock); | |
123 | } | |
124 | return qemu_mutex_trylock(&(ht)->lock); | |
125 | } | |
126 | ||
127 | /* this inline is not really necessary, but it helps keep code consistent */ | |
128 | static inline void qht_unlock(struct qht *ht) | |
129 | { | |
130 | qemu_mutex_unlock(&ht->lock); | |
131 | } | |
132 | ||
2e11264a EC |
133 | /* |
134 | * Note: reading partially-updated pointers in @pointers could lead to | |
d73415a3 | 135 | * segfaults. We thus access them with qatomic_read/set; this guarantees |
2e11264a | 136 | * that the compiler makes all those accesses atomic. We also need the |
d73415a3 | 137 | * volatile-like behavior in qatomic_read, since otherwise the compiler |
2e11264a | 138 | * might refetch the pointer. |
d73415a3 | 139 | * qatomic_read's are of course not necessary when the bucket lock is held. |
2e11264a EC |
140 | * |
141 | * If both ht->lock and b->lock are grabbed, ht->lock should always | |
142 | * be grabbed first. | |
143 | */ | |
144 | struct qht_bucket { | |
145 | QemuSpin lock; | |
146 | QemuSeqLock sequence; | |
147 | uint32_t hashes[QHT_BUCKET_ENTRIES]; | |
148 | void *pointers[QHT_BUCKET_ENTRIES]; | |
149 | struct qht_bucket *next; | |
150 | } QEMU_ALIGNED(QHT_BUCKET_ALIGN); | |
151 | ||
152 | QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN); | |
153 | ||
68f7b2be EC |
154 | /* |
155 | * Under TSAN, we use striped locks instead of one lock per bucket chain. | |
156 | * This avoids crashing under TSAN, since TSAN aborts the program if more than | |
157 | * 64 locks are held (this is a hardcoded limit in TSAN). | |
158 | * When resizing a QHT we grab all the buckets' locks, which can easily | |
159 | * go over TSAN's limit. By using striped locks, we avoid this problem. | |
160 | * | |
161 | * Note: this number must be a power of two for easy index computation. | |
162 | */ | |
163 | #define QHT_TSAN_BUCKET_LOCKS_BITS 4 | |
164 | #define QHT_TSAN_BUCKET_LOCKS (1 << QHT_TSAN_BUCKET_LOCKS_BITS) | |
165 | ||
166 | struct qht_tsan_lock { | |
167 | QemuSpin lock; | |
168 | } QEMU_ALIGNED(QHT_BUCKET_ALIGN); | |
169 | ||
2e11264a EC |
170 | /** |
171 | * struct qht_map - structure to track an array of buckets | |
172 | * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind | |
173 | * find the whole struct. | |
174 | * @buckets: array of head buckets. It is constant once the map is created. | |
175 | * @n_buckets: number of head buckets. It is constant once the map is created. | |
176 | * @n_added_buckets: number of added (i.e. "non-head") buckets | |
177 | * @n_added_buckets_threshold: threshold to trigger an upward resize once the | |
178 | * number of added buckets surpasses it. | |
68f7b2be | 179 | * @tsan_bucket_locks: Array of striped locks to be used only under TSAN. |
2e11264a EC |
180 | * |
181 | * Buckets are tracked in what we call a "map", i.e. this structure. | |
182 | */ | |
183 | struct qht_map { | |
184 | struct rcu_head rcu; | |
185 | struct qht_bucket *buckets; | |
186 | size_t n_buckets; | |
187 | size_t n_added_buckets; | |
188 | size_t n_added_buckets_threshold; | |
68f7b2be EC |
189 | #ifdef CONFIG_TSAN |
190 | struct qht_tsan_lock tsan_bucket_locks[QHT_TSAN_BUCKET_LOCKS]; | |
191 | #endif | |
2e11264a EC |
192 | }; |
193 | ||
194 | /* trigger a resize when n_added_buckets > n_buckets / div */ | |
195 | #define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8 | |
196 | ||
76b553b3 EC |
/* Forward declarations for helpers that are used before their definition. */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);
200 | ||
201 | #ifdef QHT_DEBUG | |
202 | ||
203 | #define qht_debug_assert(X) do { assert(X); } while (0) | |
204 | ||
205 | static void qht_bucket_debug__locked(struct qht_bucket *b) | |
206 | { | |
207 | bool seen_empty = false; | |
208 | bool corrupt = false; | |
209 | int i; | |
210 | ||
211 | do { | |
212 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
213 | if (b->pointers[i] == NULL) { | |
214 | seen_empty = true; | |
215 | continue; | |
216 | } | |
217 | if (seen_empty) { | |
218 | fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n", | |
219 | __func__, b, i, b->hashes[i], b->pointers[i]); | |
220 | corrupt = true; | |
221 | } | |
222 | } | |
223 | b = b->next; | |
224 | } while (b); | |
225 | qht_debug_assert(!corrupt); | |
226 | } | |
227 | ||
228 | static void qht_map_debug__all_locked(struct qht_map *map) | |
229 | { | |
230 | int i; | |
231 | ||
232 | for (i = 0; i < map->n_buckets; i++) { | |
233 | qht_bucket_debug__locked(&map->buckets[i]); | |
234 | } | |
235 | } | |
236 | #else | |
237 | ||
238 | #define qht_debug_assert(X) do { (void)(X); } while (0) | |
239 | ||
/* Debug checks are compiled out: this stub intentionally does nothing. */
static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{
}
242 | ||
/* Debug checks are compiled out: this stub intentionally does nothing. */
static inline void qht_map_debug__all_locked(struct qht_map *map)
{
}
245 | #endif /* QHT_DEBUG */ | |
246 | ||
247 | static inline size_t qht_elems_to_buckets(size_t n_elems) | |
248 | { | |
249 | return pow2ceil(n_elems / QHT_BUCKET_ENTRIES); | |
250 | } | |
251 | ||
68f7b2be EC |
252 | /* |
253 | * When using striped locks (i.e. under TSAN), we have to be careful not | |
254 | * to operate on the same lock twice (e.g. when iterating through all buckets). | |
255 | * We achieve this by operating only on each stripe's first matching lock. | |
256 | */ | |
257 | static inline void qht_do_if_first_in_stripe(struct qht_map *map, | |
258 | struct qht_bucket *b, | |
259 | void (*func)(QemuSpin *spin)) | |
260 | { | |
261 | #ifdef CONFIG_TSAN | |
262 | unsigned long bucket_idx = b - map->buckets; | |
263 | bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0; | |
264 | if (is_first_in_stripe) { | |
265 | unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1); | |
266 | func(&map->tsan_bucket_locks[lock_idx].lock); | |
267 | } | |
268 | #else | |
269 | func(&b->lock); | |
270 | #endif | |
271 | } | |
272 | ||
273 | static inline void qht_bucket_lock_do(struct qht_map *map, | |
274 | struct qht_bucket *b, | |
275 | void (*func)(QemuSpin *lock)) | |
276 | { | |
277 | #ifdef CONFIG_TSAN | |
278 | unsigned long bucket_idx = b - map->buckets; | |
279 | unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1); | |
280 | func(&map->tsan_bucket_locks[lock_idx].lock); | |
281 | #else | |
282 | func(&b->lock); | |
283 | #endif | |
284 | } | |
285 | ||
286 | static inline void qht_bucket_lock(struct qht_map *map, | |
287 | struct qht_bucket *b) | |
288 | { | |
289 | qht_bucket_lock_do(map, b, qemu_spin_lock); | |
290 | } | |
291 | ||
292 | static inline void qht_bucket_unlock(struct qht_map *map, | |
293 | struct qht_bucket *b) | |
294 | { | |
295 | qht_bucket_lock_do(map, b, qemu_spin_unlock); | |
296 | } | |
297 | ||
298 | static inline void qht_head_init(struct qht_map *map, struct qht_bucket *b) | |
2e11264a EC |
299 | { |
300 | memset(b, 0, sizeof(*b)); | |
68f7b2be | 301 | qht_do_if_first_in_stripe(map, b, qemu_spin_init); |
2e11264a EC |
302 | seqlock_init(&b->sequence); |
303 | } | |
304 | ||
305 | static inline | |
e6c58299 | 306 | struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash) |
2e11264a EC |
307 | { |
308 | return &map->buckets[hash & (map->n_buckets - 1)]; | |
309 | } | |
310 | ||
311 | /* acquire all bucket locks from a map */ | |
312 | static void qht_map_lock_buckets(struct qht_map *map) | |
313 | { | |
314 | size_t i; | |
315 | ||
316 | for (i = 0; i < map->n_buckets; i++) { | |
317 | struct qht_bucket *b = &map->buckets[i]; | |
318 | ||
68f7b2be | 319 | qht_do_if_first_in_stripe(map, b, qemu_spin_lock); |
2e11264a EC |
320 | } |
321 | } | |
322 | ||
323 | static void qht_map_unlock_buckets(struct qht_map *map) | |
324 | { | |
325 | size_t i; | |
326 | ||
327 | for (i = 0; i < map->n_buckets; i++) { | |
328 | struct qht_bucket *b = &map->buckets[i]; | |
329 | ||
68f7b2be | 330 | qht_do_if_first_in_stripe(map, b, qemu_spin_unlock); |
2e11264a EC |
331 | } |
332 | } | |
333 | ||
334 | /* | |
335 | * Call with at least a bucket lock held. | |
336 | * @map should be the value read before acquiring the lock (or locks). | |
337 | */ | |
1911c8a3 EC |
338 | static inline bool qht_map_is_stale__locked(const struct qht *ht, |
339 | const struct qht_map *map) | |
2e11264a EC |
340 | { |
341 | return map != ht->map; | |
342 | } | |
343 | ||
344 | /* | |
345 | * Grab all bucket locks, and set @pmap after making sure the map isn't stale. | |
346 | * | |
347 | * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference. | |
348 | * | |
349 | * Note: callers cannot have ht->lock held. | |
350 | */ | |
351 | static inline | |
352 | void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap) | |
353 | { | |
354 | struct qht_map *map; | |
355 | ||
d73415a3 | 356 | map = qatomic_rcu_read(&ht->map); |
2e11264a EC |
357 | qht_map_lock_buckets(map); |
358 | if (likely(!qht_map_is_stale__locked(ht, map))) { | |
359 | *pmap = map; | |
360 | return; | |
361 | } | |
362 | qht_map_unlock_buckets(map); | |
363 | ||
364 | /* we raced with a resize; acquire ht->lock to see the updated ht->map */ | |
fe9959a2 | 365 | qht_lock(ht); |
2e11264a EC |
366 | map = ht->map; |
367 | qht_map_lock_buckets(map); | |
fe9959a2 | 368 | qht_unlock(ht); |
2e11264a EC |
369 | *pmap = map; |
370 | return; | |
371 | } | |
372 | ||
373 | /* | |
374 | * Get a head bucket and lock it, making sure its parent map is not stale. | |
375 | * @pmap is filled with a pointer to the bucket's parent map. | |
376 | * | |
68f7b2be | 377 | * Unlock with qht_bucket_unlock. |
2e11264a EC |
378 | * |
379 | * Note: callers cannot have ht->lock held. | |
380 | */ | |
381 | static inline | |
382 | struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash, | |
383 | struct qht_map **pmap) | |
384 | { | |
385 | struct qht_bucket *b; | |
386 | struct qht_map *map; | |
387 | ||
d73415a3 | 388 | map = qatomic_rcu_read(&ht->map); |
2e11264a EC |
389 | b = qht_map_to_bucket(map, hash); |
390 | ||
68f7b2be | 391 | qht_bucket_lock(map, b); |
2e11264a EC |
392 | if (likely(!qht_map_is_stale__locked(ht, map))) { |
393 | *pmap = map; | |
394 | return b; | |
395 | } | |
68f7b2be | 396 | qht_bucket_unlock(map, b); |
2e11264a EC |
397 | |
398 | /* we raced with a resize; acquire ht->lock to see the updated ht->map */ | |
fe9959a2 | 399 | qht_lock(ht); |
2e11264a EC |
400 | map = ht->map; |
401 | b = qht_map_to_bucket(map, hash); | |
68f7b2be | 402 | qht_bucket_lock(map, b); |
fe9959a2 | 403 | qht_unlock(ht); |
2e11264a EC |
404 | *pmap = map; |
405 | return b; | |
406 | } | |
407 | ||
1911c8a3 | 408 | static inline bool qht_map_needs_resize(const struct qht_map *map) |
2e11264a | 409 | { |
d73415a3 SH |
410 | return qatomic_read(&map->n_added_buckets) > |
411 | map->n_added_buckets_threshold; | |
2e11264a EC |
412 | } |
413 | ||
68f7b2be EC |
414 | static inline void qht_chain_destroy(struct qht_map *map, |
415 | struct qht_bucket *head) | |
2e11264a EC |
416 | { |
417 | struct qht_bucket *curr = head->next; | |
418 | struct qht_bucket *prev; | |
419 | ||
68f7b2be | 420 | qht_do_if_first_in_stripe(map, head, qemu_spin_destroy); |
2e11264a EC |
421 | while (curr) { |
422 | prev = curr; | |
423 | curr = curr->next; | |
424 | qemu_vfree(prev); | |
425 | } | |
426 | } | |
427 | ||
428 | /* pass only an orphan map */ | |
429 | static void qht_map_destroy(struct qht_map *map) | |
430 | { | |
431 | size_t i; | |
432 | ||
433 | for (i = 0; i < map->n_buckets; i++) { | |
68f7b2be | 434 | qht_chain_destroy(map, &map->buckets[i]); |
2e11264a EC |
435 | } |
436 | qemu_vfree(map->buckets); | |
437 | g_free(map); | |
438 | } | |
439 | ||
440 | static struct qht_map *qht_map_create(size_t n_buckets) | |
441 | { | |
442 | struct qht_map *map; | |
443 | size_t i; | |
444 | ||
445 | map = g_malloc(sizeof(*map)); | |
446 | map->n_buckets = n_buckets; | |
447 | ||
448 | map->n_added_buckets = 0; | |
449 | map->n_added_buckets_threshold = n_buckets / | |
450 | QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV; | |
451 | ||
452 | /* let tiny hash tables to at least add one non-head bucket */ | |
453 | if (unlikely(map->n_added_buckets_threshold == 0)) { | |
454 | map->n_added_buckets_threshold = 1; | |
455 | } | |
456 | ||
457 | map->buckets = qemu_memalign(QHT_BUCKET_ALIGN, | |
458 | sizeof(*map->buckets) * n_buckets); | |
459 | for (i = 0; i < n_buckets; i++) { | |
68f7b2be | 460 | qht_head_init(map, &map->buckets[i]); |
2e11264a EC |
461 | } |
462 | return map; | |
463 | } | |
464 | ||
61b8cef1 EC |
465 | void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems, |
466 | unsigned int mode) | |
2e11264a EC |
467 | { |
468 | struct qht_map *map; | |
469 | size_t n_buckets = qht_elems_to_buckets(n_elems); | |
470 | ||
61b8cef1 EC |
471 | g_assert(cmp); |
472 | ht->cmp = cmp; | |
2e11264a EC |
473 | ht->mode = mode; |
474 | qemu_mutex_init(&ht->lock); | |
475 | map = qht_map_create(n_buckets); | |
d73415a3 | 476 | qatomic_rcu_set(&ht->map, map); |
2e11264a EC |
477 | } |
478 | ||
479 | /* call only when there are no readers/writers left */ | |
480 | void qht_destroy(struct qht *ht) | |
481 | { | |
482 | qht_map_destroy(ht->map); | |
483 | memset(ht, 0, sizeof(*ht)); | |
484 | } | |
485 | ||
486 | static void qht_bucket_reset__locked(struct qht_bucket *head) | |
487 | { | |
488 | struct qht_bucket *b = head; | |
489 | int i; | |
490 | ||
491 | seqlock_write_begin(&head->sequence); | |
492 | do { | |
493 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
494 | if (b->pointers[i] == NULL) { | |
495 | goto done; | |
496 | } | |
d73415a3 SH |
497 | qatomic_set(&b->hashes[i], 0); |
498 | qatomic_set(&b->pointers[i], NULL); | |
2e11264a EC |
499 | } |
500 | b = b->next; | |
501 | } while (b); | |
502 | done: | |
503 | seqlock_write_end(&head->sequence); | |
504 | } | |
505 | ||
506 | /* call with all bucket locks held */ | |
507 | static void qht_map_reset__all_locked(struct qht_map *map) | |
508 | { | |
509 | size_t i; | |
510 | ||
511 | for (i = 0; i < map->n_buckets; i++) { | |
512 | qht_bucket_reset__locked(&map->buckets[i]); | |
513 | } | |
514 | qht_map_debug__all_locked(map); | |
515 | } | |
516 | ||
/*
 * Remove all entries from @ht, keeping its current map (and thus its size).
 * All bucket locks of the current map are held while the entries are cleared.
 */
void qht_reset(struct qht *ht)
{
    struct qht_map *cur;

    qht_map_lock_buckets__no_stale(ht, &cur);
    qht_map_reset__all_locked(cur);
    qht_map_unlock_buckets(cur);
}
525 | ||
76b553b3 EC |
526 | static inline void qht_do_resize(struct qht *ht, struct qht_map *new) |
527 | { | |
528 | qht_do_resize_reset(ht, new, false); | |
529 | } | |
530 | ||
/* Resize @ht to map @new and reset it; call with ht->lock held. */
static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    const bool reset = true;

    qht_do_resize_reset(ht, new, reset);
}
535 | ||
2e11264a EC |
536 | bool qht_reset_size(struct qht *ht, size_t n_elems) |
537 | { | |
f555a9d0 | 538 | struct qht_map *new = NULL; |
2e11264a EC |
539 | struct qht_map *map; |
540 | size_t n_buckets; | |
2e11264a EC |
541 | |
542 | n_buckets = qht_elems_to_buckets(n_elems); | |
543 | ||
fe9959a2 | 544 | qht_lock(ht); |
2e11264a EC |
545 | map = ht->map; |
546 | if (n_buckets != map->n_buckets) { | |
547 | new = qht_map_create(n_buckets); | |
2e11264a | 548 | } |
76b553b3 | 549 | qht_do_resize_and_reset(ht, new); |
fe9959a2 | 550 | qht_unlock(ht); |
2e11264a | 551 | |
f555a9d0 | 552 | return !!new; |
2e11264a EC |
553 | } |
554 | ||
555 | static inline | |
e6c58299 | 556 | void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func, |
2e11264a EC |
557 | const void *userp, uint32_t hash) |
558 | { | |
e6c58299 | 559 | const struct qht_bucket *b = head; |
2e11264a EC |
560 | int i; |
561 | ||
562 | do { | |
563 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
d73415a3 | 564 | if (qatomic_read(&b->hashes[i]) == hash) { |
34506b30 PB |
565 | /* The pointer is dereferenced before seqlock_read_retry, |
566 | * so (unlike qht_insert__locked) we need to use | |
d73415a3 | 567 | * qatomic_rcu_read here. |
34506b30 | 568 | */ |
d73415a3 | 569 | void *p = qatomic_rcu_read(&b->pointers[i]); |
2e11264a EC |
570 | |
571 | if (likely(p) && likely(func(p, userp))) { | |
572 | return p; | |
573 | } | |
574 | } | |
575 | } | |
d73415a3 | 576 | b = qatomic_rcu_read(&b->next); |
2e11264a EC |
577 | } while (b); |
578 | ||
579 | return NULL; | |
580 | } | |
581 | ||
582 | static __attribute__((noinline)) | |
e6c58299 | 583 | void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func, |
2e11264a EC |
584 | const void *userp, uint32_t hash) |
585 | { | |
586 | unsigned int version; | |
587 | void *ret; | |
588 | ||
589 | do { | |
590 | version = seqlock_read_begin(&b->sequence); | |
591 | ret = qht_do_lookup(b, func, userp, hash); | |
592 | } while (seqlock_read_retry(&b->sequence, version)); | |
593 | return ret; | |
594 | } | |
595 | ||
e6c58299 | 596 | void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash, |
61b8cef1 | 597 | qht_lookup_func_t func) |
2e11264a | 598 | { |
e6c58299 EC |
599 | const struct qht_bucket *b; |
600 | const struct qht_map *map; | |
2e11264a EC |
601 | unsigned int version; |
602 | void *ret; | |
603 | ||
d73415a3 | 604 | map = qatomic_rcu_read(&ht->map); |
2e11264a EC |
605 | b = qht_map_to_bucket(map, hash); |
606 | ||
607 | version = seqlock_read_begin(&b->sequence); | |
608 | ret = qht_do_lookup(b, func, userp, hash); | |
609 | if (likely(!seqlock_read_retry(&b->sequence, version))) { | |
610 | return ret; | |
611 | } | |
612 | /* | |
613 | * Removing the do/while from the fastpath gives a 4% perf. increase when | |
614 | * running a 100%-lookup microbenchmark. | |
615 | */ | |
616 | return qht_lookup__slowpath(b, func, userp, hash); | |
617 | } | |
618 | ||
e6c58299 | 619 | void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash) |
61b8cef1 EC |
620 | { |
621 | return qht_lookup_custom(ht, userp, hash, ht->cmp); | |
622 | } | |
623 | ||
1911c8a3 EC |
624 | /* |
625 | * call with head->lock held | |
626 | * @ht is const since it is only used for ht->cmp() | |
627 | */ | |
628 | static void *qht_insert__locked(const struct qht *ht, struct qht_map *map, | |
32359d52 EC |
629 | struct qht_bucket *head, void *p, uint32_t hash, |
630 | bool *needs_resize) | |
2e11264a EC |
631 | { |
632 | struct qht_bucket *b = head; | |
633 | struct qht_bucket *prev = NULL; | |
634 | struct qht_bucket *new = NULL; | |
635 | int i; | |
636 | ||
637 | do { | |
638 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
639 | if (b->pointers[i]) { | |
32359d52 EC |
640 | if (unlikely(b->hashes[i] == hash && |
641 | ht->cmp(b->pointers[i], p))) { | |
642 | return b->pointers[i]; | |
2e11264a EC |
643 | } |
644 | } else { | |
645 | goto found; | |
646 | } | |
647 | } | |
648 | prev = b; | |
649 | b = b->next; | |
650 | } while (b); | |
651 | ||
652 | b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b)); | |
653 | memset(b, 0, sizeof(*b)); | |
654 | new = b; | |
655 | i = 0; | |
d73415a3 | 656 | qatomic_inc(&map->n_added_buckets); |
2e11264a EC |
657 | if (unlikely(qht_map_needs_resize(map)) && needs_resize) { |
658 | *needs_resize = true; | |
659 | } | |
660 | ||
661 | found: | |
662 | /* found an empty key: acquire the seqlock and write */ | |
663 | seqlock_write_begin(&head->sequence); | |
664 | if (new) { | |
d73415a3 | 665 | qatomic_rcu_set(&prev->next, b); |
2e11264a | 666 | } |
34506b30 | 667 | /* smp_wmb() implicit in seqlock_write_begin. */ |
d73415a3 SH |
668 | qatomic_set(&b->hashes[i], hash); |
669 | qatomic_set(&b->pointers[i], p); | |
2e11264a | 670 | seqlock_write_end(&head->sequence); |
32359d52 | 671 | return NULL; |
2e11264a EC |
672 | } |
673 | ||
674 | static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht) | |
675 | { | |
676 | struct qht_map *map; | |
677 | ||
678 | /* | |
679 | * If the lock is taken it probably means there's an ongoing resize, | |
680 | * so bail out. | |
681 | */ | |
fe9959a2 | 682 | if (qht_trylock(ht)) { |
2e11264a EC |
683 | return; |
684 | } | |
685 | map = ht->map; | |
686 | /* another thread might have just performed the resize we were after */ | |
687 | if (qht_map_needs_resize(map)) { | |
688 | struct qht_map *new = qht_map_create(map->n_buckets * 2); | |
689 | ||
2e11264a | 690 | qht_do_resize(ht, new); |
2e11264a | 691 | } |
fe9959a2 | 692 | qht_unlock(ht); |
2e11264a EC |
693 | } |
694 | ||
32359d52 | 695 | bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing) |
2e11264a EC |
696 | { |
697 | struct qht_bucket *b; | |
698 | struct qht_map *map; | |
699 | bool needs_resize = false; | |
32359d52 | 700 | void *prev; |
2e11264a EC |
701 | |
702 | /* NULL pointers are not supported */ | |
703 | qht_debug_assert(p); | |
704 | ||
705 | b = qht_bucket_lock__no_stale(ht, hash, &map); | |
32359d52 | 706 | prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize); |
2e11264a | 707 | qht_bucket_debug__locked(b); |
68f7b2be | 708 | qht_bucket_unlock(map, b); |
2e11264a EC |
709 | |
710 | if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) { | |
711 | qht_grow_maybe(ht); | |
712 | } | |
32359d52 EC |
713 | if (likely(prev == NULL)) { |
714 | return true; | |
715 | } | |
716 | if (existing) { | |
717 | *existing = prev; | |
718 | } | |
719 | return false; | |
2e11264a EC |
720 | } |
721 | ||
1911c8a3 | 722 | static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos) |
2e11264a EC |
723 | { |
724 | if (pos == QHT_BUCKET_ENTRIES - 1) { | |
725 | if (b->next == NULL) { | |
726 | return true; | |
727 | } | |
728 | return b->next->pointers[0] == NULL; | |
729 | } | |
730 | return b->pointers[pos + 1] == NULL; | |
731 | } | |
732 | ||
733 | static void | |
734 | qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) | |
735 | { | |
736 | qht_debug_assert(!(to == from && i == j)); | |
737 | qht_debug_assert(to->pointers[i]); | |
738 | qht_debug_assert(from->pointers[j]); | |
739 | ||
d73415a3 SH |
740 | qatomic_set(&to->hashes[i], from->hashes[j]); |
741 | qatomic_set(&to->pointers[i], from->pointers[j]); | |
2e11264a | 742 | |
d73415a3 SH |
743 | qatomic_set(&from->hashes[j], 0); |
744 | qatomic_set(&from->pointers[j], NULL); | |
2e11264a EC |
745 | } |
746 | ||
747 | /* | |
9650ad3e | 748 | * Find the last valid entry in @orig, and swap it with @orig[pos], which has |
2e11264a EC |
749 | * just been invalidated. |
750 | */ | |
751 | static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos) | |
752 | { | |
753 | struct qht_bucket *b = orig; | |
754 | struct qht_bucket *prev = NULL; | |
755 | int i; | |
756 | ||
757 | if (qht_entry_is_last(orig, pos)) { | |
def48ddd | 758 | qatomic_set(&orig->hashes[pos], 0); |
d73415a3 | 759 | qatomic_set(&orig->pointers[pos], NULL); |
2e11264a EC |
760 | return; |
761 | } | |
762 | do { | |
763 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
764 | if (b->pointers[i]) { | |
765 | continue; | |
766 | } | |
767 | if (i > 0) { | |
768 | return qht_entry_move(orig, pos, b, i - 1); | |
769 | } | |
770 | qht_debug_assert(prev); | |
771 | return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); | |
772 | } | |
773 | prev = b; | |
774 | b = b->next; | |
775 | } while (b); | |
776 | /* no free entries other than orig[pos], so swap it with the last one */ | |
777 | qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); | |
778 | } | |
779 | ||
780 | /* call with b->lock held */ | |
781 | static inline | |
e2f07efa | 782 | bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash) |
2e11264a EC |
783 | { |
784 | struct qht_bucket *b = head; | |
785 | int i; | |
786 | ||
787 | do { | |
788 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
789 | void *q = b->pointers[i]; | |
790 | ||
791 | if (unlikely(q == NULL)) { | |
792 | return false; | |
793 | } | |
794 | if (q == p) { | |
795 | qht_debug_assert(b->hashes[i] == hash); | |
796 | seqlock_write_begin(&head->sequence); | |
797 | qht_bucket_remove_entry(b, i); | |
798 | seqlock_write_end(&head->sequence); | |
799 | return true; | |
800 | } | |
801 | } | |
802 | b = b->next; | |
803 | } while (b); | |
804 | return false; | |
805 | } | |
806 | ||
/*
 * Remove pointer @p, which was inserted with hash @hash, from @ht.
 * Returns true if the pointer was found and removed, false otherwise.
 */
bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_map *map;
    struct qht_bucket *head;
    bool removed;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    head = qht_bucket_lock__no_stale(ht, hash, &map);
    removed = qht_remove__locked(head, p, hash);
    qht_bucket_debug__locked(head);
    qht_bucket_unlock(map, head);

    return removed;
}
822 | ||
78255ba2 | 823 | static inline void qht_bucket_iter(struct qht_bucket *head, |
69d55e9c | 824 | const struct qht_iter *iter, void *userp) |
2e11264a | 825 | { |
69d55e9c | 826 | struct qht_bucket *b = head; |
2e11264a EC |
827 | int i; |
828 | ||
829 | do { | |
830 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { | |
831 | if (b->pointers[i] == NULL) { | |
832 | return; | |
833 | } | |
69d55e9c EC |
834 | switch (iter->type) { |
835 | case QHT_ITER_VOID: | |
78255ba2 | 836 | iter->f.retvoid(b->pointers[i], b->hashes[i], userp); |
69d55e9c EC |
837 | break; |
838 | case QHT_ITER_RM: | |
78255ba2 | 839 | if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) { |
69d55e9c EC |
840 | /* replace i with the last valid element in the bucket */ |
841 | seqlock_write_begin(&head->sequence); | |
842 | qht_bucket_remove_entry(b, i); | |
843 | seqlock_write_end(&head->sequence); | |
844 | qht_bucket_debug__locked(b); | |
845 | /* reevaluate i, since it just got replaced */ | |
846 | i--; | |
847 | continue; | |
848 | } | |
849 | break; | |
850 | default: | |
851 | g_assert_not_reached(); | |
852 | } | |
2e11264a EC |
853 | } |
854 | b = b->next; | |
855 | } while (b); | |
856 | } | |
857 | ||
858 | /* call with all of the map's locks held */ | |
78255ba2 | 859 | static inline void qht_map_iter__all_locked(struct qht_map *map, |
69d55e9c EC |
860 | const struct qht_iter *iter, |
861 | void *userp) | |
2e11264a EC |
862 | { |
863 | size_t i; | |
864 | ||
865 | for (i = 0; i < map->n_buckets; i++) { | |
78255ba2 | 866 | qht_bucket_iter(&map->buckets[i], iter, userp); |
2e11264a EC |
867 | } |
868 | } | |
869 | ||
69d55e9c EC |
870 | static inline void |
871 | do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp) | |
2e11264a EC |
872 | { |
873 | struct qht_map *map; | |
874 | ||
d73415a3 | 875 | map = qatomic_rcu_read(&ht->map); |
2e11264a | 876 | qht_map_lock_buckets(map); |
78255ba2 | 877 | qht_map_iter__all_locked(map, iter, userp); |
2e11264a EC |
878 | qht_map_unlock_buckets(map); |
879 | } | |
880 | ||
69d55e9c EC |
881 | void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp) |
882 | { | |
883 | const struct qht_iter iter = { | |
884 | .f.retvoid = func, | |
885 | .type = QHT_ITER_VOID, | |
886 | }; | |
887 | ||
888 | do_qht_iter(ht, &iter, userp); | |
889 | } | |
890 | ||
891 | void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp) | |
892 | { | |
893 | const struct qht_iter iter = { | |
894 | .f.retbool = func, | |
895 | .type = QHT_ITER_RM, | |
896 | }; | |
897 | ||
898 | do_qht_iter(ht, &iter, userp); | |
899 | } | |
900 | ||
78255ba2 EC |
/* Context handed to qht_map_copy() through the iterator's userp argument. */
struct qht_map_copy_data {
    struct qht *ht;         /* table passed on to qht_insert__locked() */
    struct qht_map *new;    /* map that receives the copied entries */
};
905 | ||
906 | static void qht_map_copy(void *p, uint32_t hash, void *userp) | |
2e11264a | 907 | { |
78255ba2 EC |
908 | struct qht_map_copy_data *data = userp; |
909 | struct qht *ht = data->ht; | |
910 | struct qht_map *new = data->new; | |
2e11264a EC |
911 | struct qht_bucket *b = qht_map_to_bucket(new, hash); |
912 | ||
913 | /* no need to acquire b->lock because no thread has seen this map yet */ | |
914 | qht_insert__locked(ht, new, b, p, hash, NULL); | |
915 | } | |
916 | ||
917 | /* | |
76b553b3 EC |
918 | * Atomically perform a resize and/or reset. |
919 | * Call with ht->lock held. | |
2e11264a | 920 | */ |
76b553b3 | 921 | static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) |
2e11264a EC |
922 | { |
923 | struct qht_map *old; | |
69d55e9c EC |
924 | const struct qht_iter iter = { |
925 | .f.retvoid = qht_map_copy, | |
926 | .type = QHT_ITER_VOID, | |
927 | }; | |
78255ba2 | 928 | struct qht_map_copy_data data; |
2e11264a EC |
929 | |
930 | old = ht->map; | |
76b553b3 | 931 | qht_map_lock_buckets(old); |
2e11264a | 932 | |
76b553b3 EC |
933 | if (reset) { |
934 | qht_map_reset__all_locked(old); | |
935 | } | |
936 | ||
937 | if (new == NULL) { | |
938 | qht_map_unlock_buckets(old); | |
939 | return; | |
940 | } | |
941 | ||
719a3077 | 942 | g_assert(new->n_buckets != old->n_buckets); |
78255ba2 EC |
943 | data.ht = ht; |
944 | data.new = new; | |
945 | qht_map_iter__all_locked(old, &iter, &data); | |
2e11264a EC |
946 | qht_map_debug__all_locked(new); |
947 | ||
d73415a3 | 948 | qatomic_rcu_set(&ht->map, new); |
76b553b3 | 949 | qht_map_unlock_buckets(old); |
2e11264a EC |
950 | call_rcu(old, qht_map_destroy, rcu); |
951 | } | |
952 | ||
953 | bool qht_resize(struct qht *ht, size_t n_elems) | |
954 | { | |
955 | size_t n_buckets = qht_elems_to_buckets(n_elems); | |
956 | size_t ret = false; | |
957 | ||
fe9959a2 | 958 | qht_lock(ht); |
2e11264a EC |
959 | if (n_buckets != ht->map->n_buckets) { |
960 | struct qht_map *new; | |
2e11264a EC |
961 | |
962 | new = qht_map_create(n_buckets); | |
2e11264a | 963 | qht_do_resize(ht, new); |
2e11264a EC |
964 | ret = true; |
965 | } | |
fe9959a2 | 966 | qht_unlock(ht); |
2e11264a EC |
967 | |
968 | return ret; | |
969 | } | |
970 | ||
971 | /* pass @stats to qht_statistics_destroy() when done */ | |
6579f107 | 972 | void qht_statistics_init(const struct qht *ht, struct qht_stats *stats) |
2e11264a | 973 | { |
6579f107 | 974 | const struct qht_map *map; |
2e11264a EC |
975 | int i; |
976 | ||
d73415a3 | 977 | map = qatomic_rcu_read(&ht->map); |
2e11264a | 978 | |
2e11264a EC |
979 | stats->used_head_buckets = 0; |
980 | stats->entries = 0; | |
981 | qdist_init(&stats->chain); | |
982 | qdist_init(&stats->occupancy); | |
7266ae91 EC |
983 | /* bail out if the qht has not yet been initialized */ |
984 | if (unlikely(map == NULL)) { | |
985 | stats->head_buckets = 0; | |
986 | return; | |
987 | } | |
988 | stats->head_buckets = map->n_buckets; | |
2e11264a EC |
989 | |
990 | for (i = 0; i < map->n_buckets; i++) { | |
6579f107 EC |
991 | const struct qht_bucket *head = &map->buckets[i]; |
992 | const struct qht_bucket *b; | |
2e11264a EC |
993 | unsigned int version; |
994 | size_t buckets; | |
995 | size_t entries; | |
996 | int j; | |
997 | ||
998 | do { | |
999 | version = seqlock_read_begin(&head->sequence); | |
1000 | buckets = 0; | |
1001 | entries = 0; | |
1002 | b = head; | |
1003 | do { | |
1004 | for (j = 0; j < QHT_BUCKET_ENTRIES; j++) { | |
d73415a3 | 1005 | if (qatomic_read(&b->pointers[j]) == NULL) { |
2e11264a EC |
1006 | break; |
1007 | } | |
1008 | entries++; | |
1009 | } | |
1010 | buckets++; | |
d73415a3 | 1011 | b = qatomic_rcu_read(&b->next); |
2e11264a EC |
1012 | } while (b); |
1013 | } while (seqlock_read_retry(&head->sequence, version)); | |
1014 | ||
1015 | if (entries) { | |
1016 | qdist_inc(&stats->chain, buckets); | |
1017 | qdist_inc(&stats->occupancy, | |
1018 | (double)entries / QHT_BUCKET_ENTRIES / buckets); | |
1019 | stats->used_head_buckets++; | |
1020 | stats->entries += entries; | |
1021 | } else { | |
1022 | qdist_inc(&stats->occupancy, 0); | |
1023 | } | |
1024 | } | |
1025 | } | |
1026 | ||
1027 | void qht_statistics_destroy(struct qht_stats *stats) | |
1028 | { | |
1029 | qdist_destroy(&stats->occupancy); | |
1030 | qdist_destroy(&stats->chain); | |
1031 | } |