/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "cmap.h"
#include "hash.h"
#include "ovs-rcu.h"
#include "random.h"
#include "util.h"

/* Optimistic Concurrent Cuckoo Hash
 * =================================
 *
 * A "cuckoo hash" is an open addressing hash table schema, designed such that
 * a given element can be in one of only a small number of buckets 'd', each of
 * which holds up to a small number 'k' elements.  Thus, the expected and
 * worst-case lookup times are O(1) because they require comparing no more than
 * a fixed number of elements (k * d).  Inserting a new element can require
 * moving around existing elements, but it is also O(1) amortized expected
 * time.
 *
 * An optimistic concurrent hash table goes one step further, making it
 * possible for a single writer to execute concurrently with any number of
 * readers without requiring the readers to take any locks.
 *
 * This cuckoo hash implementation uses:
 *
 *    - Two hash functions (d=2).  More hash functions allow for a higher load
 *      factor, but increasing 'k' is easier and the benefits of increasing 'd'
 *      quickly fall off with the 'k' values used here.  Also, the method of
 *      generating hashes used in this implementation is hard to reasonably
 *      extend beyond d=2.  Finally, each additional hash function means that a
 *      lookup has to look at least one extra cache line.
 *
 *    - 5 or 7 elements per bucket (k=5 or k=7), chosen to make buckets
 *      exactly one cache line in size.
 *
 * According to Erlingsson [4], these parameters suggest a maximum load factor
 * of about 93%.  The current implementation is conservative, expanding the
 * hash table when it is over 85% full.
 *
 *
 * Hash Functions
 * ==============
 *
 * A cuckoo hash requires multiple hash functions.  When reorganizing the hash
 * becomes too difficult, it also requires the ability to change the hash
 * functions.  Requiring the client to provide multiple hashes and to be able
 * to change them to new hashes upon insertion is inconvenient.
 *
 * This implementation takes another approach.  The client provides a single,
 * fixed hash.  The cuckoo hash internally "rehashes" this hash against a
 * randomly selected basis value (see rehash()).  This rehashed value is one of
 * the two hashes.  The other hash is computed by 16-bit circular rotation of
 * the rehashed value.  Updating the basis changes the hash functions.
 *
 * To work properly, the hash functions used by a cuckoo hash must be
 * independent.  If one hash function is a function of the other (e.g. h2(x) =
 * h1(x) + 1, or h2(x) = hash(h1(x))), then insertion will eventually fail
 * catastrophically (loop forever) because of collisions.  With this rehashing
 * technique, the two hashes are completely independent for masks up to 16 bits
 * wide.  For masks wider than 16 bits, only 32-n bits are independent between
 * the two hashes.  Thus, it becomes risky to grow a cuckoo hash table beyond
 * about 2**24 buckets (about 71 million elements with k=5 and maximum load
 * 85%).  Fortunately, Open vSwitch does not normally deal with hash tables
 * this large.
 *
 *
 * Handling Duplicates
 * ===================
 *
 * This cuckoo hash table implementation deals with duplicate client-provided
 * hash values by chaining: the second and subsequent cmap_nodes with a given
 * hash are chained off the initially inserted node's 'next' member.  The hash
 * table maintains the invariant that a single client-provided hash value
 * exists in only a single chain in a single bucket (even though that hash
 * could be stored in two buckets).
 *
 *
 * References
 * ==========
 *
 * [1] D. Zhou, B. Fan, H. Lim, M. Kaminsky, D. G. Andersen, "Scalable, High
 *     Performance Ethernet Forwarding with CuckooSwitch".  In Proc. 9th
 *     CoNEXT, Dec. 2013.
 *
 * [2] B. Fan, D. G. Andersen, and M. Kaminsky, "MemC3: Compact and Concurrent
 *     MemCache with Dumber Caching and Smarter Hashing".  In Proc. 10th USENIX
 *     NSDI, Apr. 2013.
 *
 * [3] R. Pagh and F. Rodler, "Cuckoo Hashing".  Journal of Algorithms, 51(2):
 *     122-144, May 2004.
 *
 * [4] U. Erlingsson, M. Manasse, F. McSherry, "A Cool and Practical
 *     Alternative to Traditional Hash Tables".  In Proc. 7th Workshop on
 *     Distributed Data and Structures (WDAS'06), 2006.
 */
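
/* For illustration only: a minimal sketch (not part of the implementation) of
 * how the two candidate buckets for a client-provided hash are derived, as
 * described under "Hash Functions" above.  'impl' and 'hash' are assumed to be
 * a valid cmap_impl and a client hash:
 *
 *     uint32_t h1 = hash_finish(impl->basis, hash);   // rehash()
 *     uint32_t h2 = (h1 << 16) | (h1 >> 16);          // other_hash()
 *     struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
 *     struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];
 *
 * Changing 'impl->basis' changes both hash functions at once. */
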
/* An entry is an int and a pointer: 8 bytes on 32-bit, 12 bytes on 64-bit. */
#define CMAP_ENTRY_SIZE (4 + (UINTPTR_MAX == UINT32_MAX ? 4 : 8))

/* Number of entries per bucket: 7 on 32-bit, 5 on 64-bit. */
#define CMAP_K ((CACHE_LINE_SIZE - 4) / CMAP_ENTRY_SIZE)

/* Pad to make a bucket a full cache line in size: 4 on 32-bit, 0 on 64-bit. */
#define CMAP_PADDING ((CACHE_LINE_SIZE - 4) - (CMAP_K * CMAP_ENTRY_SIZE))
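
/* Worked example (a sketch, assuming the usual CACHE_LINE_SIZE of 64 bytes):
 *
 *     64-bit: CMAP_ENTRY_SIZE = 4 + 8 = 12, CMAP_K = 60 / 12 = 5,
 *             CMAP_PADDING = 60 - 5 * 12 = 0.
 *     32-bit: CMAP_ENTRY_SIZE = 4 + 4 = 8,  CMAP_K = 60 / 8 = 7,
 *             CMAP_PADDING = 60 - 7 * 8 = 4.
 *
 * Either way, the 4-byte counter plus the slots plus the padding fill exactly
 * one cache line. */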

/* A cuckoo hash bucket.  Designed to be cache-aligned and exactly one cache
 * line long. */
struct cmap_bucket {
    /* Allows readers to track in-progress changes.  Initially zero, each
     * writer increments this value just before and just after each change (see
     * cmap_set_bucket()).  Thus, a reader can ensure that it gets a consistent
     * snapshot by waiting for the counter to become even (see
     * read_even_counter()), then checking that its value does not change while
     * examining the bucket (see cmap_find()). */
    atomic_uint32_t counter;

    /* (hash, node) slots.  They are parallel arrays instead of an array of
     * structs to reduce the amount of space lost to padding.
     *
     * The slots are in no particular order.  A null pointer indicates that a
     * pair is unused.  In-use slots are not necessarily in the earliest
     * slots. */
    atomic_uint32_t hashes[CMAP_K];
    struct cmap_node nodes[CMAP_K];

    /* Padding to make cmap_bucket exactly one cache line long. */
#if CMAP_PADDING > 0
    uint8_t pad[CMAP_PADDING];
#endif
};
BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == CACHE_LINE_SIZE);

/* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
 * enlarging a cmap.  Reasonable values lie between about 75% and 93%.  Smaller
 * values waste memory; larger values increase the average insertion time. */
#define CMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))

/* The implementation of a concurrent hash map. */
struct cmap_impl {
    unsigned int n;             /* Number of in-use elements. */
    unsigned int max_n;         /* Max elements before enlarging. */
    uint32_t mask;              /* Number of 'buckets', minus one. */
    uint32_t basis;             /* Basis for rehashing client's hash values. */

    /* Padding to make cmap_impl exactly one cache line long. */
    uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 4];

    struct cmap_bucket buckets[];
};
BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE);

static uint32_t cmap_get_hash__(const atomic_uint32_t *hash,
                                memory_order order)
{
    uint32_t hash__;

    atomic_read_explicit(CONST_CAST(ATOMIC(uint32_t) *, hash), &hash__, order);
    return hash__;
}

#define cmap_get_hash(HASH) \
    cmap_get_hash__(HASH, memory_order_acquire)
#define cmap_get_hash_protected(HASH) \
    cmap_get_hash__(HASH, memory_order_relaxed)

static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);

/* Given a rehashed value 'hash', returns the other hash for that rehashed
 * value.  This is symmetric: other_hash(other_hash(x)) == x.  (See also "Hash
 * Functions" at the top of this file.) */
static uint32_t
other_hash(uint32_t hash)
{
    return (hash << 16) | (hash >> 16);
}

/* Returns the rehashed value for 'hash' within 'impl'.  (See also "Hash
 * Functions" at the top of this file.) */
static uint32_t
rehash(const struct cmap_impl *impl, uint32_t hash)
{
    return hash_finish(impl->basis, hash);
}

static struct cmap_impl *
cmap_get_impl(const struct cmap *cmap)
{
    return ovsrcu_get(struct cmap_impl *, &cmap->impl);
}

static uint32_t
calc_max_n(uint32_t mask)
{
    return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MAX_LOAD) >> 32;
}
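
/* Worked example for calc_max_n() (a sketch, assuming CMAP_K == 5 and the 85%
 * CMAP_MAX_LOAD above): with mask == 1 there are 2 buckets and 2 * 5 = 10
 * slots, so max_n = floor(10 * 0.85) = 8, and the table is enlarged once a
 * ninth element is about to be inserted. */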

static struct cmap_impl *
cmap_impl_create(uint32_t mask)
{
    struct cmap_impl *impl;

    ovs_assert(is_pow2(mask + 1));

    impl = xzalloc_cacheline(sizeof *impl
                             + (mask + 1) * sizeof *impl->buckets);
    impl->n = 0;
    impl->max_n = calc_max_n(mask);
    impl->mask = mask;
    impl->basis = random_uint32();

    return impl;
}

/* Initializes 'cmap' as an empty concurrent hash map. */
void
cmap_init(struct cmap *cmap)
{
    ovsrcu_set(&cmap->impl, cmap_impl_create(0));
}

/* Destroys 'cmap'.
 *
 * The client is responsible for destroying any data previously held in
 * 'cmap'. */
void
cmap_destroy(struct cmap *cmap)
{
    if (cmap) {
        ovsrcu_postpone(free_cacheline, cmap_get_impl(cmap));
    }
}

/* Returns the number of elements in 'cmap'. */
size_t
cmap_count(const struct cmap *cmap)
{
    return cmap_get_impl(cmap)->n;
}

/* Returns true if 'cmap' is empty, false otherwise. */
bool
cmap_is_empty(const struct cmap *cmap)
{
    return cmap_count(cmap) == 0;
}

static uint32_t
read_counter(struct cmap_bucket *bucket)
{
    uint32_t counter;

    atomic_read_explicit(&bucket->counter, &counter, memory_order_acquire);
    return counter;
}

static uint32_t
read_even_counter(struct cmap_bucket *bucket)
{
    uint32_t counter;

    do {
        counter = read_counter(bucket);
    } while (OVS_UNLIKELY(counter & 1));

    return counter;
}

static bool
counter_changed(struct cmap_bucket *b, uint32_t c)
{
    return OVS_UNLIKELY(read_counter(b) != c);
}

/* Searches 'cmap' for an element with the specified 'hash'.  If one or more is
 * found, returns a pointer to the first one, otherwise a null pointer.  All of
 * the nodes on the returned list are guaranteed to have exactly the given
 * 'hash'.
 *
 * This function works even if 'cmap' is changing concurrently.  If 'cmap' is
 * not changing, then cmap_find_protected() is slightly faster.
 *
 * CMAP_FOR_EACH_WITH_HASH is usually more convenient. */
struct cmap_node *
cmap_find(const struct cmap *cmap, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    struct cmap_bucket *b1;
    struct cmap_bucket *b2;
    uint32_t c1, c2;
    int i;

retry:
    b1 = &impl->buckets[h1 & impl->mask];
    c1 = read_even_counter(b1);
    for (i = 0; i < CMAP_K; i++) {
        struct cmap_node *node = cmap_node_next(&b1->nodes[i]);

        if (node && cmap_get_hash(&b1->hashes[i]) == hash) {
            if (counter_changed(b1, c1)) {
                goto retry;
            }
            return node;
        }
    }

    b2 = &impl->buckets[h2 & impl->mask];
    c2 = read_even_counter(b2);
    for (i = 0; i < CMAP_K; i++) {
        struct cmap_node *node = cmap_node_next(&b2->nodes[i]);

        if (node && cmap_get_hash(&b2->hashes[i]) == hash) {
            if (counter_changed(b2, c2)) {
                goto retry;
            }
            return node;
        }
    }

    if (counter_changed(b1, c1) || counter_changed(b2, c2)) {
        goto retry;
    }
    return NULL;
}
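
/* Illustrative usage sketch (the 'struct my_node' type, 'key', and 'my_cmap'
 * are assumptions, not part of this file).  A client typically embeds a
 * cmap_node in its own struct and, because different keys can share a hash,
 * compares the full key for each candidate:
 *
 *     struct my_node {
 *         struct cmap_node cmap_node;   // Within the owning cmap.
 *         int key;
 *     };
 *
 *     struct my_node *n;
 *     CMAP_FOR_EACH_WITH_HASH (n, cmap_node, hash_int(key, 0), &my_cmap) {
 *         if (n->key == key) {
 *             return n;
 *         }
 *     }
 *     return NULL;
 */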

static int
cmap_find_slot_protected(struct cmap_bucket *b, uint32_t hash)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

        if (node && cmap_get_hash_protected(&b->hashes[i]) == hash) {
            return i;
        }
    }
    return -1;
}

static struct cmap_node *
cmap_find_bucket_protected(struct cmap_impl *impl, uint32_t hash, uint32_t h)
{
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
    int i;

    for (i = 0; i < CMAP_K; i++) {
        struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

        if (node && cmap_get_hash_protected(&b->hashes[i]) == hash) {
            return node;
        }
    }
    return NULL;
}

/* Like cmap_find(), but only for use if 'cmap' cannot change concurrently.
 *
 * CMAP_FOR_EACH_WITH_HASH_PROTECTED is usually more convenient. */
struct cmap_node *
cmap_find_protected(const struct cmap *cmap, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    struct cmap_node *node;

    node = cmap_find_bucket_protected(impl, hash, h1);
    if (node) {
        return node;
    }
    return cmap_find_bucket_protected(impl, hash, h2);
}

static int
cmap_find_empty_slot_protected(const struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (!cmap_node_next_protected(&b->nodes[i])) {
            return i;
        }
    }
    return -1;
}

static void
cmap_set_bucket(struct cmap_bucket *b, int i,
                struct cmap_node *node, uint32_t hash)
{
    uint32_t c;

    atomic_read_explicit(&b->counter, &c, memory_order_acquire);
    atomic_store_explicit(&b->counter, c + 1, memory_order_release);
    ovsrcu_set(&b->nodes[i].next, node); /* Also atomic. */
    atomic_store_explicit(&b->hashes[i], hash, memory_order_release);
    atomic_store_explicit(&b->counter, c + 2, memory_order_release);
}

/* Searches 'b' for a node with the given 'hash'.  If it finds one, adds
 * 'new_node' to the node's linked list and returns true.  If it does not find
 * one, returns false. */
static bool
cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
                struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

        if (cmap_get_hash_protected(&b->hashes[i]) == hash) {
            if (node) {
                struct cmap_node *p;

                /* The common case is that 'new_node' is a singleton,
                 * with a null 'next' pointer.  Rehashing can add a
                 * longer chain, but due to our invariant of keeping all
                 * nodes with the same (user) hash value in a single
                 * chain, rehashing always inserts such a chain into an
                 * empty slot.  The only way we can end up here is by
                 * the user inserting a chain of nodes at once.  Find
                 * the end of the chain starting at 'new_node', then
                 * splice 'node' to the end of that chain. */
                p = new_node;
                for (;;) {
                    struct cmap_node *next = cmap_node_next_protected(p);

                    if (!next) {
                        break;
                    }
                    p = next;
                }
                ovsrcu_set_hidden(&p->next, node);
            } else {
                /* The hash value is there from some previous insertion, but
                 * the associated node has been removed.  We're not really
                 * inserting a duplicate, but we can still reuse the slot.
                 * Carry on. */
            }

            /* Change the bucket to point to 'new_node'.  This is a degenerate
             * form of cmap_set_bucket() that doesn't update the counter since
             * we're only touching one field and in a way that doesn't change
             * the bucket's meaning for readers. */
            ovsrcu_set(&b->nodes[i].next, new_node);

            return true;
        }
    }
    return false;
}

/* Searches 'b' for an empty slot.  If successful, stores 'node' and 'hash' in
 * the slot and returns true.  Otherwise, returns false. */
static bool
cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
                   struct cmap_bucket *b)
{
    int i;

    for (i = 0; i < CMAP_K; i++) {
        if (!cmap_node_next_protected(&b->nodes[i])) {
            cmap_set_bucket(b, i, node, hash);
            return true;
        }
    }
    return false;
}

/* Returns the other bucket that b->nodes[slot] could occupy in 'impl'.  (This
 * might be the same as 'b'.) */
static struct cmap_bucket *
other_bucket_protected(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
{
    uint32_t h1 = rehash(impl, cmap_get_hash_protected(&b->hashes[slot]));
    uint32_t h2 = other_hash(h1);
    uint32_t b_idx = b - impl->buckets;
    uint32_t other_h = (h1 & impl->mask) == b_idx ? h2 : h1;

    return &impl->buckets[other_h & impl->mask];
}

/* 'new_node' is to be inserted into 'impl', but both candidate buckets 'b1'
 * and 'b2' are full.  This function attempts to rearrange buckets within
 * 'impl' to make room for 'new_node'.
 *
 * The implementation is a general-purpose breadth-first search.  At first
 * glance, this is more complex than a random walk through 'impl' (suggested by
 * some references), but random walks have a tendency to loop back through a
 * single bucket.  We have to move nodes backward along the path that we find,
 * so that no node actually disappears from the hash table, which means a
 * random walk would have to be careful to deal with loops.  By contrast, a
 * successful breadth-first search always finds a *shortest* path through the
 * hash table, and a shortest path will never contain loops, so it avoids that
 * problem entirely.
 */
static bool
cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
                uint32_t hash, struct cmap_bucket *b1, struct cmap_bucket *b2)
{
    enum { MAX_DEPTH = 4 };

    /* A path from 'start' to 'end' via the 'n' steps in 'slots[]'.
     *
     * One can follow the path via:
     *
     *     struct cmap_bucket *b;
     *     int i;
     *
     *     b = path->start;
     *     for (i = 0; i < path->n; i++) {
     *         b = other_bucket_protected(impl, b, path->slots[i]);
     *     }
     *     ovs_assert(b == path->end);
     */
    struct cmap_path {
        struct cmap_bucket *start; /* First bucket along the path. */
        struct cmap_bucket *end;   /* Last bucket on the path. */
        uint8_t slots[MAX_DEPTH];  /* Slots used for each hop. */
        int n;                     /* Number of slots[]. */
    };

    /* We need to limit the amount of work we do trying to find a path.  It
     * might actually be impossible to rearrange the cmap, and after some time
     * it is likely to be easier to rehash the entire cmap.
     *
     * This value of MAX_QUEUE is an arbitrary limit suggested by one of the
     * references.  Empirically, it seems to work OK. */
    enum { MAX_QUEUE = 500 };
    struct cmap_path queue[MAX_QUEUE];
    int head = 0;
    int tail = 0;

    /* Add 'b1' and 'b2' as starting points for the search. */
    queue[head].start = b1;
    queue[head].end = b1;
    queue[head].n = 0;
    head++;
    if (b1 != b2) {
        queue[head].start = b2;
        queue[head].end = b2;
        queue[head].n = 0;
        head++;
    }

    while (tail < head) {
        const struct cmap_path *path = &queue[tail++];
        struct cmap_bucket *this = path->end;
        int i;

        for (i = 0; i < CMAP_K; i++) {
            struct cmap_bucket *next = other_bucket_protected(impl, this, i);
            int j;

            if (this == next) {
                continue;
            }

            j = cmap_find_empty_slot_protected(next);
            if (j >= 0) {
                /* We've found a path along which we can rearrange the hash
                 * table:  Start at path->start, follow all the slots in
                 * path->slots[], then follow slot 'i', then the bucket you
                 * arrive at has slot 'j' empty. */
                struct cmap_bucket *buckets[MAX_DEPTH + 2];
                int slots[MAX_DEPTH + 2];
                int k;

                /* Figure out the full sequence of slots. */
                for (k = 0; k < path->n; k++) {
                    slots[k] = path->slots[k];
                }
                slots[path->n] = i;
                slots[path->n + 1] = j;

                /* Figure out the full sequence of buckets. */
                buckets[0] = path->start;
                for (k = 0; k <= path->n; k++) {
                    buckets[k + 1] = other_bucket_protected(impl, buckets[k],
                                                            slots[k]);
                }

                /* Now the path is fully expressed.  One can start from
                 * buckets[0], go via slots[0] to buckets[1], via slots[1] to
                 * buckets[2], and so on.
                 *
                 * Move all the nodes across the path "backward".  After each
                 * step some node appears in two buckets.  Thus, every node is
                 * always visible to a concurrent search. */
                for (k = path->n + 1; k > 0; k--) {
                    int slot = slots[k - 1];

                    cmap_set_bucket(buckets[k], slots[k],
                        cmap_node_next_protected(&buckets[k - 1]->nodes[slot]),
                        cmap_get_hash_protected(&buckets[k - 1]->hashes[slot]));
                }

                /* Finally, replace the first node on the path by
                 * 'new_node'. */
                cmap_set_bucket(buckets[0], slots[0], new_node, hash);

                return true;
            }

            if (path->n < MAX_DEPTH && head < MAX_QUEUE) {
                struct cmap_path *new_path = &queue[head++];

                *new_path = *path;
                new_path->end = next;
                new_path->slots[new_path->n++] = i;
            }
        }
    }

    return false;
}

/* Adds 'node', with the given 'hash', to 'impl'.
 *
 * 'node' is ordinarily a single node, with a null 'next' pointer.  When
 * rehashing, however, it may be a longer chain of nodes. */
static bool
cmap_try_insert(struct cmap_impl *impl, struct cmap_node *node, uint32_t hash)
{
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
    struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];

    return (OVS_UNLIKELY(cmap_insert_dup(node, hash, b1) ||
                         cmap_insert_dup(node, hash, b2)) ||
            OVS_LIKELY(cmap_insert_bucket(node, hash, b1) ||
                       cmap_insert_bucket(node, hash, b2)) ||
            cmap_insert_bfs(impl, node, hash, b1, b2));
}

/* Inserts 'node', with the given 'hash', into 'cmap'.  The caller must ensure
 * that 'cmap' cannot change concurrently (from another thread).  If duplicates
 * are undesirable, the caller must have already verified that 'cmap' does not
 * contain a duplicate of 'node'. */
void
cmap_insert(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);

    ovsrcu_set_hidden(&node->next, NULL);

    if (OVS_UNLIKELY(impl->n >= impl->max_n)) {
        impl = cmap_rehash(cmap, (impl->mask << 1) | 1);
    }

    while (OVS_UNLIKELY(!cmap_try_insert(impl, node, hash))) {
        impl = cmap_rehash(cmap, impl->mask);
    }
    impl->n++;
}
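
/* Illustrative usage sketch (again assuming the hypothetical 'struct my_node',
 * 'key', and 'my_cmap' from the sketch above cmap_find()).  The caller
 * allocates an element that embeds a cmap_node and inserts it under the hash
 * of its key:
 *
 *     struct my_node *n = xmalloc(sizeof *n);
 *     n->key = key;
 *     cmap_insert(&my_cmap, &n->cmap_node, hash_int(n->key, 0));
 */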

static bool
cmap_replace__(struct cmap_impl *impl, struct cmap_node *node,
               struct cmap_node *replacement, uint32_t hash, uint32_t h)
{
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
    int slot;

    slot = cmap_find_slot_protected(b, hash);
    if (slot < 0) {
        return false;
    }

    /* The pointer to 'node' is changed to point to 'replacement',
     * which is the next node if no replacement node is given. */
    if (!replacement) {
        replacement = cmap_node_next_protected(node);
    } else {
        /* 'replacement' takes the position of 'node' in the list. */
        ovsrcu_set_hidden(&replacement->next, cmap_node_next_protected(node));
    }

    struct cmap_node *iter = &b->nodes[slot];
    for (;;) {
        struct cmap_node *next = cmap_node_next_protected(iter);

        if (next == node) {
            ovsrcu_set(&iter->next, replacement);
            return true;
        }
        iter = next;
    }
}

/* Removes 'node' from 'cmap'.  The caller must ensure that 'cmap' cannot
 * change concurrently (from another thread).
 *
 * 'node' must not be destroyed or modified or inserted back into 'cmap' or
 * into any other concurrent hash map while any other thread might be accessing
 * it.  One correct way to do this is to free it from an RCU callback with
 * ovsrcu_postpone(). */
void
cmap_remove(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
{
    cmap_replace(cmap, node, NULL, hash);
    cmap_get_impl(cmap)->n--;
}
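
/* Illustrative removal sketch (again assuming the hypothetical 'struct
 * my_node' above).  After cmap_remove(), concurrent readers may still be
 * traversing the node, so it is freed via RCU rather than immediately:
 *
 *     cmap_remove(&my_cmap, &n->cmap_node, hash_int(n->key, 0));
 *     ovsrcu_postpone(free, n);
 */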

/* Replaces 'old_node' in 'cmap' with 'new_node'.  The caller must
 * ensure that 'cmap' cannot change concurrently (from another thread).
 *
 * 'old_node' must not be destroyed or modified or inserted back into 'cmap' or
 * into any other concurrent hash map while any other thread might be accessing
 * it.  One correct way to do this is to free it from an RCU callback with
 * ovsrcu_postpone(). */
void
cmap_replace(struct cmap *cmap, struct cmap_node *old_node,
             struct cmap_node *new_node, uint32_t hash)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    uint32_t h1 = rehash(impl, hash);
    uint32_t h2 = other_hash(h1);
    bool ok;

    ok = cmap_replace__(impl, old_node, new_node, hash, h1)
        || cmap_replace__(impl, old_node, new_node, hash, h2);
    ovs_assert(ok);
}

static bool
cmap_try_rehash(const struct cmap_impl *old, struct cmap_impl *new)
{
    const struct cmap_bucket *b;

    for (b = old->buckets; b <= &old->buckets[old->mask]; b++) {
        int i;

        for (i = 0; i < CMAP_K; i++) {
            /* Possible optimization here because we know the hashes are
             * unique. */
            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);

            if (node &&
                !cmap_try_insert(new, node,
                                 cmap_get_hash_protected(&b->hashes[i]))) {
                return false;
            }
        }
    }
    return true;
}

static struct cmap_impl *
cmap_rehash(struct cmap *cmap, uint32_t mask)
{
    struct cmap_impl *old = cmap_get_impl(cmap);
    struct cmap_impl *new;

    new = cmap_impl_create(mask);
    ovs_assert(old->n < new->max_n);

    while (!cmap_try_rehash(old, new)) {
        memset(new->buckets, 0, (mask + 1) * sizeof *new->buckets);
        new->basis = random_uint32();
    }

    new->n = old->n;
    ovsrcu_set(&cmap->impl, new);
    ovsrcu_postpone(free_cacheline, old);

    return new;
}

struct cmap_cursor
cmap_cursor_start(const struct cmap *cmap)
{
    struct cmap_cursor cursor;

    cursor.impl = cmap_get_impl(cmap);
    cursor.bucket_idx = 0;
    cursor.entry_idx = 0;
    cursor.node = NULL;
    cmap_cursor_advance(&cursor);

    return cursor;
}

void
cmap_cursor_advance(struct cmap_cursor *cursor)
{
    const struct cmap_impl *impl = cursor->impl;

    if (cursor->node) {
        cursor->node = cmap_node_next(cursor->node);
        if (cursor->node) {
            return;
        }
    }

    while (cursor->bucket_idx <= impl->mask) {
        const struct cmap_bucket *b = &impl->buckets[cursor->bucket_idx];

        while (cursor->entry_idx < CMAP_K) {
            cursor->node = cmap_node_next(&b->nodes[cursor->entry_idx++]);
            if (cursor->node) {
                return;
            }
        }

        cursor->bucket_idx++;
        cursor->entry_idx = 0;
    }
}

/* Returns the next node in 'cmap' in hash order, or NULL if no nodes remain in
 * 'cmap'.  Uses '*pos' to determine where to begin iteration, and updates
 * '*pos' so that the next call continues from where this one leaves off.
 *
 * It's better to use plain CMAP_FOR_EACH and related functions, since they are
 * faster and better at dealing with cmaps that change during iteration.
 *
 * Before beginning iteration, set '*pos' to all zeros. */
struct cmap_node *
cmap_next_position(const struct cmap *cmap,
                   struct cmap_position *pos)
{
    struct cmap_impl *impl = cmap_get_impl(cmap);
    unsigned int bucket = pos->bucket;
    unsigned int entry = pos->entry;
    unsigned int offset = pos->offset;

    while (bucket <= impl->mask) {
        const struct cmap_bucket *b = &impl->buckets[bucket];

        while (entry < CMAP_K) {
            const struct cmap_node *node = cmap_node_next(&b->nodes[entry]);
            unsigned int i;

            for (i = 0; node; i++, node = cmap_node_next(node)) {
                if (i == offset) {
                    if (cmap_node_next(node)) {
                        offset++;
                    } else {
                        entry++;
                        offset = 0;
                    }
                    pos->bucket = bucket;
                    pos->entry = entry;
                    pos->offset = offset;
                    return CONST_CAST(struct cmap_node *, node);
                }
            }

            entry++;
            offset = 0;
        }

        bucket++;
        entry = offset = 0;
    }

    pos->bucket = pos->entry = pos->offset = 0;
    return NULL;
}
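
/* Illustrative iteration sketch (the 'struct my_node' type and 'my_cmap' are
 * assumptions, not part of this file).  Plain iteration normally goes through
 * the CMAP_FOR_EACH macro rather than cmap_next_position():
 *
 *     struct my_node *n;
 *     CMAP_FOR_EACH (n, cmap_node, &my_cmap) {
 *         printf("%d\n", n->key);
 *     }
 */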