1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
5 * Development of this code funded by Astaro AG (http://www.astaro.com/)
8 #include <linux/kernel.h>
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/list.h>
12 #include <linux/log2.h>
13 #include <linux/jhash.h>
14 #include <linux/netlink.h>
15 #include <linux/workqueue.h>
16 #include <linux/rhashtable.h>
17 #include <linux/netfilter.h>
18 #include <linux/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_tables.h>
21 /* We target a hash table size of 4, element hint is 75% of final size */
22 #define NFT_RHASH_ELEMENT_HINT 3
26 struct delayed_work gc_work
;
29 struct nft_rhash_elem
{
30 struct rhash_head node
;
31 struct nft_set_ext ext
;
34 struct nft_rhash_cmp_arg
{
35 const struct nft_set
*set
;
40 static inline u32
nft_rhash_key(const void *data
, u32 len
, u32 seed
)
42 const struct nft_rhash_cmp_arg
*arg
= data
;
44 return jhash(arg
->key
, len
, seed
);
47 static inline u32
nft_rhash_obj(const void *data
, u32 len
, u32 seed
)
49 const struct nft_rhash_elem
*he
= data
;
51 return jhash(nft_set_ext_key(&he
->ext
), len
, seed
);
54 static inline int nft_rhash_cmp(struct rhashtable_compare_arg
*arg
,
57 const struct nft_rhash_cmp_arg
*x
= arg
->key
;
58 const struct nft_rhash_elem
*he
= ptr
;
60 if (memcmp(nft_set_ext_key(&he
->ext
), x
->key
, x
->set
->klen
))
62 if (nft_set_elem_expired(&he
->ext
))
64 if (!nft_set_elem_active(&he
->ext
, x
->genmask
))
69 static const struct rhashtable_params nft_rhash_params
= {
70 .head_offset
= offsetof(struct nft_rhash_elem
, node
),
71 .hashfn
= nft_rhash_key
,
72 .obj_hashfn
= nft_rhash_obj
,
73 .obj_cmpfn
= nft_rhash_cmp
,
74 .automatic_shrinking
= true,
77 static bool nft_rhash_lookup(const struct net
*net
, const struct nft_set
*set
,
78 const u32
*key
, const struct nft_set_ext
**ext
)
80 struct nft_rhash
*priv
= nft_set_priv(set
);
81 const struct nft_rhash_elem
*he
;
82 struct nft_rhash_cmp_arg arg
= {
83 .genmask
= nft_genmask_cur(net
),
88 he
= rhashtable_lookup(&priv
->ht
, &arg
, nft_rhash_params
);
95 static void *nft_rhash_get(const struct net
*net
, const struct nft_set
*set
,
96 const struct nft_set_elem
*elem
, unsigned int flags
)
98 struct nft_rhash
*priv
= nft_set_priv(set
);
99 struct nft_rhash_elem
*he
;
100 struct nft_rhash_cmp_arg arg
= {
101 .genmask
= nft_genmask_cur(net
),
103 .key
= elem
->key
.val
.data
,
106 he
= rhashtable_lookup(&priv
->ht
, &arg
, nft_rhash_params
);
110 return ERR_PTR(-ENOENT
);
113 static bool nft_rhash_update(struct nft_set
*set
, const u32
*key
,
114 void *(*new)(struct nft_set
*,
115 const struct nft_expr
*,
116 struct nft_regs
*regs
),
117 const struct nft_expr
*expr
,
118 struct nft_regs
*regs
,
119 const struct nft_set_ext
**ext
)
121 struct nft_rhash
*priv
= nft_set_priv(set
);
122 struct nft_rhash_elem
*he
, *prev
;
123 struct nft_rhash_cmp_arg arg
= {
124 .genmask
= NFT_GENMASK_ANY
,
129 he
= rhashtable_lookup(&priv
->ht
, &arg
, nft_rhash_params
);
133 he
= new(set
, expr
, regs
);
137 prev
= rhashtable_lookup_get_insert_key(&priv
->ht
, &arg
, &he
->node
,
142 /* Another cpu may race to insert the element with the same key */
144 nft_set_elem_destroy(set
, he
, true);
153 nft_set_elem_destroy(set
, he
, true);
158 static int nft_rhash_insert(const struct net
*net
, const struct nft_set
*set
,
159 const struct nft_set_elem
*elem
,
160 struct nft_set_ext
**ext
)
162 struct nft_rhash
*priv
= nft_set_priv(set
);
163 struct nft_rhash_elem
*he
= elem
->priv
;
164 struct nft_rhash_cmp_arg arg
= {
165 .genmask
= nft_genmask_next(net
),
167 .key
= elem
->key
.val
.data
,
169 struct nft_rhash_elem
*prev
;
171 prev
= rhashtable_lookup_get_insert_key(&priv
->ht
, &arg
, &he
->node
,
174 return PTR_ERR(prev
);
182 static void nft_rhash_activate(const struct net
*net
, const struct nft_set
*set
,
183 const struct nft_set_elem
*elem
)
185 struct nft_rhash_elem
*he
= elem
->priv
;
187 nft_set_elem_change_active(net
, set
, &he
->ext
);
188 nft_set_elem_clear_busy(&he
->ext
);
191 static bool nft_rhash_flush(const struct net
*net
,
192 const struct nft_set
*set
, void *priv
)
194 struct nft_rhash_elem
*he
= priv
;
196 if (!nft_set_elem_mark_busy(&he
->ext
) ||
197 !nft_is_active(net
, &he
->ext
)) {
198 nft_set_elem_change_active(net
, set
, &he
->ext
);
204 static void *nft_rhash_deactivate(const struct net
*net
,
205 const struct nft_set
*set
,
206 const struct nft_set_elem
*elem
)
208 struct nft_rhash
*priv
= nft_set_priv(set
);
209 struct nft_rhash_elem
*he
;
210 struct nft_rhash_cmp_arg arg
= {
211 .genmask
= nft_genmask_next(net
),
213 .key
= elem
->key
.val
.data
,
217 he
= rhashtable_lookup(&priv
->ht
, &arg
, nft_rhash_params
);
219 !nft_rhash_flush(net
, set
, he
))
227 static void nft_rhash_remove(const struct net
*net
,
228 const struct nft_set
*set
,
229 const struct nft_set_elem
*elem
)
231 struct nft_rhash
*priv
= nft_set_priv(set
);
232 struct nft_rhash_elem
*he
= elem
->priv
;
234 rhashtable_remove_fast(&priv
->ht
, &he
->node
, nft_rhash_params
);
237 static void nft_rhash_walk(const struct nft_ctx
*ctx
, struct nft_set
*set
,
238 struct nft_set_iter
*iter
)
240 struct nft_rhash
*priv
= nft_set_priv(set
);
241 struct nft_rhash_elem
*he
;
242 struct rhashtable_iter hti
;
243 struct nft_set_elem elem
;
245 rhashtable_walk_enter(&priv
->ht
, &hti
);
246 rhashtable_walk_start(&hti
);
248 while ((he
= rhashtable_walk_next(&hti
))) {
250 if (PTR_ERR(he
) != -EAGAIN
) {
251 iter
->err
= PTR_ERR(he
);
258 if (iter
->count
< iter
->skip
)
260 if (nft_set_elem_expired(&he
->ext
))
262 if (!nft_set_elem_active(&he
->ext
, iter
->genmask
))
267 iter
->err
= iter
->fn(ctx
, set
, iter
, &elem
);
274 rhashtable_walk_stop(&hti
);
275 rhashtable_walk_exit(&hti
);
278 static void nft_rhash_gc(struct work_struct
*work
)
281 struct nft_rhash_elem
*he
;
282 struct nft_rhash
*priv
;
283 struct nft_set_gc_batch
*gcb
= NULL
;
284 struct rhashtable_iter hti
;
286 priv
= container_of(work
, struct nft_rhash
, gc_work
.work
);
287 set
= nft_set_container_of(priv
);
289 rhashtable_walk_enter(&priv
->ht
, &hti
);
290 rhashtable_walk_start(&hti
);
292 while ((he
= rhashtable_walk_next(&hti
))) {
294 if (PTR_ERR(he
) != -EAGAIN
)
299 if (nft_set_ext_exists(&he
->ext
, NFT_SET_EXT_EXPR
)) {
300 struct nft_expr
*expr
= nft_set_ext_expr(&he
->ext
);
303 expr
->ops
->gc(read_pnet(&set
->net
), expr
))
306 if (!nft_set_elem_expired(&he
->ext
))
309 if (nft_set_elem_mark_busy(&he
->ext
))
312 gcb
= nft_set_gc_batch_check(set
, gcb
, GFP_ATOMIC
);
315 rhashtable_remove_fast(&priv
->ht
, &he
->node
, nft_rhash_params
);
316 atomic_dec(&set
->nelems
);
317 nft_set_gc_batch_add(gcb
, he
);
319 rhashtable_walk_stop(&hti
);
320 rhashtable_walk_exit(&hti
);
322 nft_set_gc_batch_complete(gcb
);
323 queue_delayed_work(system_power_efficient_wq
, &priv
->gc_work
,
324 nft_set_gc_interval(set
));
327 static u64
nft_rhash_privsize(const struct nlattr
* const nla
[],
328 const struct nft_set_desc
*desc
)
330 return sizeof(struct nft_rhash
);
333 static void nft_rhash_gc_init(const struct nft_set
*set
)
335 struct nft_rhash
*priv
= nft_set_priv(set
);
337 queue_delayed_work(system_power_efficient_wq
, &priv
->gc_work
,
338 nft_set_gc_interval(set
));
341 static int nft_rhash_init(const struct nft_set
*set
,
342 const struct nft_set_desc
*desc
,
343 const struct nlattr
* const tb
[])
345 struct nft_rhash
*priv
= nft_set_priv(set
);
346 struct rhashtable_params params
= nft_rhash_params
;
349 params
.nelem_hint
= desc
->size
?: NFT_RHASH_ELEMENT_HINT
;
350 params
.key_len
= set
->klen
;
352 err
= rhashtable_init(&priv
->ht
, ¶ms
);
356 INIT_DEFERRABLE_WORK(&priv
->gc_work
, nft_rhash_gc
);
357 if (set
->flags
& NFT_SET_TIMEOUT
)
358 nft_rhash_gc_init(set
);
363 static void nft_rhash_elem_destroy(void *ptr
, void *arg
)
365 nft_set_elem_destroy(arg
, ptr
, true);
368 static void nft_rhash_destroy(const struct nft_set
*set
)
370 struct nft_rhash
*priv
= nft_set_priv(set
);
372 cancel_delayed_work_sync(&priv
->gc_work
);
374 rhashtable_free_and_destroy(&priv
->ht
, nft_rhash_elem_destroy
,
378 static u32
nft_hash_buckets(u32 size
)
380 return roundup_pow_of_two(size
* 4 / 3);
383 static bool nft_rhash_estimate(const struct nft_set_desc
*desc
, u32 features
,
384 struct nft_set_estimate
*est
)
387 est
->lookup
= NFT_SET_CLASS_O_1
;
388 est
->space
= NFT_SET_CLASS_O_N
;
396 struct hlist_head table
[];
399 struct nft_hash_elem
{
400 struct hlist_node node
;
401 struct nft_set_ext ext
;
404 static bool nft_hash_lookup(const struct net
*net
, const struct nft_set
*set
,
405 const u32
*key
, const struct nft_set_ext
**ext
)
407 struct nft_hash
*priv
= nft_set_priv(set
);
408 u8 genmask
= nft_genmask_cur(net
);
409 const struct nft_hash_elem
*he
;
412 hash
= jhash(key
, set
->klen
, priv
->seed
);
413 hash
= reciprocal_scale(hash
, priv
->buckets
);
414 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
415 if (!memcmp(nft_set_ext_key(&he
->ext
), key
, set
->klen
) &&
416 nft_set_elem_active(&he
->ext
, genmask
)) {
424 static void *nft_hash_get(const struct net
*net
, const struct nft_set
*set
,
425 const struct nft_set_elem
*elem
, unsigned int flags
)
427 struct nft_hash
*priv
= nft_set_priv(set
);
428 u8 genmask
= nft_genmask_cur(net
);
429 struct nft_hash_elem
*he
;
432 hash
= jhash(elem
->key
.val
.data
, set
->klen
, priv
->seed
);
433 hash
= reciprocal_scale(hash
, priv
->buckets
);
434 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
435 if (!memcmp(nft_set_ext_key(&he
->ext
), elem
->key
.val
.data
, set
->klen
) &&
436 nft_set_elem_active(&he
->ext
, genmask
))
439 return ERR_PTR(-ENOENT
);
442 static bool nft_hash_lookup_fast(const struct net
*net
,
443 const struct nft_set
*set
,
444 const u32
*key
, const struct nft_set_ext
**ext
)
446 struct nft_hash
*priv
= nft_set_priv(set
);
447 u8 genmask
= nft_genmask_cur(net
);
448 const struct nft_hash_elem
*he
;
452 hash
= jhash_1word(k1
, priv
->seed
);
453 hash
= reciprocal_scale(hash
, priv
->buckets
);
454 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
455 k2
= *(u32
*)nft_set_ext_key(&he
->ext
)->data
;
457 nft_set_elem_active(&he
->ext
, genmask
)) {
465 static u32
nft_jhash(const struct nft_set
*set
, const struct nft_hash
*priv
,
466 const struct nft_set_ext
*ext
)
468 const struct nft_data
*key
= nft_set_ext_key(ext
);
471 if (set
->klen
== 4) {
473 hash
= jhash_1word(k1
, priv
->seed
);
475 hash
= jhash(key
, set
->klen
, priv
->seed
);
477 hash
= reciprocal_scale(hash
, priv
->buckets
);
482 static int nft_hash_insert(const struct net
*net
, const struct nft_set
*set
,
483 const struct nft_set_elem
*elem
,
484 struct nft_set_ext
**ext
)
486 struct nft_hash_elem
*this = elem
->priv
, *he
;
487 struct nft_hash
*priv
= nft_set_priv(set
);
488 u8 genmask
= nft_genmask_next(net
);
491 hash
= nft_jhash(set
, priv
, &this->ext
);
492 hlist_for_each_entry(he
, &priv
->table
[hash
], node
) {
493 if (!memcmp(nft_set_ext_key(&this->ext
),
494 nft_set_ext_key(&he
->ext
), set
->klen
) &&
495 nft_set_elem_active(&he
->ext
, genmask
)) {
500 hlist_add_head_rcu(&this->node
, &priv
->table
[hash
]);
504 static void nft_hash_activate(const struct net
*net
, const struct nft_set
*set
,
505 const struct nft_set_elem
*elem
)
507 struct nft_hash_elem
*he
= elem
->priv
;
509 nft_set_elem_change_active(net
, set
, &he
->ext
);
512 static bool nft_hash_flush(const struct net
*net
,
513 const struct nft_set
*set
, void *priv
)
515 struct nft_hash_elem
*he
= priv
;
517 nft_set_elem_change_active(net
, set
, &he
->ext
);
521 static void *nft_hash_deactivate(const struct net
*net
,
522 const struct nft_set
*set
,
523 const struct nft_set_elem
*elem
)
525 struct nft_hash
*priv
= nft_set_priv(set
);
526 struct nft_hash_elem
*this = elem
->priv
, *he
;
527 u8 genmask
= nft_genmask_next(net
);
530 hash
= nft_jhash(set
, priv
, &this->ext
);
531 hlist_for_each_entry(he
, &priv
->table
[hash
], node
) {
532 if (!memcmp(nft_set_ext_key(&he
->ext
), &elem
->key
.val
,
534 nft_set_elem_active(&he
->ext
, genmask
)) {
535 nft_set_elem_change_active(net
, set
, &he
->ext
);
542 static void nft_hash_remove(const struct net
*net
,
543 const struct nft_set
*set
,
544 const struct nft_set_elem
*elem
)
546 struct nft_hash_elem
*he
= elem
->priv
;
548 hlist_del_rcu(&he
->node
);
551 static void nft_hash_walk(const struct nft_ctx
*ctx
, struct nft_set
*set
,
552 struct nft_set_iter
*iter
)
554 struct nft_hash
*priv
= nft_set_priv(set
);
555 struct nft_hash_elem
*he
;
556 struct nft_set_elem elem
;
559 for (i
= 0; i
< priv
->buckets
; i
++) {
560 hlist_for_each_entry_rcu(he
, &priv
->table
[i
], node
) {
561 if (iter
->count
< iter
->skip
)
563 if (!nft_set_elem_active(&he
->ext
, iter
->genmask
))
568 iter
->err
= iter
->fn(ctx
, set
, iter
, &elem
);
577 static u64
nft_hash_privsize(const struct nlattr
* const nla
[],
578 const struct nft_set_desc
*desc
)
580 return sizeof(struct nft_hash
) +
581 nft_hash_buckets(desc
->size
) * sizeof(struct hlist_head
);
584 static int nft_hash_init(const struct nft_set
*set
,
585 const struct nft_set_desc
*desc
,
586 const struct nlattr
* const tb
[])
588 struct nft_hash
*priv
= nft_set_priv(set
);
590 priv
->buckets
= nft_hash_buckets(desc
->size
);
591 get_random_bytes(&priv
->seed
, sizeof(priv
->seed
));
596 static void nft_hash_destroy(const struct nft_set
*set
)
598 struct nft_hash
*priv
= nft_set_priv(set
);
599 struct nft_hash_elem
*he
;
600 struct hlist_node
*next
;
603 for (i
= 0; i
< priv
->buckets
; i
++) {
604 hlist_for_each_entry_safe(he
, next
, &priv
->table
[i
], node
) {
605 hlist_del_rcu(&he
->node
);
606 nft_set_elem_destroy(set
, he
, true);
611 static bool nft_hash_estimate(const struct nft_set_desc
*desc
, u32 features
,
612 struct nft_set_estimate
*est
)
620 est
->size
= sizeof(struct nft_hash
) +
621 nft_hash_buckets(desc
->size
) * sizeof(struct hlist_head
) +
622 desc
->size
* sizeof(struct nft_hash_elem
);
623 est
->lookup
= NFT_SET_CLASS_O_1
;
624 est
->space
= NFT_SET_CLASS_O_N
;
629 static bool nft_hash_fast_estimate(const struct nft_set_desc
*desc
, u32 features
,
630 struct nft_set_estimate
*est
)
638 est
->size
= sizeof(struct nft_hash
) +
639 nft_hash_buckets(desc
->size
) * sizeof(struct hlist_head
) +
640 desc
->size
* sizeof(struct nft_hash_elem
);
641 est
->lookup
= NFT_SET_CLASS_O_1
;
642 est
->space
= NFT_SET_CLASS_O_N
;
647 struct nft_set_type nft_set_rhash_type __read_mostly
= {
648 .owner
= THIS_MODULE
,
649 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
|
650 NFT_SET_TIMEOUT
| NFT_SET_EVAL
,
652 .privsize
= nft_rhash_privsize
,
653 .elemsize
= offsetof(struct nft_rhash_elem
, ext
),
654 .estimate
= nft_rhash_estimate
,
655 .init
= nft_rhash_init
,
656 .gc_init
= nft_rhash_gc_init
,
657 .destroy
= nft_rhash_destroy
,
658 .insert
= nft_rhash_insert
,
659 .activate
= nft_rhash_activate
,
660 .deactivate
= nft_rhash_deactivate
,
661 .flush
= nft_rhash_flush
,
662 .remove
= nft_rhash_remove
,
663 .lookup
= nft_rhash_lookup
,
664 .update
= nft_rhash_update
,
665 .walk
= nft_rhash_walk
,
666 .get
= nft_rhash_get
,
670 struct nft_set_type nft_set_hash_type __read_mostly
= {
671 .owner
= THIS_MODULE
,
672 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
,
674 .privsize
= nft_hash_privsize
,
675 .elemsize
= offsetof(struct nft_hash_elem
, ext
),
676 .estimate
= nft_hash_estimate
,
677 .init
= nft_hash_init
,
678 .destroy
= nft_hash_destroy
,
679 .insert
= nft_hash_insert
,
680 .activate
= nft_hash_activate
,
681 .deactivate
= nft_hash_deactivate
,
682 .flush
= nft_hash_flush
,
683 .remove
= nft_hash_remove
,
684 .lookup
= nft_hash_lookup
,
685 .walk
= nft_hash_walk
,
690 struct nft_set_type nft_set_hash_fast_type __read_mostly
= {
691 .owner
= THIS_MODULE
,
692 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
,
694 .privsize
= nft_hash_privsize
,
695 .elemsize
= offsetof(struct nft_hash_elem
, ext
),
696 .estimate
= nft_hash_fast_estimate
,
697 .init
= nft_hash_init
,
698 .destroy
= nft_hash_destroy
,
699 .insert
= nft_hash_insert
,
700 .activate
= nft_hash_activate
,
701 .deactivate
= nft_hash_deactivate
,
702 .flush
= nft_hash_flush
,
703 .remove
= nft_hash_remove
,
704 .lookup
= nft_hash_lookup_fast
,
705 .walk
= nft_hash_walk
,