2 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/list.h>
15 #include <linux/log2.h>
16 #include <linux/jhash.h>
17 #include <linux/netlink.h>
18 #include <linux/workqueue.h>
19 #include <linux/rhashtable.h>
20 #include <linux/netfilter.h>
21 #include <linux/netfilter/nf_tables.h>
22 #include <net/netfilter/nf_tables.h>
24 /* We target a hash table size of 4, element hint is 75% of final size */
25 #define NFT_RHASH_ELEMENT_HINT 3
29 struct delayed_work gc_work
;
32 struct nft_rhash_elem
{
33 struct rhash_head node
;
34 struct nft_set_ext ext
;
37 struct nft_rhash_cmp_arg
{
38 const struct nft_set
*set
;
43 static inline u32
nft_rhash_key(const void *data
, u32 len
, u32 seed
)
45 const struct nft_rhash_cmp_arg
*arg
= data
;
47 return jhash(arg
->key
, len
, seed
);
50 static inline u32
nft_rhash_obj(const void *data
, u32 len
, u32 seed
)
52 const struct nft_rhash_elem
*he
= data
;
54 return jhash(nft_set_ext_key(&he
->ext
), len
, seed
);
57 static inline int nft_rhash_cmp(struct rhashtable_compare_arg
*arg
,
60 const struct nft_rhash_cmp_arg
*x
= arg
->key
;
61 const struct nft_rhash_elem
*he
= ptr
;
63 if (memcmp(nft_set_ext_key(&he
->ext
), x
->key
, x
->set
->klen
))
65 if (nft_set_elem_expired(&he
->ext
))
67 if (!nft_set_elem_active(&he
->ext
, x
->genmask
))
72 static const struct rhashtable_params nft_rhash_params
= {
73 .head_offset
= offsetof(struct nft_rhash_elem
, node
),
74 .hashfn
= nft_rhash_key
,
75 .obj_hashfn
= nft_rhash_obj
,
76 .obj_cmpfn
= nft_rhash_cmp
,
77 .automatic_shrinking
= true,
80 static bool nft_rhash_lookup(const struct net
*net
, const struct nft_set
*set
,
81 const u32
*key
, const struct nft_set_ext
**ext
)
83 struct nft_rhash
*priv
= nft_set_priv(set
);
84 const struct nft_rhash_elem
*he
;
85 struct nft_rhash_cmp_arg arg
= {
86 .genmask
= nft_genmask_cur(net
),
91 he
= rhashtable_lookup_fast(&priv
->ht
, &arg
, nft_rhash_params
);
98 static void *nft_rhash_get(const struct net
*net
, const struct nft_set
*set
,
99 const struct nft_set_elem
*elem
, unsigned int flags
)
101 struct nft_rhash
*priv
= nft_set_priv(set
);
102 struct nft_rhash_elem
*he
;
103 struct nft_rhash_cmp_arg arg
= {
104 .genmask
= nft_genmask_cur(net
),
106 .key
= elem
->key
.val
.data
,
109 he
= rhashtable_lookup_fast(&priv
->ht
, &arg
, nft_rhash_params
);
113 return ERR_PTR(-ENOENT
);
116 static bool nft_rhash_update(struct nft_set
*set
, const u32
*key
,
117 void *(*new)(struct nft_set
*,
118 const struct nft_expr
*,
119 struct nft_regs
*regs
),
120 const struct nft_expr
*expr
,
121 struct nft_regs
*regs
,
122 const struct nft_set_ext
**ext
)
124 struct nft_rhash
*priv
= nft_set_priv(set
);
125 struct nft_rhash_elem
*he
, *prev
;
126 struct nft_rhash_cmp_arg arg
= {
127 .genmask
= NFT_GENMASK_ANY
,
132 he
= rhashtable_lookup_fast(&priv
->ht
, &arg
, nft_rhash_params
);
136 he
= new(set
, expr
, regs
);
140 prev
= rhashtable_lookup_get_insert_key(&priv
->ht
, &arg
, &he
->node
,
145 /* Another cpu may race to insert the element with the same key */
147 nft_set_elem_destroy(set
, he
, true);
156 nft_set_elem_destroy(set
, he
, true);
161 static int nft_rhash_insert(const struct net
*net
, const struct nft_set
*set
,
162 const struct nft_set_elem
*elem
,
163 struct nft_set_ext
**ext
)
165 struct nft_rhash
*priv
= nft_set_priv(set
);
166 struct nft_rhash_elem
*he
= elem
->priv
;
167 struct nft_rhash_cmp_arg arg
= {
168 .genmask
= nft_genmask_next(net
),
170 .key
= elem
->key
.val
.data
,
172 struct nft_rhash_elem
*prev
;
174 prev
= rhashtable_lookup_get_insert_key(&priv
->ht
, &arg
, &he
->node
,
177 return PTR_ERR(prev
);
185 static void nft_rhash_activate(const struct net
*net
, const struct nft_set
*set
,
186 const struct nft_set_elem
*elem
)
188 struct nft_rhash_elem
*he
= elem
->priv
;
190 nft_set_elem_change_active(net
, set
, &he
->ext
);
191 nft_set_elem_clear_busy(&he
->ext
);
194 static bool nft_rhash_flush(const struct net
*net
,
195 const struct nft_set
*set
, void *priv
)
197 struct nft_rhash_elem
*he
= priv
;
199 if (!nft_set_elem_mark_busy(&he
->ext
) ||
200 !nft_is_active(net
, &he
->ext
)) {
201 nft_set_elem_change_active(net
, set
, &he
->ext
);
207 static void *nft_rhash_deactivate(const struct net
*net
,
208 const struct nft_set
*set
,
209 const struct nft_set_elem
*elem
)
211 struct nft_rhash
*priv
= nft_set_priv(set
);
212 struct nft_rhash_elem
*he
;
213 struct nft_rhash_cmp_arg arg
= {
214 .genmask
= nft_genmask_next(net
),
216 .key
= elem
->key
.val
.data
,
220 he
= rhashtable_lookup_fast(&priv
->ht
, &arg
, nft_rhash_params
);
222 !nft_rhash_flush(net
, set
, he
))
230 static void nft_rhash_remove(const struct net
*net
,
231 const struct nft_set
*set
,
232 const struct nft_set_elem
*elem
)
234 struct nft_rhash
*priv
= nft_set_priv(set
);
235 struct nft_rhash_elem
*he
= elem
->priv
;
237 rhashtable_remove_fast(&priv
->ht
, &he
->node
, nft_rhash_params
);
240 static void nft_rhash_walk(const struct nft_ctx
*ctx
, struct nft_set
*set
,
241 struct nft_set_iter
*iter
)
243 struct nft_rhash
*priv
= nft_set_priv(set
);
244 struct nft_rhash_elem
*he
;
245 struct rhashtable_iter hti
;
246 struct nft_set_elem elem
;
249 err
= rhashtable_walk_init(&priv
->ht
, &hti
, GFP_ATOMIC
);
254 err
= rhashtable_walk_start(&hti
);
255 if (err
&& err
!= -EAGAIN
) {
260 while ((he
= rhashtable_walk_next(&hti
))) {
263 if (err
!= -EAGAIN
) {
271 if (iter
->count
< iter
->skip
)
273 if (nft_set_elem_expired(&he
->ext
))
275 if (!nft_set_elem_active(&he
->ext
, iter
->genmask
))
280 iter
->err
= iter
->fn(ctx
, set
, iter
, &elem
);
289 rhashtable_walk_stop(&hti
);
290 rhashtable_walk_exit(&hti
);
293 static void nft_rhash_gc(struct work_struct
*work
)
296 struct nft_rhash_elem
*he
;
297 struct nft_rhash
*priv
;
298 struct nft_set_gc_batch
*gcb
= NULL
;
299 struct rhashtable_iter hti
;
302 priv
= container_of(work
, struct nft_rhash
, gc_work
.work
);
303 set
= nft_set_container_of(priv
);
305 err
= rhashtable_walk_init(&priv
->ht
, &hti
, GFP_KERNEL
);
309 err
= rhashtable_walk_start(&hti
);
310 if (err
&& err
!= -EAGAIN
)
313 while ((he
= rhashtable_walk_next(&hti
))) {
315 if (PTR_ERR(he
) != -EAGAIN
)
320 if (!nft_set_elem_expired(&he
->ext
))
322 if (nft_set_elem_mark_busy(&he
->ext
))
325 gcb
= nft_set_gc_batch_check(set
, gcb
, GFP_ATOMIC
);
328 rhashtable_remove_fast(&priv
->ht
, &he
->node
, nft_rhash_params
);
329 atomic_dec(&set
->nelems
);
330 nft_set_gc_batch_add(gcb
, he
);
333 rhashtable_walk_stop(&hti
);
334 rhashtable_walk_exit(&hti
);
336 nft_set_gc_batch_complete(gcb
);
338 queue_delayed_work(system_power_efficient_wq
, &priv
->gc_work
,
339 nft_set_gc_interval(set
));
342 static unsigned int nft_rhash_privsize(const struct nlattr
* const nla
[],
343 const struct nft_set_desc
*desc
)
345 return sizeof(struct nft_rhash
);
348 static int nft_rhash_init(const struct nft_set
*set
,
349 const struct nft_set_desc
*desc
,
350 const struct nlattr
* const tb
[])
352 struct nft_rhash
*priv
= nft_set_priv(set
);
353 struct rhashtable_params params
= nft_rhash_params
;
356 params
.nelem_hint
= desc
->size
?: NFT_RHASH_ELEMENT_HINT
;
357 params
.key_len
= set
->klen
;
359 err
= rhashtable_init(&priv
->ht
, ¶ms
);
363 INIT_DEFERRABLE_WORK(&priv
->gc_work
, nft_rhash_gc
);
364 if (set
->flags
& NFT_SET_TIMEOUT
)
365 queue_delayed_work(system_power_efficient_wq
, &priv
->gc_work
,
366 nft_set_gc_interval(set
));
370 static void nft_rhash_elem_destroy(void *ptr
, void *arg
)
372 nft_set_elem_destroy(arg
, ptr
, true);
375 static void nft_rhash_destroy(const struct nft_set
*set
)
377 struct nft_rhash
*priv
= nft_set_priv(set
);
379 cancel_delayed_work_sync(&priv
->gc_work
);
380 rhashtable_free_and_destroy(&priv
->ht
, nft_rhash_elem_destroy
,
384 static u32
nft_hash_buckets(u32 size
)
386 return roundup_pow_of_two(size
* 4 / 3);
389 static bool nft_rhash_estimate(const struct nft_set_desc
*desc
, u32 features
,
390 struct nft_set_estimate
*est
)
393 est
->lookup
= NFT_SET_CLASS_O_1
;
394 est
->space
= NFT_SET_CLASS_O_N
;
402 struct hlist_head table
[];
405 struct nft_hash_elem
{
406 struct hlist_node node
;
407 struct nft_set_ext ext
;
410 static bool nft_hash_lookup(const struct net
*net
, const struct nft_set
*set
,
411 const u32
*key
, const struct nft_set_ext
**ext
)
413 struct nft_hash
*priv
= nft_set_priv(set
);
414 u8 genmask
= nft_genmask_cur(net
);
415 const struct nft_hash_elem
*he
;
418 hash
= jhash(key
, set
->klen
, priv
->seed
);
419 hash
= reciprocal_scale(hash
, priv
->buckets
);
420 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
421 if (!memcmp(nft_set_ext_key(&he
->ext
), key
, set
->klen
) &&
422 nft_set_elem_active(&he
->ext
, genmask
)) {
430 static void *nft_hash_get(const struct net
*net
, const struct nft_set
*set
,
431 const struct nft_set_elem
*elem
, unsigned int flags
)
433 struct nft_hash
*priv
= nft_set_priv(set
);
434 u8 genmask
= nft_genmask_cur(net
);
435 struct nft_hash_elem
*he
;
438 hash
= jhash(elem
->key
.val
.data
, set
->klen
, priv
->seed
);
439 hash
= reciprocal_scale(hash
, priv
->buckets
);
440 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
441 if (!memcmp(nft_set_ext_key(&he
->ext
), elem
->key
.val
.data
, set
->klen
) &&
442 nft_set_elem_active(&he
->ext
, genmask
))
445 return ERR_PTR(-ENOENT
);
448 /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
449 static inline u32
nft_hash_key(const u32
*key
, u32 klen
)
457 static bool nft_hash_lookup_fast(const struct net
*net
,
458 const struct nft_set
*set
,
459 const u32
*key
, const struct nft_set_ext
**ext
)
461 struct nft_hash
*priv
= nft_set_priv(set
);
462 u8 genmask
= nft_genmask_cur(net
);
463 const struct nft_hash_elem
*he
;
466 k1
= nft_hash_key(key
, set
->klen
);
467 hash
= jhash_1word(k1
, priv
->seed
);
468 hash
= reciprocal_scale(hash
, priv
->buckets
);
469 hlist_for_each_entry_rcu(he
, &priv
->table
[hash
], node
) {
470 k2
= nft_hash_key(nft_set_ext_key(&he
->ext
)->data
, set
->klen
);
472 nft_set_elem_active(&he
->ext
, genmask
)) {
480 static int nft_hash_insert(const struct net
*net
, const struct nft_set
*set
,
481 const struct nft_set_elem
*elem
,
482 struct nft_set_ext
**ext
)
484 struct nft_hash_elem
*this = elem
->priv
, *he
;
485 struct nft_hash
*priv
= nft_set_priv(set
);
486 u8 genmask
= nft_genmask_next(net
);
489 hash
= jhash(nft_set_ext_key(&this->ext
), set
->klen
, priv
->seed
);
490 hash
= reciprocal_scale(hash
, priv
->buckets
);
491 hlist_for_each_entry(he
, &priv
->table
[hash
], node
) {
492 if (!memcmp(nft_set_ext_key(&this->ext
),
493 nft_set_ext_key(&he
->ext
), set
->klen
) &&
494 nft_set_elem_active(&he
->ext
, genmask
)) {
499 hlist_add_head_rcu(&this->node
, &priv
->table
[hash
]);
503 static void nft_hash_activate(const struct net
*net
, const struct nft_set
*set
,
504 const struct nft_set_elem
*elem
)
506 struct nft_hash_elem
*he
= elem
->priv
;
508 nft_set_elem_change_active(net
, set
, &he
->ext
);
511 static bool nft_hash_flush(const struct net
*net
,
512 const struct nft_set
*set
, void *priv
)
514 struct nft_hash_elem
*he
= priv
;
516 nft_set_elem_change_active(net
, set
, &he
->ext
);
520 static void *nft_hash_deactivate(const struct net
*net
,
521 const struct nft_set
*set
,
522 const struct nft_set_elem
*elem
)
524 struct nft_hash
*priv
= nft_set_priv(set
);
525 struct nft_hash_elem
*this = elem
->priv
, *he
;
526 u8 genmask
= nft_genmask_next(net
);
529 hash
= jhash(nft_set_ext_key(&this->ext
), set
->klen
, priv
->seed
);
530 hash
= reciprocal_scale(hash
, priv
->buckets
);
531 hlist_for_each_entry(he
, &priv
->table
[hash
], node
) {
532 if (!memcmp(nft_set_ext_key(&this->ext
), &elem
->key
.val
,
534 nft_set_elem_active(&he
->ext
, genmask
)) {
535 nft_set_elem_change_active(net
, set
, &he
->ext
);
542 static void nft_hash_remove(const struct net
*net
,
543 const struct nft_set
*set
,
544 const struct nft_set_elem
*elem
)
546 struct nft_hash_elem
*he
= elem
->priv
;
548 hlist_del_rcu(&he
->node
);
551 static void nft_hash_walk(const struct nft_ctx
*ctx
, struct nft_set
*set
,
552 struct nft_set_iter
*iter
)
554 struct nft_hash
*priv
= nft_set_priv(set
);
555 struct nft_hash_elem
*he
;
556 struct nft_set_elem elem
;
559 for (i
= 0; i
< priv
->buckets
; i
++) {
560 hlist_for_each_entry_rcu(he
, &priv
->table
[i
], node
) {
561 if (iter
->count
< iter
->skip
)
563 if (!nft_set_elem_active(&he
->ext
, iter
->genmask
))
568 iter
->err
= iter
->fn(ctx
, set
, iter
, &elem
);
577 static unsigned int nft_hash_privsize(const struct nlattr
* const nla
[],
578 const struct nft_set_desc
*desc
)
580 return sizeof(struct nft_hash
) +
581 nft_hash_buckets(desc
->size
) * sizeof(struct hlist_head
);
584 static int nft_hash_init(const struct nft_set
*set
,
585 const struct nft_set_desc
*desc
,
586 const struct nlattr
* const tb
[])
588 struct nft_hash
*priv
= nft_set_priv(set
);
590 priv
->buckets
= nft_hash_buckets(desc
->size
);
591 get_random_bytes(&priv
->seed
, sizeof(priv
->seed
));
596 static void nft_hash_destroy(const struct nft_set
*set
)
598 struct nft_hash
*priv
= nft_set_priv(set
);
599 struct nft_hash_elem
*he
;
600 struct hlist_node
*next
;
603 for (i
= 0; i
< priv
->buckets
; i
++) {
604 hlist_for_each_entry_safe(he
, next
, &priv
->table
[i
], node
) {
605 hlist_del_rcu(&he
->node
);
606 nft_set_elem_destroy(set
, he
, true);
611 static bool nft_hash_estimate(const struct nft_set_desc
*desc
, u32 features
,
612 struct nft_set_estimate
*est
)
614 est
->size
= sizeof(struct nft_hash
) +
615 nft_hash_buckets(desc
->size
) * sizeof(struct hlist_head
) +
616 desc
->size
* sizeof(struct nft_hash_elem
);
617 est
->lookup
= NFT_SET_CLASS_O_1
;
618 est
->space
= NFT_SET_CLASS_O_N
;
623 static struct nft_set_type nft_hash_type
;
624 static struct nft_set_ops nft_rhash_ops __read_mostly
= {
625 .type
= &nft_hash_type
,
626 .privsize
= nft_rhash_privsize
,
627 .elemsize
= offsetof(struct nft_rhash_elem
, ext
),
628 .estimate
= nft_rhash_estimate
,
629 .init
= nft_rhash_init
,
630 .destroy
= nft_rhash_destroy
,
631 .insert
= nft_rhash_insert
,
632 .activate
= nft_rhash_activate
,
633 .deactivate
= nft_rhash_deactivate
,
634 .flush
= nft_rhash_flush
,
635 .remove
= nft_rhash_remove
,
636 .lookup
= nft_rhash_lookup
,
637 .update
= nft_rhash_update
,
638 .walk
= nft_rhash_walk
,
639 .get
= nft_rhash_get
,
640 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
| NFT_SET_TIMEOUT
,
643 static struct nft_set_ops nft_hash_ops __read_mostly
= {
644 .type
= &nft_hash_type
,
645 .privsize
= nft_hash_privsize
,
646 .elemsize
= offsetof(struct nft_hash_elem
, ext
),
647 .estimate
= nft_hash_estimate
,
648 .init
= nft_hash_init
,
649 .destroy
= nft_hash_destroy
,
650 .insert
= nft_hash_insert
,
651 .activate
= nft_hash_activate
,
652 .deactivate
= nft_hash_deactivate
,
653 .flush
= nft_hash_flush
,
654 .remove
= nft_hash_remove
,
655 .lookup
= nft_hash_lookup
,
656 .walk
= nft_hash_walk
,
658 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
,
661 static struct nft_set_ops nft_hash_fast_ops __read_mostly
= {
662 .type
= &nft_hash_type
,
663 .privsize
= nft_hash_privsize
,
664 .elemsize
= offsetof(struct nft_hash_elem
, ext
),
665 .estimate
= nft_hash_estimate
,
666 .init
= nft_hash_init
,
667 .destroy
= nft_hash_destroy
,
668 .insert
= nft_hash_insert
,
669 .activate
= nft_hash_activate
,
670 .deactivate
= nft_hash_deactivate
,
671 .flush
= nft_hash_flush
,
672 .remove
= nft_hash_remove
,
673 .lookup
= nft_hash_lookup_fast
,
674 .walk
= nft_hash_walk
,
676 .features
= NFT_SET_MAP
| NFT_SET_OBJECT
,
679 static const struct nft_set_ops
*
680 nft_hash_select_ops(const struct nft_ctx
*ctx
, const struct nft_set_desc
*desc
,
684 switch (desc
->klen
) {
686 return &nft_hash_fast_ops
;
688 return &nft_hash_ops
;
692 return &nft_rhash_ops
;
695 static struct nft_set_type nft_hash_type __read_mostly
= {
696 .select_ops
= nft_hash_select_ops
,
697 .owner
= THIS_MODULE
,
700 static int __init
nft_hash_module_init(void)
702 return nft_register_set(&nft_hash_type
);
705 static void __exit
nft_hash_module_exit(void)
707 nft_unregister_set(&nft_hash_type
);
710 module_init(nft_hash_module_init
);
711 module_exit(nft_hash_module_exit
);
713 MODULE_LICENSE("GPL");
714 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
715 MODULE_ALIAS_NFT_SET();