/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
8 #ifndef _IP_SET_HASH_GEN_H
9 #define _IP_SET_HASH_GEN_H
11 #include <linux/rcupdate.h>
12 #include <linux/jhash.h>
13 #include <linux/netfilter/ipset/ip_set_timeout.h>
/* Fall back to plain rcu_dereference() when rcu_dereference_bh() is not
 * provided by the included headers.
 */
#ifndef rcu_dereference_bh
#define rcu_dereference_bh(p)		rcu_dereference(p)
#endif

/* rcu_dereference_bh_check() with the check condition hard-wired to 1.
 * NOTE(review): presumably meant for callers serialized by the nfnl
 * mutex - confirm.
 */
#define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)
/* Hashing which uses arrays to resolve clashing. The hash table is resized
 * (doubled) when searching becomes too long.
 * Internally jhash is used with the assumption that the size of the
 * stored data is a multiple of sizeof(u32). If storage supports timeout,
 * the timeout field must be the last one in the data structure - that field
 * is ignored when computing the hash key.
 *
 * Readers and resizing
 *
 * Resizing can be triggered by userspace command only, and those
 * are serialized by the nfnl mutex. During resizing the set is
 * read-locked, so the only possible concurrent operations are
 * the kernel side readers. Those must be protected by proper RCU locking.
 */
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE			4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE			(3 * AHASH_INIT_SIZE)

/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)			((h)->ahash_max)

/* Compute the next per-bucket element limit.
 *
 * @curr:  the current limit
 * @multi: value reported by the element comparison of the caller
 *
 * Returns curr grown by one array block (AHASH_INIT_SIZE) as long as the
 * result does not exceed 64, otherwise curr unchanged.
 */
static inline u8
tune_ahash_max(u8 curr, u32 multi)
{
	u32 n;

	/* NOTE(review): this guard was refilled from a gap in the reviewed
	 * listing (multi is otherwise unused) - confirm against the
	 * pristine source.
	 */
	if (multi < curr)
		return curr;

	n = curr + AHASH_INIT_SIZE;
	/* Currently, at listing one hash bucket must fit into a message.
	 * Therefore we have a hard limit here.
	 */
	return n > curr && n <= 64 ? n : curr;
}
#define TUNE_AHASH_MAX(h, multi)	\
	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)			AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif
67 void *value
; /* the array of the values */
68 u8 size
; /* size of the array */
69 u8 pos
; /* position of the first free entry */
72 /* The hash table: the table size stored here in order to make resizing easy */
74 u8 htable_bits
; /* size of hash table == 2^htable_bits */
75 struct hbucket bucket
[0]; /* hashtable buckets */
78 #define hbucket(h, i) (&((h)->bucket[i]))
80 #ifndef IPSET_NET_COUNT
81 #define IPSET_NET_COUNT 1
84 /* Book-keeping of the prefixes added to the set */
86 u32 nets
[IPSET_NET_COUNT
]; /* number of elements per cidr */
87 u8 cidr
[IPSET_NET_COUNT
]; /* the different cidr values in the set */
90 /* Compute the hash table size */
96 /* We must fit both into u32 in jhash and size_t */
99 hsize
= jhash_size(hbits
);
100 if ((((size_t)-1) - sizeof(struct htable
))/sizeof(struct hbucket
)
104 return hsize
* sizeof(struct hbucket
) + sizeof(struct htable
);
107 /* Compute htable_bits from the user input parameter hashsize */
109 htable_bits(u32 hashsize
)
111 /* Assume that hashsize == 2^htable_bits */
112 u8 bits
= fls(hashsize
- 1);
113 if (jhash_size(bits
) != hashsize
)
114 /* Round up to the first 2^n value */
115 bits
= fls(hashsize
);
121 hbucket_elem_add(struct hbucket
*n
, u8 ahash_max
, size_t dsize
)
123 if (n
->pos
>= n
->size
) {
126 if (n
->size
>= ahash_max
)
127 /* Trigger rehashing */
130 tmp
= kzalloc((n
->size
+ AHASH_INIT_SIZE
) * dsize
,
135 memcpy(tmp
, n
->value
, n
->size
* dsize
);
139 n
->size
+= AHASH_INIT_SIZE
;
#ifdef IP_SET_HASH_WITH_NETS
/* Pick the ith cidr value of an element: an array when the element stores
 * more than one network, a plain scalar otherwise.
 */
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i)		((cidr)[i])
#else
#define __CIDR(cidr, i)		(cidr)
#endif
#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
#define CIDR(cidr, i)		(__CIDR(cidr, i) + 1)
#else
#define CIDR(cidr, i)		(__CIDR(cidr, i))
#endif

/* Prefix length of a host address in the given family */
#define SET_HOST_MASK(family)	((family) == AF_INET ? 32 : 128)

/* Number of entries in the prefix book-keeping array */
#ifdef IP_SET_HASH_WITH_MULTI
#define NLEN(family)		(SET_HOST_MASK(family) + 1)
#else
#define NLEN(family)		SET_HOST_MASK(family)
#endif

#else
#define NLEN(family)		0
#endif /* IP_SET_HASH_WITH_NETS */
169 #endif /* _IP_SET_HASH_GEN_H */
/* Family dependent templates
 *
 * Every mtype_* name is mapped onto an MTYPE-prefixed identifier by
 * IPSET_TOKEN(). The names are undefined first so that the mapping can be
 * set up again for another MTYPE.
 *
 * NOTE(review): the #undef list was completed so that it mirrors the
 * #define list below, and the ahash_data, HKEY and mtype entries were
 * refilled from gaps in the reviewed listing - confirm against the
 * pristine source.
 */

#undef ahash_data
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
#undef mtype_data_next
#undef mtype_elem

#undef mtype_ahash_destroy
#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
#undef mtype_gc_init
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt
#undef mtype

#undef mtype_add
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
#undef mtype_list
#undef mtype_gc
#undef mtype_variant
#undef mtype_data_match

#undef HKEY

#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
#else
/* Without network support every equal element is a match */
#define mtype_do_data_match(d)	1
#endif
#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
#define mtype_data_reset_elem	IPSET_TOKEN(MTYPE, _data_reset_elem)
#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
#define mtype			MTYPE

#define mtype_add		IPSET_TOKEN(MTYPE, _add)
#define mtype_del		IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test		IPSET_TOKEN(MTYPE, _test)
#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
#define mtype_head		IPSET_TOKEN(MTYPE, _head)
#define mtype_list		IPSET_TOKEN(MTYPE, _list)
#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)
250 #define HKEY_DATALEN sizeof(struct mtype_elem)
253 #define HKEY(data, initval, htable_bits) \
254 (jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
255 & jhash_mask(htable_bits))
260 /* The generic hash structure */
262 struct htable __rcu
*table
; /* the hash table */
263 u32 maxelem
; /* max elements in the hash */
264 u32 elements
; /* current element (vs timeout) */
265 u32 initval
; /* random jhash init value */
266 #ifdef IP_SET_HASH_WITH_MARKMASK
267 u32 markmask
; /* markmask value for mark mask to store */
269 struct timer_list gc
; /* garbage collection when timeout enabled */
270 struct mtype_elem next
; /* temporary storage for uadd */
271 #ifdef IP_SET_HASH_WITH_MULTI
272 u8 ahash_max
; /* max elements in an array block */
274 #ifdef IP_SET_HASH_WITH_NETMASK
275 u8 netmask
; /* netmask value for subnets to store */
277 #ifdef IP_SET_HASH_WITH_RBTREE
278 struct rb_root rbtree
;
280 #ifdef IP_SET_HASH_WITH_NETS
281 struct net_prefixes nets
[0]; /* book-keeping of prefixes */
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
 * sized networks */

/* Account one more element with prefix length cidr.
 *
 * @h:           the set data holding the book-keeping array
 * @cidr:        prefix length of the element added
 * @nets_length: number of entries in h->nets
 * @n:           which of the IPSET_NET_COUNT cidr columns to update
 *
 * The used entries are kept sorted so that larger cidr values come first;
 * a new value is inserted at its place by shifting the smaller ones down.
 */
static void
mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
	int i, j;

	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) {
		if (j != -1)
			/* Insert position found, just walk to the end */
			continue;
		else if (h->nets[i].cidr[n] < cidr)
			j = i;
		else if (h->nets[i].cidr[n] == cidr) {
			h->nets[i].nets[n]++;
			return;
		}
	}
	if (j != -1) {
		/* Open a hole at j by shifting the tail one entry down */
		for (; i > j; i--) {
			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
			h->nets[i].nets[n] = h->nets[i - 1].nets[n];
		}
	}
	h->nets[i].cidr[n] = cidr;
	h->nets[i].nets[n] = 1;
}

/* Account one element less with prefix length cidr.
 *
 * @h:           the set data holding the book-keeping array
 * @cidr:        prefix length of the element deleted
 * @nets_length: number of entries in h->nets
 * @n:           which of the IPSET_NET_COUNT cidr columns to update
 *
 * When the last element with the given cidr goes away and used entries
 * follow it, those are shifted up to keep the used part contiguous.
 */
static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
	u8 i, j, net_end = nets_length - 1;

	for (i = 0; i < nets_length; i++) {
		if (h->nets[i].cidr[n] != cidr)
			continue;
		if (h->nets[i].nets[n] > 1 || i == net_end ||
		    h->nets[i + 1].nets[n] == 0) {
			/* No used entry follows or the counter stays
			 * positive: no shifting is required
			 */
			h->nets[i].nets[n]--;
			return;
		}
		for (j = i; j < net_end && h->nets[j].nets[n]; j++) {
			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
			h->nets[j].nets[n] = h->nets[j + 1].nets[n];
		}
		h->nets[j].nets[n] = 0;
		return;
	}
}
#endif
338 /* Calculate the actual memory size of the set data */
340 mtype_ahash_memsize(const struct htype
*h
, const struct htable
*t
,
341 u8 nets_length
, size_t dsize
)
344 size_t memsize
= sizeof(*h
)
346 #ifdef IP_SET_HASH_WITH_NETS
347 + sizeof(struct net_prefixes
) * nets_length
349 + jhash_size(t
->htable_bits
) * sizeof(struct hbucket
);
351 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++)
352 memsize
+= t
->bucket
[i
].size
* dsize
;
357 /* Get the ith element from the array block n */
358 #define ahash_data(n, i, dsize) \
359 ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
362 mtype_ext_cleanup(struct ip_set
*set
, struct hbucket
*n
)
366 for (i
= 0; i
< n
->pos
; i
++)
367 ip_set_ext_destroy(set
, ahash_data(n
, i
, set
->dsize
));
370 /* Flush a hash type of set: destroy all elements */
372 mtype_flush(struct ip_set
*set
)
374 struct htype
*h
= set
->data
;
379 t
= rcu_dereference_bh_nfnl(h
->table
);
380 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
383 if (set
->extensions
& IPSET_EXT_DESTROY
)
384 mtype_ext_cleanup(set
, n
);
385 n
->size
= n
->pos
= 0;
386 /* FIXME: use slab cache */
390 #ifdef IP_SET_HASH_WITH_NETS
391 memset(h
->nets
, 0, sizeof(struct net_prefixes
) * NLEN(set
->family
));
396 /* Destroy the hashtable part of the set */
398 mtype_ahash_destroy(struct ip_set
*set
, struct htable
*t
, bool ext_destroy
)
403 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
406 if (set
->extensions
& IPSET_EXT_DESTROY
&& ext_destroy
)
407 mtype_ext_cleanup(set
, n
);
408 /* FIXME: use slab cache */
416 /* Destroy a hash type of set */
418 mtype_destroy(struct ip_set
*set
)
420 struct htype
*h
= set
->data
;
422 if (set
->extensions
& IPSET_EXT_TIMEOUT
)
423 del_timer_sync(&h
->gc
);
425 mtype_ahash_destroy(set
, rcu_dereference_bh_nfnl(h
->table
), true);
426 #ifdef IP_SET_HASH_WITH_RBTREE
427 rbtree_destroy(&h
->rbtree
);
435 mtype_gc_init(struct ip_set
*set
, void (*gc
)(unsigned long ul_set
))
437 struct htype
*h
= set
->data
;
440 h
->gc
.data
= (unsigned long) set
;
442 h
->gc
.expires
= jiffies
+ IPSET_GC_PERIOD(set
->timeout
) * HZ
;
444 pr_debug("gc initialized, run in every %u\n",
445 IPSET_GC_PERIOD(set
->timeout
));
449 mtype_same_set(const struct ip_set
*a
, const struct ip_set
*b
)
451 const struct htype
*x
= a
->data
;
452 const struct htype
*y
= b
->data
;
454 /* Resizing changes htable_bits, so we ignore it */
455 return x
->maxelem
== y
->maxelem
&&
456 a
->timeout
== b
->timeout
&&
457 #ifdef IP_SET_HASH_WITH_NETMASK
458 x
->netmask
== y
->netmask
&&
460 #ifdef IP_SET_HASH_WITH_MARKMASK
461 x
->markmask
== y
->markmask
&&
463 a
->extensions
== b
->extensions
;
466 /* Delete expired elements from the hashtable */
468 mtype_expire(struct ip_set
*set
, struct htype
*h
, u8 nets_length
, size_t dsize
)
472 struct mtype_elem
*data
;
475 #ifdef IP_SET_HASH_WITH_NETS
480 t
= rcu_dereference_bh(h
->table
);
481 for (i
= 0; i
< jhash_size(t
->htable_bits
); i
++) {
483 for (j
= 0; j
< n
->pos
; j
++) {
484 data
= ahash_data(n
, j
, dsize
);
485 if (ip_set_timeout_expired(ext_timeout(data
, set
))) {
486 pr_debug("expired %u/%u\n", i
, j
);
487 #ifdef IP_SET_HASH_WITH_NETS
488 for (k
= 0; k
< IPSET_NET_COUNT
; k
++)
489 mtype_del_cidr(h
, CIDR(data
->cidr
, k
),
492 ip_set_ext_destroy(set
, data
);
496 ahash_data(n
, n
->pos
- 1, dsize
),
502 if (n
->pos
+ AHASH_INIT_SIZE
< n
->size
) {
503 void *tmp
= kzalloc((n
->size
- AHASH_INIT_SIZE
)
507 /* Still try to delete expired elements */
509 n
->size
-= AHASH_INIT_SIZE
;
510 memcpy(tmp
, n
->value
, n
->size
* dsize
);
515 rcu_read_unlock_bh();
519 mtype_gc(unsigned long ul_set
)
521 struct ip_set
*set
= (struct ip_set
*) ul_set
;
522 struct htype
*h
= set
->data
;
524 pr_debug("called\n");
525 write_lock_bh(&set
->lock
);
526 mtype_expire(set
, h
, NLEN(set
->family
), set
->dsize
);
527 write_unlock_bh(&set
->lock
);
529 h
->gc
.expires
= jiffies
+ IPSET_GC_PERIOD(set
->timeout
) * HZ
;
533 /* Resize a hash: create a new hash table with doubling the hashsize
534 * and inserting the elements to it. Repeat until we succeed or
535 * fail due to memory pressures. */
537 mtype_resize(struct ip_set
*set
, bool retried
)
539 struct htype
*h
= set
->data
;
540 struct htable
*t
, *orig
= rcu_dereference_bh_nfnl(h
->table
);
541 u8 htable_bits
= orig
->htable_bits
;
542 #ifdef IP_SET_HASH_WITH_NETS
545 struct mtype_elem
*data
;
546 struct mtype_elem
*d
;
547 struct hbucket
*n
, *m
;
551 /* Try to cleanup once */
552 if (SET_WITH_TIMEOUT(set
) && !retried
) {
554 write_lock_bh(&set
->lock
);
555 mtype_expire(set
, set
->data
, NLEN(set
->family
), set
->dsize
);
556 write_unlock_bh(&set
->lock
);
564 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
565 set
->name
, orig
->htable_bits
, htable_bits
, orig
);
567 /* In case we have plenty of memory :-) */
568 pr_warning("Cannot increase the hashsize of set %s further\n",
570 return -IPSET_ERR_HASH_FULL
;
572 t
= ip_set_alloc(sizeof(*t
)
573 + jhash_size(htable_bits
) * sizeof(struct hbucket
));
576 t
->htable_bits
= htable_bits
;
578 read_lock_bh(&set
->lock
);
579 for (i
= 0; i
< jhash_size(orig
->htable_bits
); i
++) {
580 n
= hbucket(orig
, i
);
581 for (j
= 0; j
< n
->pos
; j
++) {
582 data
= ahash_data(n
, j
, set
->dsize
);
583 #ifdef IP_SET_HASH_WITH_NETS
585 mtype_data_reset_flags(data
, &flags
);
587 m
= hbucket(t
, HKEY(data
, h
->initval
, htable_bits
));
588 ret
= hbucket_elem_add(m
, AHASH_MAX(h
), set
->dsize
);
590 #ifdef IP_SET_HASH_WITH_NETS
591 mtype_data_reset_flags(data
, &flags
);
593 read_unlock_bh(&set
->lock
);
594 mtype_ahash_destroy(set
, t
, false);
599 d
= ahash_data(m
, m
->pos
++, set
->dsize
);
600 memcpy(d
, data
, set
->dsize
);
601 #ifdef IP_SET_HASH_WITH_NETS
602 mtype_data_reset_flags(d
, &flags
);
607 rcu_assign_pointer(h
->table
, t
);
608 read_unlock_bh(&set
->lock
);
610 /* Give time to other readers of the set */
611 synchronize_rcu_bh();
613 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set
->name
,
614 orig
->htable_bits
, orig
, t
->htable_bits
, t
);
615 mtype_ahash_destroy(set
, orig
, false);
620 /* Add an element to a hash and update the internal counters when succeeded,
621 * otherwise report the proper error code. */
623 mtype_add(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
624 struct ip_set_ext
*mext
, u32 flags
)
626 struct htype
*h
= set
->data
;
628 const struct mtype_elem
*d
= value
;
629 struct mtype_elem
*data
;
632 int j
= AHASH_MAX(h
) + 1;
633 bool flag_exist
= flags
& IPSET_FLAG_EXIST
;
636 if (h
->elements
>= h
->maxelem
&& SET_WITH_FORCEADD(set
)) {
638 t
= rcu_dereference_bh(h
->table
);
639 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
642 /* Choosing the first entry in the array to replace */
646 rcu_read_unlock_bh();
648 if (SET_WITH_TIMEOUT(set
) && h
->elements
>= h
->maxelem
)
649 /* FIXME: when set is full, we slow down here */
650 mtype_expire(set
, h
, NLEN(set
->family
), set
->dsize
);
652 if (h
->elements
>= h
->maxelem
) {
654 pr_warning("Set %s is full, maxelem %u reached\n",
655 set
->name
, h
->maxelem
);
656 return -IPSET_ERR_HASH_FULL
;
660 t
= rcu_dereference_bh(h
->table
);
661 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
663 for (i
= 0; i
< n
->pos
; i
++) {
664 data
= ahash_data(n
, i
, set
->dsize
);
665 if (mtype_data_equal(data
, d
, &multi
)) {
667 (SET_WITH_TIMEOUT(set
) &&
668 ip_set_timeout_expired(ext_timeout(data
, set
)))) {
669 /* Just the extensions could be overwritten */
673 ret
= -IPSET_ERR_EXIST
;
677 /* Reuse first timed out entry */
678 if (SET_WITH_TIMEOUT(set
) &&
679 ip_set_timeout_expired(ext_timeout(data
, set
)) &&
680 j
!= AHASH_MAX(h
) + 1)
684 if (j
!= AHASH_MAX(h
) + 1) {
685 /* Fill out reused slot */
686 data
= ahash_data(n
, j
, set
->dsize
);
687 #ifdef IP_SET_HASH_WITH_NETS
688 for (i
= 0; i
< IPSET_NET_COUNT
; i
++) {
689 mtype_del_cidr(h
, CIDR(data
->cidr
, i
),
690 NLEN(set
->family
), i
);
691 mtype_add_cidr(h
, CIDR(d
->cidr
, i
),
692 NLEN(set
->family
), i
);
695 ip_set_ext_destroy(set
, data
);
697 /* Use/create a new slot */
698 TUNE_AHASH_MAX(h
, multi
);
699 ret
= hbucket_elem_add(n
, AHASH_MAX(h
), set
->dsize
);
702 mtype_data_next(&h
->next
, d
);
705 data
= ahash_data(n
, n
->pos
++, set
->dsize
);
706 #ifdef IP_SET_HASH_WITH_NETS
707 for (i
= 0; i
< IPSET_NET_COUNT
; i
++)
708 mtype_add_cidr(h
, CIDR(d
->cidr
, i
), NLEN(set
->family
),
713 memcpy(data
, d
, sizeof(struct mtype_elem
));
714 #ifdef IP_SET_HASH_WITH_NETS
715 mtype_data_set_flags(data
, flags
);
717 if (SET_WITH_TIMEOUT(set
))
718 ip_set_timeout_set(ext_timeout(data
, set
), ext
->timeout
);
719 if (SET_WITH_COUNTER(set
))
720 ip_set_init_counter(ext_counter(data
, set
), ext
);
721 if (SET_WITH_COMMENT(set
))
722 ip_set_init_comment(ext_comment(data
, set
), ext
);
725 rcu_read_unlock_bh();
729 /* Delete an element from the hash: swap it with the last element
730 * and free up space if possible.
733 mtype_del(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
734 struct ip_set_ext
*mext
, u32 flags
)
736 struct htype
*h
= set
->data
;
738 const struct mtype_elem
*d
= value
;
739 struct mtype_elem
*data
;
741 int i
, ret
= -IPSET_ERR_EXIST
;
742 #ifdef IP_SET_HASH_WITH_NETS
748 t
= rcu_dereference_bh(h
->table
);
749 key
= HKEY(value
, h
->initval
, t
->htable_bits
);
751 for (i
= 0; i
< n
->pos
; i
++) {
752 data
= ahash_data(n
, i
, set
->dsize
);
753 if (!mtype_data_equal(data
, d
, &multi
))
755 if (SET_WITH_TIMEOUT(set
) &&
756 ip_set_timeout_expired(ext_timeout(data
, set
)))
760 memcpy(data
, ahash_data(n
, n
->pos
- 1, set
->dsize
),
765 #ifdef IP_SET_HASH_WITH_NETS
766 for (j
= 0; j
< IPSET_NET_COUNT
; j
++)
767 mtype_del_cidr(h
, CIDR(d
->cidr
, j
), NLEN(set
->family
),
770 ip_set_ext_destroy(set
, data
);
771 if (n
->pos
+ AHASH_INIT_SIZE
< n
->size
) {
772 void *tmp
= kzalloc((n
->size
- AHASH_INIT_SIZE
)
779 n
->size
-= AHASH_INIT_SIZE
;
780 memcpy(tmp
, n
->value
, n
->size
* set
->dsize
);
789 rcu_read_unlock_bh();
794 mtype_data_match(struct mtype_elem
*data
, const struct ip_set_ext
*ext
,
795 struct ip_set_ext
*mext
, struct ip_set
*set
, u32 flags
)
797 if (SET_WITH_COUNTER(set
))
798 ip_set_update_counter(ext_counter(data
, set
),
800 return mtype_do_data_match(data
);
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set
 *
 * @d is masked with every prefix length recorded in h->nets in turn (with
 * every pair of them when IPSET_NET_COUNT == 2) and looked up. Note that
 * @d is modified in the process.
 *
 * Returns the result of mtype_data_match() for the first stored, not
 * expired element found, 0 when there is none.
 */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = rcu_dereference_bh(h->table);
	struct hbucket *n;
	struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
	struct mtype_elem orig = *d;
	int i, j = 0, k;
#else
	int i, j = 0;
#endif
	u32 key, multi = 0;
	u8 nets_length = NLEN(set->family);

	pr_debug("test by nets\n");
	for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
		mtype_data_reset_elem(d, &orig);
		mtype_data_netmask(d, h->nets[j].cidr[0], false);
		for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi;
		     k++) {
			mtype_data_netmask(d, h->nets[k].cidr[1], true);
#else
		mtype_data_netmask(d, h->nets[j].cidr[0]);
#endif
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i, set->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			if (SET_WITH_TIMEOUT(set)) {
				if (!ip_set_timeout_expired(
						ext_timeout(data, set)))
					return mtype_data_match(data, ext,
								mext, set,
								flags);
#ifdef IP_SET_HASH_WITH_MULTI
				/* Expired element: do not let it stop the
				 * walk over the prefixes
				 */
				multi = 0;
#endif
			} else
				return mtype_data_match(data, ext,
							mext, set, flags);
		}
#if IPSET_NET_COUNT == 2
		}
#endif
	}
	return 0;
}
#endif
862 /* Test whether the element is added to the set */
864 mtype_test(struct ip_set
*set
, void *value
, const struct ip_set_ext
*ext
,
865 struct ip_set_ext
*mext
, u32 flags
)
867 struct htype
*h
= set
->data
;
869 struct mtype_elem
*d
= value
;
871 struct mtype_elem
*data
;
876 t
= rcu_dereference_bh(h
->table
);
877 #ifdef IP_SET_HASH_WITH_NETS
878 /* If we test an IP address and not a network address,
879 * try all possible network sizes */
880 for (i
= 0; i
< IPSET_NET_COUNT
; i
++)
881 if (CIDR(d
->cidr
, i
) != SET_HOST_MASK(set
->family
))
883 if (i
== IPSET_NET_COUNT
) {
884 ret
= mtype_test_cidrs(set
, d
, ext
, mext
, flags
);
889 key
= HKEY(d
, h
->initval
, t
->htable_bits
);
891 for (i
= 0; i
< n
->pos
; i
++) {
892 data
= ahash_data(n
, i
, set
->dsize
);
893 if (mtype_data_equal(data
, d
, &multi
) &&
894 !(SET_WITH_TIMEOUT(set
) &&
895 ip_set_timeout_expired(ext_timeout(data
, set
)))) {
896 ret
= mtype_data_match(data
, ext
, mext
, set
, flags
);
901 rcu_read_unlock_bh();
905 /* Reply a HEADER request: fill out the header part of the set */
907 mtype_head(struct ip_set
*set
, struct sk_buff
*skb
)
909 const struct htype
*h
= set
->data
;
910 const struct htable
*t
;
911 struct nlattr
*nested
;
914 t
= rcu_dereference_bh_nfnl(h
->table
);
915 memsize
= mtype_ahash_memsize(h
, t
, NLEN(set
->family
), set
->dsize
);
917 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
919 goto nla_put_failure
;
920 if (nla_put_net32(skb
, IPSET_ATTR_HASHSIZE
,
921 htonl(jhash_size(t
->htable_bits
))) ||
922 nla_put_net32(skb
, IPSET_ATTR_MAXELEM
, htonl(h
->maxelem
)))
923 goto nla_put_failure
;
924 #ifdef IP_SET_HASH_WITH_NETMASK
925 if (h
->netmask
!= HOST_MASK
&&
926 nla_put_u8(skb
, IPSET_ATTR_NETMASK
, h
->netmask
))
927 goto nla_put_failure
;
929 #ifdef IP_SET_HASH_WITH_MARKMASK
930 if (nla_put_u32(skb
, IPSET_ATTR_MARKMASK
, h
->markmask
))
931 goto nla_put_failure
;
933 if (nla_put_net32(skb
, IPSET_ATTR_REFERENCES
, htonl(set
->ref
- 1)) ||
934 nla_put_net32(skb
, IPSET_ATTR_MEMSIZE
, htonl(memsize
)))
935 goto nla_put_failure
;
936 if (unlikely(ip_set_put_flags(skb
, set
)))
937 goto nla_put_failure
;
938 ipset_nest_end(skb
, nested
);
945 /* Reply a LIST/SAVE request: dump the elements of the specified set */
947 mtype_list(const struct ip_set
*set
,
948 struct sk_buff
*skb
, struct netlink_callback
*cb
)
950 const struct htype
*h
= set
->data
;
951 const struct htable
*t
= rcu_dereference_bh_nfnl(h
->table
);
952 struct nlattr
*atd
, *nested
;
953 const struct hbucket
*n
;
954 const struct mtype_elem
*e
;
955 u32 first
= cb
->args
[IPSET_CB_ARG0
];
956 /* We assume that one hash bucket fills into one page */
960 atd
= ipset_nest_start(skb
, IPSET_ATTR_ADT
);
963 pr_debug("list hash set %s\n", set
->name
);
964 for (; cb
->args
[IPSET_CB_ARG0
] < jhash_size(t
->htable_bits
);
965 cb
->args
[IPSET_CB_ARG0
]++) {
966 incomplete
= skb_tail_pointer(skb
);
967 n
= hbucket(t
, cb
->args
[IPSET_CB_ARG0
]);
968 pr_debug("cb->arg bucket: %lu, t %p n %p\n",
969 cb
->args
[IPSET_CB_ARG0
], t
, n
);
970 for (i
= 0; i
< n
->pos
; i
++) {
971 e
= ahash_data(n
, i
, set
->dsize
);
972 if (SET_WITH_TIMEOUT(set
) &&
973 ip_set_timeout_expired(ext_timeout(e
, set
)))
975 pr_debug("list hash %lu hbucket %p i %u, data %p\n",
976 cb
->args
[IPSET_CB_ARG0
], n
, i
, e
);
977 nested
= ipset_nest_start(skb
, IPSET_ATTR_DATA
);
979 if (cb
->args
[IPSET_CB_ARG0
] == first
) {
980 nla_nest_cancel(skb
, atd
);
983 goto nla_put_failure
;
985 if (mtype_data_list(skb
, e
))
986 goto nla_put_failure
;
987 if (ip_set_put_extensions(skb
, set
, e
, true))
988 goto nla_put_failure
;
989 ipset_nest_end(skb
, nested
);
992 ipset_nest_end(skb
, atd
);
993 /* Set listing finished */
994 cb
->args
[IPSET_CB_ARG0
] = 0;
999 nlmsg_trim(skb
, incomplete
);
1000 if (unlikely(first
== cb
->args
[IPSET_CB_ARG0
])) {
1001 pr_warning("Can't list set %s: one bucket does not fit into "
1002 "a message. Please report it!\n", set
->name
);
1003 cb
->args
[IPSET_CB_ARG0
] = 0;
1006 ipset_nest_end(skb
, atd
);
1011 IPSET_TOKEN(MTYPE
, _kadt
)(struct ip_set
*set
, const struct sk_buff
*skb
,
1012 const struct xt_action_param
*par
,
1013 enum ipset_adt adt
, struct ip_set_adt_opt
*opt
);
1016 IPSET_TOKEN(MTYPE
, _uadt
)(struct ip_set
*set
, struct nlattr
*tb
[],
1017 enum ipset_adt adt
, u32
*lineno
, u32 flags
, bool retried
);
1019 static const struct ip_set_type_variant mtype_variant
= {
1023 [IPSET_ADD
] = mtype_add
,
1024 [IPSET_DEL
] = mtype_del
,
1025 [IPSET_TEST
] = mtype_test
,
1027 .destroy
= mtype_destroy
,
1028 .flush
= mtype_flush
,
1031 .resize
= mtype_resize
,
1032 .same_set
= mtype_same_set
,
#ifdef IP_SET_EMIT_CREATE
/* Create a hash type of set
 *
 * @net:   not used here
 * @set:   the set to initialize; family must be NFPROTO_IPV4 or IPV6
 * @tb:    netlink attributes of the create request (hashsize, maxelem,
 *         netmask, markmask, timeout, cadt flags)
 * @flags: not used here
 *
 * Allocates the set data and the hash table, selects the IPv4 or IPv6
 * variant and starts the garbage collector when a timeout is given.
 *
 * Returns 0 on success, -IPSET_ERR_INVALID_FAMILY, -IPSET_ERR_PROTOCOL,
 * -IPSET_ERR_INVALID_NETMASK, -IPSET_ERR_INVALID_MARKMASK or -ENOMEM.
 */
static int
IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
			    struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
	u32 markmask;
#endif
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct htype *h;
	struct htable *t;

	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;

#ifdef IP_SET_HASH_WITH_MARKMASK
	markmask = 0xffffffff;
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
#ifdef IP_SET_HASH_WITH_MARKMASK
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
#endif
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

#ifdef IP_SET_HASH_WITH_NETMASK
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	if (tb[IPSET_ATTR_MARKMASK]) {
		markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));

		/* A u32 cannot exceed 0xffffffff, only zero is invalid */
		if (markmask == 0)
			return -IPSET_ERR_INVALID_MARKMASK;
	}
#endif

	hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
	/* Size the prefix array with NLEN(), the same length the flush and
	 * the cidr book-keeping code use: with IP_SET_HASH_WITH_MULTI it
	 * is one entry more than the host mask.
	 */
	hsize += sizeof(struct net_prefixes) * NLEN(set->family);
#endif
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
	h->markmask = markmask;
#endif
	get_random_bytes(&h->initval, sizeof(h->initval));
	set->timeout = IPSET_NO_TIMEOUT;

	hbits = htable_bits(hashsize);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	t = ip_set_alloc(hsize);
	if (!t) {
		kfree(h);
		return -ENOMEM;
	}
	t->htable_bits = hbits;
	rcu_assign_pointer(h->table, t);

	set->data = h;
	if (set->family == NFPROTO_IPV4) {
		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
		set->dsize = ip_set_elem_len(set, tb,
				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
	} else {
		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
		set->dsize = ip_set_elem_len(set, tb,
				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
	}
	if (tb[IPSET_ATTR_TIMEOUT]) {
		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
		if (set->family == NFPROTO_IPV4)
			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
				IPSET_TOKEN(HTYPE, 4_gc));
		else
			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
				IPSET_TOKEN(HTYPE, 6_gc));
	}

	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(t->htable_bits),
		 t->htable_bits, h->maxelem, set->data, t);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */