]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/netfilter/nf_conntrack_expect.c
[NETFILTER]: nf_conntrack_core: avoid taking nf_conntrack_lock in nf_conntrack_alter_...
[mirror_ubuntu-artful-kernel.git] / net / netfilter / nf_conntrack_expect.c
CommitLineData
77ab9cff
MJ
1/* Expectation handling for nf_conntrack. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/types.h>
13#include <linux/netfilter.h>
14#include <linux/skbuff.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/stddef.h>
18#include <linux/slab.h>
19#include <linux/err.h>
20#include <linux/percpu.h>
21#include <linux/kernel.h>
a71c0855 22#include <linux/jhash.h>
457c4cbc 23#include <net/net_namespace.h>
77ab9cff
MJ
24
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_conntrack_expect.h>
28#include <net/netfilter/nf_conntrack_helper.h>
29#include <net/netfilter/nf_conntrack_tuple.h>
30
a71c0855
PM
/* Global expectation hash table; buckets chain nf_conntrack_expect via
 * ->hnode.  Protected by nf_conntrack_lock. */
struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

/* Number of buckets in nf_ct_expect_hash. */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly; /* hash seed */
static unsigned int nf_ct_expect_count;		/* live expectations */
unsigned int nf_ct_expect_max __read_mostly;	/* table size limit */
static int nf_ct_expect_hash_rnd_initted __read_mostly;	/* seed set? */
static int nf_ct_expect_vmalloc;	/* hash table was vmalloc()ed */

/* Slab cache for struct nf_conntrack_expect objects. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
77ab9cff
MJ
44
/* nf_conntrack_expect helper functions */

/* Unlink @exp from the global hash and from its master conntrack's
 * expectation list, dropping the list's reference.  Caller must hold
 * nf_conntrack_lock (write side) and must already have stopped the
 * timeout timer. */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	/* Remove from the global expectation hash table. */
	hlist_del(&exp->hnode);
	nf_ct_expect_count--;

	/* Remove from the master's per-conntrack list and drop the
	 * reference that list held. */
	hlist_del(&exp->lnode);
	master_help->expecting--;
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
77ab9cff 63
6823645d 64static void nf_ct_expectation_timed_out(unsigned long ul_expect)
77ab9cff
MJ
65{
66 struct nf_conntrack_expect *exp = (void *)ul_expect;
67
68 write_lock_bh(&nf_conntrack_lock);
69 nf_ct_unlink_expect(exp);
70 write_unlock_bh(&nf_conntrack_lock);
6823645d 71 nf_ct_expect_put(exp);
77ab9cff
MJ
72}
73
a71c0855
PM
/* Hash an expectation by its destination tuple (address, protocol
 * numbers, port) into a bucket index of nf_ct_expect_hash.
 * NOTE(review): the lazy seeding below is not atomic — two CPUs could
 * race to initialize the seed; confirm the callers' locking covers it. */
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
		get_random_bytes(&nf_ct_expect_hash_rnd, 4);
		nf_ct_expect_hash_rnd_initted = 1;
	}

	/* Mix dst address words with (protonum ^ l3num) and dst port,
	 * then scale into [0, hsize) with a multiply/shift instead of
	 * a modulo. */
	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}
88
77ab9cff 89struct nf_conntrack_expect *
6823645d 90__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
77ab9cff
MJ
91{
92 struct nf_conntrack_expect *i;
a71c0855
PM
93 struct hlist_node *n;
94 unsigned int h;
95
96 if (!nf_ct_expect_count)
97 return NULL;
77ab9cff 98
a71c0855
PM
99 h = nf_ct_expect_dst_hash(tuple);
100 hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
77ab9cff
MJ
101 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
102 return i;
103 }
104 return NULL;
105}
6823645d 106EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
77ab9cff
MJ
107
108/* Just find a expectation corresponding to a tuple. */
109struct nf_conntrack_expect *
6823645d 110nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
77ab9cff
MJ
111{
112 struct nf_conntrack_expect *i;
113
114 read_lock_bh(&nf_conntrack_lock);
6823645d 115 i = __nf_ct_expect_find(tuple);
77ab9cff
MJ
116 if (i)
117 atomic_inc(&i->use);
118 read_unlock_bh(&nf_conntrack_lock);
119
120 return i;
121}
6823645d 122EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
77ab9cff
MJ
123
/* If an expectation for this connection is found, it is unlinked from
 * the global table and returned with a reference for the caller.
 * Caller must hold nf_conntrack_lock. */
struct nf_conntrack_expect *
nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *exp;

	exp = __nf_ct_expect_find(tuple);
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		/* Permanent expectations survive a match: hand out an
		 * extra reference but leave them in the table. */
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		/* We stopped the timer, so we own its reference:
		 * unlink and transfer that reference to the caller. */
		nf_ct_unlink_expect(exp);
		return exp;
	}

	/* Timer already fired: the timeout handler owns the teardown. */
	return NULL;
}
153
/* Delete all expectations registered by conntrack @ct. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connection never expect any others. */
	if (!help || help->expecting == 0)
		return;

	/* _safe iteration: nf_ct_unlink_expect() removes entries while
	 * we walk.  Only entries whose timer we managed to stop are
	 * unlinked; the rest are already being torn down by the
	 * timeout callback. */
	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
77ab9cff
MJ
173
/* Would two expected things clash?  Returns nonzero when @a and @b
 * could both match the same packet. */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	/* Intersect the source port/id masks ... */
	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	/* ... and the source address masks, word by word. */
	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
192
193static inline int expect_matches(const struct nf_conntrack_expect *a,
194 const struct nf_conntrack_expect *b)
195{
196 return a->master == b->master
197 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
d4156e8c 198 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
77ab9cff
MJ
199}
200
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	write_lock_bh(&nf_conntrack_lock);
	/* Only tear down if the timer was still pending; otherwise the
	 * timeout handler already owns the teardown. */
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	write_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
77ab9cff
MJ
212
/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	/* GFP_ATOMIC: may be called from packet-processing context. */
	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);	/* caller owns the initial ref */
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
77ab9cff 229
6823645d 230void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
643a2c15
JE
231 union nf_inet_addr *saddr,
232 union nf_inet_addr *daddr,
6823645d 233 u_int8_t proto, __be16 *src, __be16 *dst)
d6a9b650
PM
234{
235 int len;
236
237 if (family == AF_INET)
238 len = 4;
239 else
240 len = 16;
241
242 exp->flags = 0;
243 exp->expectfn = NULL;
244 exp->helper = NULL;
245 exp->tuple.src.l3num = family;
246 exp->tuple.dst.protonum = proto;
d6a9b650
PM
247
248 if (saddr) {
249 memcpy(&exp->tuple.src.u3, saddr, len);
250 if (sizeof(exp->tuple.src.u3) > len)
251 /* address needs to be cleared for nf_ct_tuple_equal */
252 memset((void *)&exp->tuple.src.u3 + len, 0x00,
253 sizeof(exp->tuple.src.u3) - len);
254 memset(&exp->mask.src.u3, 0xFF, len);
255 if (sizeof(exp->mask.src.u3) > len)
256 memset((void *)&exp->mask.src.u3 + len, 0x00,
257 sizeof(exp->mask.src.u3) - len);
258 } else {
259 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
260 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
261 }
262
d6a9b650 263 if (src) {
a34c4589
AV
264 exp->tuple.src.u.all = *src;
265 exp->mask.src.u.all = htons(0xFFFF);
d6a9b650
PM
266 } else {
267 exp->tuple.src.u.all = 0;
268 exp->mask.src.u.all = 0;
269 }
270
d4156e8c
PM
271 memcpy(&exp->tuple.dst.u3, daddr, len);
272 if (sizeof(exp->tuple.dst.u3) > len)
273 /* address needs to be cleared for nf_ct_tuple_equal */
274 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
275 sizeof(exp->tuple.dst.u3) - len);
276
a34c4589 277 exp->tuple.dst.u.all = *dst;
d6a9b650 278}
6823645d 279EXPORT_SYMBOL_GPL(nf_ct_expect_init);
d6a9b650 280
6823645d 281void nf_ct_expect_put(struct nf_conntrack_expect *exp)
77ab9cff
MJ
282{
283 if (atomic_dec_and_test(&exp->use))
6823645d 284 kmem_cache_free(nf_ct_expect_cachep, exp);
77ab9cff 285}
6823645d 286EXPORT_SYMBOL_GPL(nf_ct_expect_put);
77ab9cff 287
6823645d 288static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
77ab9cff
MJ
289{
290 struct nf_conn_help *master_help = nfct_help(exp->master);
a71c0855 291 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
77ab9cff
MJ
292
293 atomic_inc(&exp->use);
b560580a
PM
294
295 hlist_add_head(&exp->lnode, &master_help->expectations);
77ab9cff 296 master_help->expecting++;
a71c0855 297
a71c0855
PM
298 hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
299 nf_ct_expect_count++;
77ab9cff 300
6823645d
PM
301 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
302 (unsigned long)exp);
77ab9cff
MJ
303 exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
304 add_timer(&exp->timeout);
305
77ab9cff
MJ
306 atomic_inc(&exp->use);
307 NF_CT_STAT_INC(expect_create);
308}
309
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp = NULL;
	struct hlist_node *n;

	/* New entries are added at the head, so after this loop @exp is
	 * the tail of the list — the oldest expectation. */
	hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
		; /* nothing */

	/* Evict only if we can stop its timer; otherwise the timeout
	 * handler is already removing it. */
	if (exp && del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
}
325
/* Re-arm the timeout of expectation @i.  Returns 0 if the timer had
 * already fired (the expectation is dying), 1 on success. */
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);

	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}
337
6823645d 338int nf_ct_expect_related(struct nf_conntrack_expect *expect)
77ab9cff
MJ
339{
340 struct nf_conntrack_expect *i;
341 struct nf_conn *master = expect->master;
342 struct nf_conn_help *master_help = nfct_help(master);
a71c0855
PM
343 struct hlist_node *n;
344 unsigned int h;
77ab9cff
MJ
345 int ret;
346
347 NF_CT_ASSERT(master_help);
348
349 write_lock_bh(&nf_conntrack_lock);
3c158f7f
PM
350 if (!master_help->helper) {
351 ret = -ESHUTDOWN;
352 goto out;
353 }
a71c0855
PM
354 h = nf_ct_expect_dst_hash(&expect->tuple);
355 hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
77ab9cff
MJ
356 if (expect_matches(i, expect)) {
357 /* Refresh timer: if it's dying, ignore.. */
358 if (refresh_timer(i)) {
359 ret = 0;
360 goto out;
361 }
362 } else if (expect_clash(i, expect)) {
363 ret = -EBUSY;
364 goto out;
365 }
366 }
367 /* Will be over limit? */
368 if (master_help->helper->max_expected &&
369 master_help->expecting >= master_help->helper->max_expected)
370 evict_oldest_expect(master);
371
f264a7df
PM
372 if (nf_ct_expect_count >= nf_ct_expect_max) {
373 if (net_ratelimit())
374 printk(KERN_WARNING
375 "nf_conntrack: expectation table full");
376 ret = -EMFILE;
377 goto out;
378 }
379
6823645d
PM
380 nf_ct_expect_insert(expect);
381 nf_ct_expect_event(IPEXP_NEW, expect);
77ab9cff
MJ
382 ret = 0;
383out:
384 write_unlock_bh(&nf_conntrack_lock);
385 return ret;
386}
6823645d 387EXPORT_SYMBOL_GPL(nf_ct_expect_related);
77ab9cff
MJ
388
389#ifdef CONFIG_PROC_FS
5d08ad44
PM
/* seq_file iterator state: index of the hash bucket being walked. */
struct ct_expect_iter_state {
	unsigned int bucket;
};
393
/* Return the first node of the first non-empty hash bucket, or NULL if
 * the table is empty.  Caller holds nf_conntrack_lock (read side). */
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct ct_expect_iter_state *st = seq->private;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
			return nf_ct_expect_hash[st->bucket].first;
	}
	return NULL;
}
77ab9cff 404
5d08ad44
PM
405static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
406 struct hlist_node *head)
407{
408 struct ct_expect_iter_state *st = seq->private;
77ab9cff 409
5d08ad44
PM
410 head = head->next;
411 while (head == NULL) {
412 if (++st->bucket >= nf_ct_expect_hsize)
77ab9cff 413 return NULL;
5d08ad44 414 head = nf_ct_expect_hash[st->bucket].first;
77ab9cff 415 }
5d08ad44 416 return head;
77ab9cff
MJ
417}
418
5d08ad44 419static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
77ab9cff 420{
5d08ad44 421 struct hlist_node *head = ct_expect_get_first(seq);
77ab9cff 422
5d08ad44
PM
423 if (head)
424 while (pos && (head = ct_expect_get_next(seq, head)))
425 pos--;
426 return pos ? NULL : head;
427}
77ab9cff 428
5d08ad44 429static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
ca7c48ca 430 __acquires(nf_conntrack_lock)
5d08ad44
PM
431{
432 read_lock_bh(&nf_conntrack_lock);
433 return ct_expect_get_idx(seq, *pos);
434}
77ab9cff 435
5d08ad44
PM
/* seq_file next: bump the position and advance the iterator. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}
441
5d08ad44 442static void exp_seq_stop(struct seq_file *seq, void *v)
ca7c48ca 443 __releases(nf_conntrack_lock)
77ab9cff
MJ
444{
445 read_unlock_bh(&nf_conntrack_lock);
446}
447
/* Print one expectation: remaining timeout in seconds (or "-" when no
 * timer was ever set up), l3/l4 protocol numbers, and the expected
 * tuple. */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct hlist_node *n = v;

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));
	return seq_putc(s, '\n');
}
469
56b3d975 470static const struct seq_operations exp_seq_ops = {
77ab9cff
MJ
471 .start = exp_seq_start,
472 .next = exp_seq_next,
473 .stop = exp_seq_stop,
474 .show = exp_seq_show
475};
476
/* proc open: start the seq_file iterator with per-open iterator state. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}
482
5d08ad44 483static const struct file_operations exp_file_ops = {
77ab9cff
MJ
484 .owner = THIS_MODULE,
485 .open = exp_open,
486 .read = seq_read,
487 .llseek = seq_lseek,
5d08ad44 488 .release = seq_release_private,
77ab9cff
MJ
489};
490#endif /* CONFIG_PROC_FS */
e9c1b084
PM
491
/* Create /proc/net/nf_conntrack_expect; no-op without CONFIG_PROC_FS. */
static int __init exp_proc_init(void)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}
503
/* Remove /proc/net/nf_conntrack_expect; no-op without CONFIG_PROC_FS. */
static void exp_proc_remove(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
510
a71c0855
PM
/* "expect_hashsize" module parameter: overrides the default hash size
 * computed in nf_conntrack_expect_init(). */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
512
e9c1b084
PM
513int __init nf_conntrack_expect_init(void)
514{
a71c0855
PM
515 int err = -ENOMEM;
516
517 if (!nf_ct_expect_hsize) {
518 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
519 if (!nf_ct_expect_hsize)
520 nf_ct_expect_hsize = 1;
521 }
f264a7df 522 nf_ct_expect_max = nf_ct_expect_hsize * 4;
a71c0855
PM
523
524 nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
525 &nf_ct_expect_vmalloc);
526 if (nf_ct_expect_hash == NULL)
527 goto err1;
e9c1b084
PM
528
529 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
530 sizeof(struct nf_conntrack_expect),
20c2df83 531 0, 0, NULL);
e9c1b084 532 if (!nf_ct_expect_cachep)
a71c0855 533 goto err2;
e9c1b084
PM
534
535 err = exp_proc_init();
536 if (err < 0)
a71c0855 537 goto err3;
e9c1b084
PM
538
539 return 0;
540
a71c0855
PM
541err3:
542 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
543 nf_ct_expect_hsize);
544err2:
e9c1b084 545 kmem_cache_destroy(nf_ct_expect_cachep);
a71c0855 546err1:
e9c1b084
PM
547 return err;
548}
549
/* Subsystem teardown: reverse of nf_conntrack_expect_init(). */
void nf_conntrack_expect_fini(void)
{
	exp_proc_remove();
	kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
			     nf_ct_expect_hsize);
}