]>
Commit | Line | Data |
---|---|---|
7eb95156 PE |
1 | /* |
2 | * inet fragments management | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Pavel Emelyanov <xemul@openvz.org> | |
10 | * Started as consolidation of ipv4/ip_fragment.c, | |
11 | * ipv6/reassembly.c and ipv6 nf conntrack reassembly | |
12 | */ | |
13 | ||
14 | #include <linux/list.h> | |
15 | #include <linux/spinlock.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/timer.h> | |
18 | #include <linux/mm.h> | |
321a3a99 | 19 | #include <linux/random.h> |
1e4b8287 PE |
20 | #include <linux/skbuff.h> |
21 | #include <linux/rtnetlink.h> | |
5a0e3ad6 | 22 | #include <linux/slab.h> |
7eb95156 | 23 | |
5a3da1fe | 24 | #include <net/sock.h> |
7eb95156 | 25 | #include <net/inet_frag.h> |
be991971 HFS |
26 | #include <net/inet_ecn.h> |
27 | ||
28 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements | |
29 | * Value : 0xff if frame should be dropped. | |
30 | * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field | |
31 | */ | |
32 | const u8 ip_frag_ecn_table[16] = { | |
33 | /* at least one fragment had CE, and others ECT_0 or ECT_1 */ | |
34 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, | |
35 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | |
36 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | |
37 | ||
38 | /* invalid combinations : drop frame */ | |
39 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, | |
40 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, | |
41 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, | |
42 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | |
43 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, | |
44 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, | |
45 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | |
46 | }; | |
47 | EXPORT_SYMBOL(ip_frag_ecn_table); | |
7eb95156 | 48 | |
d4ad4d22 | 49 | int inet_frags_init(struct inet_frags *f) |
7eb95156 | 50 | { |
d4ad4d22 NA |
51 | f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, |
52 | NULL); | |
53 | if (!f->frags_cachep) | |
54 | return -ENOMEM; | |
55 | ||
56 | return 0; | |
7eb95156 PE |
57 | } |
58 | EXPORT_SYMBOL(inet_frags_init); | |
59 | ||
60 | void inet_frags_fini(struct inet_frags *f) | |
61 | { | |
648700f7 ED |
62 | /* We must wait that all inet_frag_destroy_rcu() have completed. */ |
63 | rcu_barrier(); | |
64 | ||
d4ad4d22 | 65 | kmem_cache_destroy(f->frags_cachep); |
648700f7 | 66 | f->frags_cachep = NULL; |
7eb95156 PE |
67 | } |
68 | EXPORT_SYMBOL(inet_frags_fini); | |
277e650d | 69 | |
648700f7 | 70 | static void inet_frags_free_cb(void *ptr, void *arg) |
277e650d | 71 | { |
648700f7 | 72 | struct inet_frag_queue *fq = ptr; |
ab1c724f | 73 | |
648700f7 ED |
74 | /* If we can not cancel the timer, it means this frag_queue |
75 | * is already disappearing, we have nothing to do. | |
76 | * Otherwise, we own a refcount until the end of this function. | |
77 | */ | |
78 | if (!del_timer(&fq->timer)) | |
79 | return; | |
19952cc4 | 80 | |
648700f7 ED |
81 | spin_lock_bh(&fq->lock); |
82 | if (!(fq->flags & INET_FRAG_COMPLETE)) { | |
83 | fq->flags |= INET_FRAG_COMPLETE; | |
84 | refcount_dec(&fq->refcnt); | |
ab1c724f | 85 | } |
648700f7 | 86 | spin_unlock_bh(&fq->lock); |
ab1c724f | 87 | |
648700f7 | 88 | inet_frag_put(fq); |
ab1c724f FW |
89 | } |
90 | ||
648700f7 | 91 | void inet_frags_exit_net(struct netns_frags *nf) |
ab1c724f | 92 | { |
f6f2a4a2 | 93 | nf->high_thresh = 0; /* prevent creation of new frags */ |
ab1c724f | 94 | |
648700f7 | 95 | rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); |
277e650d | 96 | } |
648700f7 | 97 | EXPORT_SYMBOL(inet_frags_exit_net); |
277e650d | 98 | |
093ba729 | 99 | void inet_frag_kill(struct inet_frag_queue *fq) |
277e650d PE |
100 | { |
101 | if (del_timer(&fq->timer)) | |
edcb6918 | 102 | refcount_dec(&fq->refcnt); |
277e650d | 103 | |
06aa8b8a | 104 | if (!(fq->flags & INET_FRAG_COMPLETE)) { |
648700f7 ED |
105 | struct netns_frags *nf = fq->net; |
106 | ||
107 | fq->flags |= INET_FRAG_COMPLETE; | |
108 | rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); | |
edcb6918 | 109 | refcount_dec(&fq->refcnt); |
277e650d PE |
110 | } |
111 | } | |
277e650d | 112 | EXPORT_SYMBOL(inet_frag_kill); |
1e4b8287 | 113 | |
648700f7 ED |
114 | static void inet_frag_destroy_rcu(struct rcu_head *head) |
115 | { | |
116 | struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, | |
117 | rcu); | |
118 | struct inet_frags *f = q->net->f; | |
119 | ||
120 | if (f->destructor) | |
121 | f->destructor(q); | |
122 | kmem_cache_free(f->frags_cachep, q); | |
123 | } | |
124 | ||
093ba729 | 125 | void inet_frag_destroy(struct inet_frag_queue *q) |
1e4b8287 PE |
126 | { |
127 | struct sk_buff *fp; | |
6ddc0822 | 128 | struct netns_frags *nf; |
d433673e | 129 | unsigned int sum, sum_truesize = 0; |
093ba729 | 130 | struct inet_frags *f; |
1e4b8287 | 131 | |
06aa8b8a | 132 | WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); |
547b792c | 133 | WARN_ON(del_timer(&q->timer) != 0); |
1e4b8287 PE |
134 | |
135 | /* Release all fragment data. */ | |
136 | fp = q->fragments; | |
6ddc0822 | 137 | nf = q->net; |
093ba729 | 138 | f = nf->f; |
1e4b8287 PE |
139 | while (fp) { |
140 | struct sk_buff *xp = fp->next; | |
141 | ||
d433673e | 142 | sum_truesize += fp->truesize; |
a72a5e2d | 143 | kfree_skb(fp); |
1e4b8287 PE |
144 | fp = xp; |
145 | } | |
d433673e | 146 | sum = sum_truesize + f->qsize; |
1e4b8287 | 147 | |
648700f7 | 148 | call_rcu(&q->rcu, inet_frag_destroy_rcu); |
5719b296 FW |
149 | |
150 | sub_frag_mem_limit(nf, sum); | |
1e4b8287 PE |
151 | } |
152 | EXPORT_SYMBOL(inet_frag_destroy); | |
8e7999c4 | 153 | |
ac18e750 | 154 | static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, |
f926e236 NA |
155 | struct inet_frags *f, |
156 | void *arg) | |
e521db9d PE |
157 | { |
158 | struct inet_frag_queue *q; | |
159 | ||
648700f7 | 160 | if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) |
86e93e47 FW |
161 | return NULL; |
162 | ||
d4ad4d22 | 163 | q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); |
51456b29 | 164 | if (!q) |
e521db9d PE |
165 | return NULL; |
166 | ||
54db0cc2 | 167 | q->net = nf; |
c6fda282 | 168 | f->constructor(q, arg); |
0e60d245 | 169 | add_frag_mem_limit(nf, f->qsize); |
d433673e | 170 | |
78802011 | 171 | timer_setup(&q->timer, f->frag_expire, 0); |
e521db9d | 172 | spin_lock_init(&q->lock); |
648700f7 | 173 | refcount_set(&q->refcnt, 3); |
e521db9d PE |
174 | |
175 | return q; | |
176 | } | |
c6fda282 | 177 | |
ac18e750 | 178 | static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, |
f926e236 | 179 | void *arg) |
c6fda282 | 180 | { |
648700f7 | 181 | struct inet_frags *f = nf->f; |
c6fda282 | 182 | struct inet_frag_queue *q; |
648700f7 | 183 | int err; |
c6fda282 | 184 | |
ac18e750 | 185 | q = inet_frag_alloc(nf, f, arg); |
51456b29 | 186 | if (!q) |
c6fda282 PE |
187 | return NULL; |
188 | ||
648700f7 ED |
189 | mod_timer(&q->timer, jiffies + nf->timeout); |
190 | ||
191 | err = rhashtable_insert_fast(&nf->rhashtable, &q->node, | |
192 | f->rhash_params); | |
193 | if (err < 0) { | |
194 | q->flags |= INET_FRAG_COMPLETE; | |
195 | inet_frag_kill(q); | |
196 | inet_frag_destroy(q); | |
197 | return NULL; | |
198 | } | |
199 | return q; | |
c6fda282 | 200 | } |
abd6523d | 201 | |
648700f7 ED |
202 | /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ |
203 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) | |
abd6523d | 204 | { |
648700f7 | 205 | struct inet_frag_queue *fq; |
abd6523d | 206 | |
648700f7 | 207 | rcu_read_lock(); |
e3a57d18 | 208 | |
648700f7 ED |
209 | fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); |
210 | if (fq) { | |
211 | if (!refcount_inc_not_zero(&fq->refcnt)) | |
212 | fq = NULL; | |
213 | rcu_read_unlock(); | |
214 | return fq; | |
e3a57d18 | 215 | } |
648700f7 | 216 | rcu_read_unlock(); |
e3a57d18 | 217 | |
648700f7 | 218 | return inet_frag_create(nf, key); |
abd6523d PE |
219 | } |
220 | EXPORT_SYMBOL(inet_frag_find); |