]>
Commit | Line | Data |
---|---|---|
ac2a6666 PNA |
1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | |
3 | #include <linux/module.h> | |
4 | #include <linux/netfilter.h> | |
5 | #include <linux/rhashtable.h> | |
6 | #include <linux/netdevice.h> | |
4f3780c0 FF |
7 | #include <net/ip.h> |
8 | #include <net/ip6_route.h> | |
c0ea1bcb | 9 | #include <net/netfilter/nf_tables.h> |
ac2a6666 PNA |
10 | #include <net/netfilter/nf_flow_table.h> |
11 | #include <net/netfilter/nf_conntrack.h> | |
12 | #include <net/netfilter/nf_conntrack_core.h> | |
13 | #include <net/netfilter/nf_conntrack_tuple.h> | |
14 | ||
15 | struct flow_offload_entry { | |
16 | struct flow_offload flow; | |
17 | struct nf_conn *ct; | |
18 | struct rcu_head rcu_head; | |
19 | }; | |
20 | ||
84453a90 FF |
/* List of initialised flowtables; additions, removals and walks hold the mutex. */
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
23 | ||
047b300e FF |
24 | static void |
25 | flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, | |
26 | struct nf_flow_route *route, | |
27 | enum flow_offload_tuple_dir dir) | |
28 | { | |
29 | struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; | |
30 | struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; | |
4f3780c0 | 31 | struct dst_entry *dst = route->tuple[dir].dst; |
047b300e FF |
32 | |
33 | ft->dir = dir; | |
34 | ||
35 | switch (ctt->src.l3num) { | |
36 | case NFPROTO_IPV4: | |
37 | ft->src_v4 = ctt->src.u3.in; | |
38 | ft->dst_v4 = ctt->dst.u3.in; | |
4f3780c0 | 39 | ft->mtu = ip_dst_mtu_maybe_forward(dst, true); |
047b300e FF |
40 | break; |
41 | case NFPROTO_IPV6: | |
42 | ft->src_v6 = ctt->src.u3.in6; | |
43 | ft->dst_v6 = ctt->dst.u3.in6; | |
4f3780c0 | 44 | ft->mtu = ip6_dst_mtu_forward(dst); |
047b300e FF |
45 | break; |
46 | } | |
47 | ||
48 | ft->l3proto = ctt->src.l3num; | |
49 | ft->l4proto = ctt->dst.protonum; | |
50 | ft->src_port = ctt->src.u.tcp.port; | |
51 | ft->dst_port = ctt->dst.u.tcp.port; | |
52 | ||
53 | ft->iifidx = route->tuple[dir].ifindex; | |
54 | ft->oifidx = route->tuple[!dir].ifindex; | |
4f3780c0 | 55 | ft->dst_cache = dst; |
047b300e FF |
56 | } |
57 | ||
ac2a6666 PNA |
58 | struct flow_offload * |
59 | flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) | |
60 | { | |
61 | struct flow_offload_entry *entry; | |
62 | struct flow_offload *flow; | |
63 | ||
64 | if (unlikely(nf_ct_is_dying(ct) || | |
65 | !atomic_inc_not_zero(&ct->ct_general.use))) | |
66 | return NULL; | |
67 | ||
68 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | |
69 | if (!entry) | |
70 | goto err_ct_refcnt; | |
71 | ||
72 | flow = &entry->flow; | |
73 | ||
74 | if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) | |
75 | goto err_dst_cache_original; | |
76 | ||
77 | if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) | |
78 | goto err_dst_cache_reply; | |
79 | ||
80 | entry->ct = ct; | |
81 | ||
047b300e FF |
82 | flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); |
83 | flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); | |
ac2a6666 PNA |
84 | |
85 | if (ct->status & IPS_SRC_NAT) | |
86 | flow->flags |= FLOW_OFFLOAD_SNAT; | |
87 | else if (ct->status & IPS_DST_NAT) | |
88 | flow->flags |= FLOW_OFFLOAD_DNAT; | |
89 | ||
90 | return flow; | |
91 | ||
92 | err_dst_cache_reply: | |
93 | dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); | |
94 | err_dst_cache_original: | |
95 | kfree(entry); | |
96 | err_ct_refcnt: | |
97 | nf_ct_put(ct); | |
98 | ||
99 | return NULL; | |
100 | } | |
101 | EXPORT_SYMBOL_GPL(flow_offload_alloc); | |
102 | ||
103 | void flow_offload_free(struct flow_offload *flow) | |
104 | { | |
105 | struct flow_offload_entry *e; | |
106 | ||
107 | dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); | |
108 | dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); | |
109 | e = container_of(flow, struct flow_offload_entry, flow); | |
0ff90b6c FF |
110 | nf_ct_delete(e->ct, 0, 0); |
111 | nf_ct_put(e->ct); | |
112 | kfree_rcu(e, rcu_head); | |
ac2a6666 PNA |
113 | } |
114 | EXPORT_SYMBOL_GPL(flow_offload_free); | |
115 | ||
a268de77 FF |
116 | static u32 flow_offload_hash(const void *data, u32 len, u32 seed) |
117 | { | |
118 | const struct flow_offload_tuple *tuple = data; | |
119 | ||
120 | return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); | |
121 | } | |
122 | ||
123 | static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) | |
124 | { | |
125 | const struct flow_offload_tuple_rhash *tuplehash = data; | |
126 | ||
127 | return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); | |
128 | } | |
129 | ||
130 | static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, | |
131 | const void *ptr) | |
132 | { | |
133 | const struct flow_offload_tuple *tuple = arg->key; | |
134 | const struct flow_offload_tuple_rhash *x = ptr; | |
135 | ||
136 | if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) | |
137 | return 1; | |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
142 | static const struct rhashtable_params nf_flow_offload_rhash_params = { | |
143 | .head_offset = offsetof(struct flow_offload_tuple_rhash, node), | |
144 | .hashfn = flow_offload_hash, | |
145 | .obj_hashfn = flow_offload_hash_obj, | |
146 | .obj_cmpfn = flow_offload_hash_cmp, | |
147 | .automatic_shrinking = true, | |
148 | }; | |
149 | ||
ac2a6666 PNA |
150 | int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) |
151 | { | |
152 | flow->timeout = (u32)jiffies; | |
153 | ||
154 | rhashtable_insert_fast(&flow_table->rhashtable, | |
155 | &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, | |
a268de77 | 156 | nf_flow_offload_rhash_params); |
ac2a6666 PNA |
157 | rhashtable_insert_fast(&flow_table->rhashtable, |
158 | &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, | |
a268de77 | 159 | nf_flow_offload_rhash_params); |
ac2a6666 PNA |
160 | return 0; |
161 | } | |
162 | EXPORT_SYMBOL_GPL(flow_offload_add); | |
163 | ||
0ff90b6c FF |
164 | static void flow_offload_del(struct nf_flowtable *flow_table, |
165 | struct flow_offload *flow) | |
ac2a6666 | 166 | { |
ac2a6666 PNA |
167 | rhashtable_remove_fast(&flow_table->rhashtable, |
168 | &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, | |
a268de77 | 169 | nf_flow_offload_rhash_params); |
ac2a6666 PNA |
170 | rhashtable_remove_fast(&flow_table->rhashtable, |
171 | &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, | |
a268de77 | 172 | nf_flow_offload_rhash_params); |
ac2a6666 | 173 | |
0ff90b6c | 174 | flow_offload_free(flow); |
ac2a6666 | 175 | } |
ac2a6666 | 176 | |
59c466dd FF |
177 | void flow_offload_teardown(struct flow_offload *flow) |
178 | { | |
179 | flow->flags |= FLOW_OFFLOAD_TEARDOWN; | |
180 | } | |
181 | EXPORT_SYMBOL_GPL(flow_offload_teardown); | |
182 | ||
ac2a6666 PNA |
183 | struct flow_offload_tuple_rhash * |
184 | flow_offload_lookup(struct nf_flowtable *flow_table, | |
185 | struct flow_offload_tuple *tuple) | |
186 | { | |
187 | return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, | |
a268de77 | 188 | nf_flow_offload_rhash_params); |
ac2a6666 PNA |
189 | } |
190 | EXPORT_SYMBOL_GPL(flow_offload_lookup); | |
191 | ||
ac2a6666 PNA |
192 | int nf_flow_table_iterate(struct nf_flowtable *flow_table, |
193 | void (*iter)(struct flow_offload *flow, void *data), | |
194 | void *data) | |
195 | { | |
196 | struct flow_offload_tuple_rhash *tuplehash; | |
197 | struct rhashtable_iter hti; | |
198 | struct flow_offload *flow; | |
199 | int err; | |
200 | ||
201 | err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); | |
202 | if (err) | |
203 | return err; | |
204 | ||
205 | rhashtable_walk_start(&hti); | |
206 | ||
207 | while ((tuplehash = rhashtable_walk_next(&hti))) { | |
208 | if (IS_ERR(tuplehash)) { | |
209 | err = PTR_ERR(tuplehash); | |
210 | if (err != -EAGAIN) | |
211 | goto out; | |
212 | ||
213 | continue; | |
214 | } | |
215 | if (tuplehash->tuple.dir) | |
216 | continue; | |
217 | ||
218 | flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); | |
219 | ||
220 | iter(flow, data); | |
221 | } | |
222 | out: | |
223 | rhashtable_walk_stop(&hti); | |
224 | rhashtable_walk_exit(&hti); | |
225 | ||
226 | return err; | |
227 | } | |
228 | EXPORT_SYMBOL_GPL(nf_flow_table_iterate); | |
229 | ||
230 | static inline bool nf_flow_has_expired(const struct flow_offload *flow) | |
231 | { | |
232 | return (__s32)(flow->timeout - (u32)jiffies) <= 0; | |
233 | } | |
234 | ||
b408c5b0 | 235 | static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) |
ac2a6666 PNA |
236 | { |
237 | struct flow_offload_tuple_rhash *tuplehash; | |
ac2a6666 PNA |
238 | struct rhashtable_iter hti; |
239 | struct flow_offload *flow; | |
240 | int err; | |
241 | ||
ac2a6666 PNA |
242 | err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); |
243 | if (err) | |
b408c5b0 | 244 | return 0; |
ac2a6666 PNA |
245 | |
246 | rhashtable_walk_start(&hti); | |
247 | ||
248 | while ((tuplehash = rhashtable_walk_next(&hti))) { | |
249 | if (IS_ERR(tuplehash)) { | |
250 | err = PTR_ERR(tuplehash); | |
251 | if (err != -EAGAIN) | |
252 | goto out; | |
253 | ||
254 | continue; | |
255 | } | |
256 | if (tuplehash->tuple.dir) | |
257 | continue; | |
258 | ||
259 | flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); | |
260 | ||
261 | if (nf_flow_has_expired(flow) || | |
59c466dd FF |
262 | (flow->flags & (FLOW_OFFLOAD_DYING | |
263 | FLOW_OFFLOAD_TEARDOWN))) | |
ac2a6666 | 264 | flow_offload_del(flow_table, flow); |
ac2a6666 PNA |
265 | } |
266 | out: | |
267 | rhashtable_walk_stop(&hti); | |
268 | rhashtable_walk_exit(&hti); | |
b408c5b0 PNA |
269 | |
270 | return 1; | |
271 | } | |
272 | ||
a268de77 | 273 | static void nf_flow_offload_work_gc(struct work_struct *work) |
b408c5b0 PNA |
274 | { |
275 | struct nf_flowtable *flow_table; | |
276 | ||
277 | flow_table = container_of(work, struct nf_flowtable, gc_work.work); | |
278 | nf_flow_offload_gc_step(flow_table); | |
ac2a6666 PNA |
279 | queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); |
280 | } | |
ac2a6666 PNA |
281 | |
282 | static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, | |
283 | __be16 port, __be16 new_port) | |
284 | { | |
285 | struct tcphdr *tcph; | |
286 | ||
287 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || | |
288 | skb_try_make_writable(skb, thoff + sizeof(*tcph))) | |
289 | return -1; | |
290 | ||
291 | tcph = (void *)(skb_network_header(skb) + thoff); | |
292 | inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); | |
293 | ||
294 | return 0; | |
295 | } | |
296 | ||
297 | static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, | |
298 | __be16 port, __be16 new_port) | |
299 | { | |
300 | struct udphdr *udph; | |
301 | ||
302 | if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || | |
303 | skb_try_make_writable(skb, thoff + sizeof(*udph))) | |
304 | return -1; | |
305 | ||
306 | udph = (void *)(skb_network_header(skb) + thoff); | |
307 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
308 | inet_proto_csum_replace2(&udph->check, skb, port, | |
309 | new_port, true); | |
310 | if (!udph->check) | |
311 | udph->check = CSUM_MANGLED_0; | |
312 | } | |
313 | ||
314 | return 0; | |
315 | } | |
316 | ||
317 | static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, | |
318 | u8 protocol, __be16 port, __be16 new_port) | |
319 | { | |
320 | switch (protocol) { | |
321 | case IPPROTO_TCP: | |
322 | if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) | |
323 | return NF_DROP; | |
324 | break; | |
325 | case IPPROTO_UDP: | |
326 | if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) | |
327 | return NF_DROP; | |
328 | break; | |
329 | } | |
330 | ||
331 | return 0; | |
332 | } | |
333 | ||
334 | int nf_flow_snat_port(const struct flow_offload *flow, | |
335 | struct sk_buff *skb, unsigned int thoff, | |
336 | u8 protocol, enum flow_offload_tuple_dir dir) | |
337 | { | |
338 | struct flow_ports *hdr; | |
339 | __be16 port, new_port; | |
340 | ||
341 | if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || | |
342 | skb_try_make_writable(skb, thoff + sizeof(*hdr))) | |
343 | return -1; | |
344 | ||
345 | hdr = (void *)(skb_network_header(skb) + thoff); | |
346 | ||
347 | switch (dir) { | |
348 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
349 | port = hdr->source; | |
350 | new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; | |
351 | hdr->source = new_port; | |
352 | break; | |
353 | case FLOW_OFFLOAD_DIR_REPLY: | |
354 | port = hdr->dest; | |
355 | new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; | |
356 | hdr->dest = new_port; | |
357 | break; | |
358 | default: | |
359 | return -1; | |
360 | } | |
361 | ||
362 | return nf_flow_nat_port(skb, thoff, protocol, port, new_port); | |
363 | } | |
364 | EXPORT_SYMBOL_GPL(nf_flow_snat_port); | |
365 | ||
366 | int nf_flow_dnat_port(const struct flow_offload *flow, | |
367 | struct sk_buff *skb, unsigned int thoff, | |
368 | u8 protocol, enum flow_offload_tuple_dir dir) | |
369 | { | |
370 | struct flow_ports *hdr; | |
371 | __be16 port, new_port; | |
372 | ||
373 | if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || | |
374 | skb_try_make_writable(skb, thoff + sizeof(*hdr))) | |
375 | return -1; | |
376 | ||
377 | hdr = (void *)(skb_network_header(skb) + thoff); | |
378 | ||
379 | switch (dir) { | |
380 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
381 | port = hdr->dest; | |
382 | new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; | |
383 | hdr->dest = new_port; | |
384 | break; | |
385 | case FLOW_OFFLOAD_DIR_REPLY: | |
386 | port = hdr->source; | |
387 | new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; | |
388 | hdr->source = new_port; | |
389 | break; | |
390 | default: | |
391 | return -1; | |
392 | } | |
393 | ||
394 | return nf_flow_nat_port(skb, thoff, protocol, port, new_port); | |
395 | } | |
396 | EXPORT_SYMBOL_GPL(nf_flow_dnat_port); | |
397 | ||
a268de77 FF |
398 | int nf_flow_table_init(struct nf_flowtable *flowtable) |
399 | { | |
400 | int err; | |
401 | ||
402 | INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); | |
403 | ||
404 | err = rhashtable_init(&flowtable->rhashtable, | |
405 | &nf_flow_offload_rhash_params); | |
406 | if (err < 0) | |
407 | return err; | |
408 | ||
409 | queue_delayed_work(system_power_efficient_wq, | |
410 | &flowtable->gc_work, HZ); | |
411 | ||
84453a90 FF |
412 | mutex_lock(&flowtable_lock); |
413 | list_add(&flowtable->list, &flowtables); | |
414 | mutex_unlock(&flowtable_lock); | |
415 | ||
a268de77 FF |
416 | return 0; |
417 | } | |
418 | EXPORT_SYMBOL_GPL(nf_flow_table_init); | |
419 | ||
c0ea1bcb PNA |
420 | static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) |
421 | { | |
422 | struct net_device *dev = data; | |
423 | ||
59c466dd FF |
424 | if (!dev) { |
425 | flow_offload_teardown(flow); | |
c0ea1bcb | 426 | return; |
59c466dd | 427 | } |
c0ea1bcb | 428 | |
59c466dd FF |
429 | if (flow->tuplehash[0].tuple.iifidx == dev->ifindex || |
430 | flow->tuplehash[1].tuple.iifidx == dev->ifindex) | |
431 | flow_offload_dead(flow); | |
c0ea1bcb PNA |
432 | } |
433 | ||
434 | static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, | |
84453a90 | 435 | struct net_device *dev) |
c0ea1bcb | 436 | { |
84453a90 | 437 | nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); |
c0ea1bcb PNA |
438 | flush_delayed_work(&flowtable->gc_work); |
439 | } | |
440 | ||
441 | void nf_flow_table_cleanup(struct net *net, struct net_device *dev) | |
442 | { | |
84453a90 FF |
443 | struct nf_flowtable *flowtable; |
444 | ||
445 | mutex_lock(&flowtable_lock); | |
446 | list_for_each_entry(flowtable, &flowtables, list) | |
447 | nf_flow_table_iterate_cleanup(flowtable, dev); | |
448 | mutex_unlock(&flowtable_lock); | |
c0ea1bcb PNA |
449 | } |
450 | EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); | |
451 | ||
b408c5b0 PNA |
452 | void nf_flow_table_free(struct nf_flowtable *flow_table) |
453 | { | |
84453a90 FF |
454 | mutex_lock(&flowtable_lock); |
455 | list_del(&flow_table->list); | |
456 | mutex_unlock(&flowtable_lock); | |
a268de77 | 457 | cancel_delayed_work_sync(&flow_table->gc_work); |
b408c5b0 PNA |
458 | nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); |
459 | WARN_ON(!nf_flow_offload_gc_step(flow_table)); | |
a268de77 | 460 | rhashtable_destroy(&flow_table->rhashtable); |
b408c5b0 PNA |
461 | } |
462 | EXPORT_SYMBOL_GPL(nf_flow_table_free); | |
463 | ||
ac2a6666 PNA |
464 | MODULE_LICENSE("GPL"); |
465 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); |