#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

/* Internal container binding a flow entry to its conntrack object; only
 * the embedded flow is visible to users of this API.
 */
struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	/* Take a reference on the conntrack entry; give up if it is
	 * already dying or its refcount has dropped to zero.
	 */
	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	/* Hold the cached route for each direction for as long as the
	 * flow entry lives.
	 */
	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
	case NFPROTO_IPV4:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
		break;
	case NFPROTO_IPV6:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
		break;
	}

	/* l3num and protonum are identical in both conntrack directions,
	 * so the original direction serves for both flow tuples.
	 */
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

	/* TCP and UDP ports share the same offset in the protocol union,
	 * so using the tcp member covers both transports.
	 */
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
		FLOW_OFFLOAD_DIR_ORIGINAL;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
		FLOW_OFFLOAD_DIR_REPLY;

	/* Each direction's output interface is the other direction's
	 * input interface.
	 */
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

	/* A connection can have both SNAT and DNAT applied, so these two
	 * checks must not be mutually exclusive (an 'else if' here would
	 * drop the DNAT flag on such connections).
	 */
	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
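
/*
 * Usage sketch (illustration, not part of the original file): a control
 * path caller, e.g. the nft flow offload expression, is expected to pair
 * flow_offload_alloc() with flow_offload_add() and to dispose of the
 * entry with flow_offload_free() if insertion is not possible. The
 * function name below is hypothetical.
 *
 *	static int example_offload(struct nf_flowtable *ft, struct nf_conn *ct,
 *				   struct nf_flow_route *route)
 *	{
 *		struct flow_offload *flow;
 *
 *		flow = flow_offload_alloc(ct, route);
 *		if (!flow)
 *			return -ENOMEM;
 *
 *		if (flow_offload_add(ft, flow) < 0) {
 *			flow_offload_free(flow);
 *			return -ENOENT;
 *		}
 *		return 0;
 *	}
 */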

void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	/* Drop the conntrack reference taken in flow_offload_alloc() and
	 * free after an RCU grace period, as lookups may still be in flight.
	 */
	nf_ct_put(e->ct);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

/* Mark a flow as dying; the garbage collector tears it down on its
 * next run.
 */
void flow_offload_dead(struct flow_offload *flow)
{
	flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);

int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	/* Seed the timeout with the current time; the offload datapath is
	 * expected to refresh it for every packet it forwards.
	 */
	flow->timeout = (u32)jiffies;

	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

void flow_offload_del(struct nf_flowtable *flow_table,
		      struct flow_offload *flow)
{
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);

	/* Release via flow_offload_free() so the dst and conntrack
	 * references are dropped too; a bare kfree_rcu() here would leak
	 * both.
	 */
	flow_offload_free(flow);
}
EXPORT_SYMBOL_GPL(flow_offload_del);

struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
				      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
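
/*
 * Lookup sketch (illustration only): the packet path fills a
 * struct flow_offload_tuple from the packet headers and uses the
 * direction stored in the returned tuplehash to recover the enclosing
 * flow. The helper below is hypothetical; flow_offload_lookup() is the
 * only symbol taken from this file.
 *
 *	static struct flow_offload *example_find(struct nf_flowtable *ft,
 *						 struct flow_offload_tuple *tuple)
 *	{
 *		struct flow_offload_tuple_rhash *th;
 *
 *		th = flow_offload_lookup(ft, tuple);
 *		if (!th)
 *			return NULL;
 *
 *		return container_of(th, struct flow_offload,
 *				    tuplehash[th->tuple.dir]);
 *	}
 */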

static void nf_flow_release_ct(const struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);
	/* Remove the conntrack entry from its tables; the flow's own
	 * reference on it is dropped in flow_offload_free().
	 */
	nf_ct_delete(e->ct, 0, 0);
}

int nf_flow_table_iterate(struct nf_flowtable *flow_table,
			  void (*iter)(struct flow_offload *flow, void *data),
			  void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return err;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		/* Each flow is hashed twice, once per direction; visit it
		 * only from its original-direction tuple.
		 */
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

/* Timeouts use wraparound-safe arithmetic: with timeout = jiffies + HZ,
 * (__s32)(timeout - jiffies) stays positive for one second and then
 * turns negative, even across a u32 wrap.
 */
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
	return flow->flags & FLOW_OFFLOAD_DYING;
}

void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		goto schedule;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		if (nf_flow_has_expired(flow) ||
		    nf_flow_is_dying(flow)) {
			/* Delete the conntrack entry while the flow entry
			 * is still valid, then unlink and release the flow.
			 */
			nf_flow_release_ct(flow);
			flow_offload_del(flow_table, flow);
		}
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
schedule:
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);

/* Hash and compare only the lookup key, i.e. every field of
 * struct flow_offload_tuple laid out before 'dir'.
 */
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
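
/*
 * Wiring sketch (illustration only): a family-specific module is
 * expected to publish these rhashtable parameters and the gc worker
 * through its nf_flowtable_type, roughly as the IPv4 flowtable module
 * does. The hook function named here stands in for that family's
 * datapath and is not defined in this file.
 *
 *	static struct nf_flowtable_type flowtable_ipv4 = {
 *		.family		= NFPROTO_IPV4,
 *		.params		= &nf_flow_offload_rhash_params,
 *		.gc		= nf_flow_offload_work_gc,
 *		.hook		= nf_flow_offload_ip_hook,
 *		.owner		= THIS_MODULE,
 *	};
 */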

static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		/* A zero UDP checksum means "no checksum"; fold a computed
		 * zero to CSUM_MANGLED_0 to keep the checksum in use.
		 */
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		/* The NATed source port is the reply tuple's destination. */
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
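
/*
 * NAT sketch (illustration only): for a TCP or UDP packet, the datapath
 * applies both helpers according to the flags set at allocation time,
 * along the lines of the address-NAT code in the IP hook. 'thoff' is
 * the transport header offset from the network header; the function
 * name is hypothetical.
 *
 *	static int example_nat_ports(const struct flow_offload *flow,
 *				     struct sk_buff *skb, unsigned int thoff,
 *				     u8 proto, enum flow_offload_tuple_dir dir)
 *	{
 *		if (flow->flags & FLOW_OFFLOAD_SNAT &&
 *		    nf_flow_snat_port(flow, skb, thoff, proto, dir) < 0)
 *			return -1;
 *		if (flow->flags & FLOW_OFFLOAD_DNAT &&
 *		    nf_flow_dnat_port(flow, skb, thoff, proto, dir) < 0)
 *			return -1;
 *		return 0;
 *	}
 */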

static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;

	/* A NULL device flushes the whole table; otherwise only flows
	 * that enter through this device are marked as dying.
	 */
	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
		return;

	flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
					  void *data)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
	flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
	nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
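
/*
 * Teardown sketch (illustration only): a netdevice notifier is expected
 * to flush the flows tied to a device that goes down, in the spirit of
 * the notifier in the nft flow offload module. The function name is
 * hypothetical.
 *
 *	static int example_netdev_event(struct notifier_block *this,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		if (event == NETDEV_DOWN)
 *			nf_flow_table_cleanup(dev_net(dev), dev);
 *
 *		return NOTIFY_DONE;
 *	}
 */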

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");