/* net/netfilter/nf_flow_table.c */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

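/*
 * Allocate a flow table entry for an established conntrack entry. Takes a
 * reference on the conntrack and on both route dst entries, then fills in
 * the ORIGINAL and REPLY tuples (addresses, ports, interfaces, NAT flags)
 * from the conntrack tuples and the supplied route.
 */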
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
	case NFPROTO_IPV4:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
		break;
	case NFPROTO_IPV6:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
		break;
	}

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
		FLOW_OFFLOAD_DIR_ORIGINAL;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
		FLOW_OFFLOAD_DIR_REPLY;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	else if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

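/* Drop the dst references held by both directions and free the entry. */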
void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	kfree(e);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

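/* Mark the flow as dying; the garbage collector tears it down later. */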
void flow_offload_dead(struct flow_offload *flow)
{
	flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);

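/*
 * Timestamp the flow and insert its ORIGINAL and REPLY tuplehashes into
 * the flowtable hash table.
 */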
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	flow->timeout = (u32)jiffies;

	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

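/*
 * Remove both direction tuplehashes from the flowtable and free the entry
 * after an RCU grace period, since lookups run under RCU.
 */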
void flow_offload_del(struct nf_flowtable *flow_table,
		      struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);

	e = container_of(flow, struct flow_offload_entry, flow);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_del);

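/* Look up the flow table entry matching the given tuple. */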
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
				      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

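/*
 * Schedule removal of the conntrack entry backing this flow and drop the
 * reference taken in flow_offload_alloc().
 */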
static void nf_flow_release_ct(const struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);
	nf_ct_delete(e->ct, 0, 0);
	nf_ct_put(e->ct);
}

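/*
 * Walk the flowtable and invoke the callback once per flow. Only the
 * ORIGINAL direction tuplehash is passed on, so each flow is visited a
 * single time even though both directions are hashed.
 */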
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
			  void (*iter)(struct flow_offload *flow, void *data),
			  void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return err;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

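/* Signed jiffies comparison so the check is safe across u32 wraparound. */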
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
	return flow->flags & FLOW_OFFLOAD_DYING;
}

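/*
 * Periodic garbage collection: walk the table and remove flows that have
 * expired or were marked dying, releasing their conntrack entries. The
 * work re-arms itself once per second (HZ).
 */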
void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		goto schedule;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		if (nf_flow_has_expired(flow) ||
		    nf_flow_is_dying(flow)) {
			flow_offload_del(flow_table, flow);
			nf_flow_release_ct(flow);
		}
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
schedule:
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);

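/*
 * The rhashtable key covers struct flow_offload_tuple up to (but not
 * including) the 'dir' member; the fields from 'dir' onwards are excluded
 * from hashing and comparison.
 */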
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);

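/*
 * Fix up the transport checksum after a port has been rewritten. A UDP
 * checksum of zero means "no checksum", so it is only updated when already
 * set (or when the skb carries CHECKSUM_PARTIAL), and a recomputed value of
 * zero is folded to CSUM_MANGLED_0.
 */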
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

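/*
 * Rewrite the source (SNAT) or destination (DNAT) port of a packet that
 * matched an offloaded flow, using the port stored in the opposite
 * direction's tuple, then adjust the transport checksum accordingly.
 */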
int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

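/*
 * Device/netns cleanup: mark every flow bound to the given device (or all
 * flows if dev is NULL) as dying, then flush the GC work so they are torn
 * down before the caller proceeds.
 */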
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;

	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
		return;

	flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
					  void *data)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
	flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
	nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");