/* net/netfilter/nf_flow_table_core.c */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);

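/*
 * Populate the flow tuple for one direction from the conntrack tuple
 * and the route supplied by the caller.
 */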
static void
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
		      struct nf_flow_route *route,
		      enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
	struct dst_entry *dst = route->tuple[dir].dst;

	ft->dir = dir;

	switch (ctt->src.l3num) {
	case NFPROTO_IPV4:
		ft->src_v4 = ctt->src.u3.in;
		ft->dst_v4 = ctt->dst.u3.in;
		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
		break;
	case NFPROTO_IPV6:
		ft->src_v6 = ctt->src.u3.in6;
		ft->dst_v6 = ctt->dst.u3.in6;
		ft->mtu = ip6_dst_mtu_forward(dst);
		break;
	}

	ft->l3proto = ctt->src.l3num;
	ft->l4proto = ctt->dst.protonum;
	ft->src_port = ctt->src.u.tcp.port;
	ft->dst_port = ctt->dst.u.tcp.port;

	ft->iifidx = route->tuple[dir].ifindex;
	ft->oifidx = route->tuple[!dir].ifindex;
	ft->dst_cache = dst;
}

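/*
 * Allocate a flow entry for an established conntrack entry, taking a
 * reference on the conntrack object and on both route dst entries.
 */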
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);

	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

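/*
 * Restore conntrack state when a flow leaves the offload path: force TCP
 * back to the established state, clear the tracked receive windows and
 * refresh the timeout to the established (TCP) or replied (UDP) value.
 */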
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
	tcp->state = TCP_CONNTRACK_ESTABLISHED;
	tcp->seen[0].td_maxwin = 0;
	tcp->seen[1].td_maxwin = 0;
}

static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
	const struct nf_conntrack_l4proto *l4proto;
	struct net *net = nf_ct_net(ct);
	unsigned int *timeouts;
	unsigned int timeout;
	int l4num;

	l4num = nf_ct_protonum(ct);
	if (l4num == IPPROTO_TCP)
		flow_offload_fixup_tcp(&ct->proto.tcp);

	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
	if (!l4proto)
		return;

	timeouts = l4proto->get_timeouts(net);
	if (!timeouts)
		return;

	if (l4num == IPPROTO_TCP)
		timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
	else if (l4num == IPPROTO_UDP)
		timeout = timeouts[UDP_CT_REPLIED];
	else
		return;

	ct->timeout = nfct_time_stamp + timeout;
}

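/*
 * Release both cached routes and the conntrack reference. If the flow is
 * marked as dying, the conntrack entry is deleted as well.
 */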
void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	if (flow->flags & FLOW_OFFLOAD_DYING)
		nf_ct_delete(e->ct, 0, 0);
	nf_ct_put(e->ct);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

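/*
 * The flowtable is an rhashtable with one node per direction. Tuples are
 * hashed and compared on the lookup key only, i.e. everything up to the
 * 'dir' member of struct flow_offload_tuple.
 */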
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};

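/* Insert both directions of the flow into the flowtable. */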
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	flow->timeout = (u32)jiffies;

	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       nf_flow_offload_rhash_params);
	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

static void flow_offload_del(struct nf_flowtable *flow_table,
			     struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       nf_flow_offload_rhash_params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);

	e = container_of(flow, struct flow_offload_entry, flow);
	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);

	flow_offload_free(flow);
}

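/*
 * Hand the flow back to classic conntrack: mark it for teardown so the
 * garbage collector removes it from the table, and fix up the conntrack
 * state so the connection can continue on the slow path.
 */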
void flow_offload_teardown(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	flow->flags |= FLOW_OFFLOAD_TEARDOWN;

	e = container_of(flow, struct flow_offload_entry, flow);
	flow_offload_fixup_ct_state(e->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

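/*
 * Look up a flow by tuple. Flows that are dying or being torn down are
 * treated as missing so that packets take the regular path again.
 */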
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload *flow;
	int dir;

	tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
					   nf_flow_offload_rhash_params);
	if (!tuplehash)
		return NULL;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
		return NULL;

	return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

int nf_flow_table_iterate(struct nf_flowtable *flow_table,
			  void (*iter)(struct flow_offload *flow, void *data),
			  void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return err;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

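/*
 * Garbage collection: a flow expires once its timeout (in jiffies,
 * compared with wraparound in mind) has passed. Each GC step walks the
 * table once and removes expired, dying and torn-down flows.
 */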
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return 0;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		if (nf_flow_has_expired(flow) ||
		    (flow->flags & (FLOW_OFFLOAD_DYING |
				    FLOW_OFFLOAD_TEARDOWN)))
			flow_offload_del(flow_table, flow);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return 1;
}

static void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct nf_flowtable *flow_table;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
	nf_flow_offload_gc_step(flow_table);
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}

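/*
 * Layer 4 NAT helpers: rewrite the TCP/UDP port in the packet and fix up
 * the transport checksum accordingly.
 */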
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

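/*
 * Flowtable setup: initialise the rhashtable, start the periodic garbage
 * collection work and register the table on the global list.
 */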
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
	int err;

	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);

	err = rhashtable_init(&flowtable->rhashtable,
			      &nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	queue_delayed_work(system_power_efficient_wq,
			   &flowtable->gc_work, HZ);

	mutex_lock(&flowtable_lock);
	list_add(&flowtable->list, &flowtables);
	mutex_unlock(&flowtable_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);

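/*
 * Cleanup: with no device given, tear down every flow; otherwise mark
 * flows using the given device as dead, then flush the garbage collector
 * so they are gone before returning.
 */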
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;

	if (!dev) {
		flow_offload_teardown(flow);
		return;
	}

	if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
	    flow->tuplehash[1].tuple.iifidx == dev->ifindex)
		flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
					  struct net_device *dev)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
	flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
	struct nf_flowtable *flowtable;

	mutex_lock(&flowtable_lock);
	list_for_each_entry(flowtable, &flowtables, list)
		nf_flow_table_iterate_cleanup(flowtable, dev);
	mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

void nf_flow_table_free(struct nf_flowtable *flow_table)
{
	mutex_lock(&flowtable_lock);
	list_del(&flow_table->list);
	mutex_unlock(&flowtable_lock);
	cancel_delayed_work_sync(&flow_table->gc_work);
	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
	WARN_ON(!nf_flow_offload_gc_step(flow_table));
	rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");