// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
/* Protects the global 'flowtables' list below. */
static DEFINE_MUTEX(flowtable_lock);
/* All registered flow tables; walked on netdevice cleanup. */
static LIST_HEAD(flowtables);
21 flow_offload_fill_dir(struct flow_offload
*flow
,
22 enum flow_offload_tuple_dir dir
)
24 struct flow_offload_tuple
*ft
= &flow
->tuplehash
[dir
].tuple
;
25 struct nf_conntrack_tuple
*ctt
= &flow
->ct
->tuplehash
[dir
].tuple
;
29 switch (ctt
->src
.l3num
) {
31 ft
->src_v4
= ctt
->src
.u3
.in
;
32 ft
->dst_v4
= ctt
->dst
.u3
.in
;
35 ft
->src_v6
= ctt
->src
.u3
.in6
;
36 ft
->dst_v6
= ctt
->dst
.u3
.in6
;
40 ft
->l3proto
= ctt
->src
.l3num
;
41 ft
->l4proto
= ctt
->dst
.protonum
;
42 ft
->src_port
= ctt
->src
.u
.tcp
.port
;
43 ft
->dst_port
= ctt
->dst
.u
.tcp
.port
;
46 struct flow_offload
*flow_offload_alloc(struct nf_conn
*ct
)
48 struct flow_offload
*flow
;
50 if (unlikely(nf_ct_is_dying(ct
) ||
51 !atomic_inc_not_zero(&ct
->ct_general
.use
)))
54 flow
= kzalloc(sizeof(*flow
), GFP_ATOMIC
);
60 flow_offload_fill_dir(flow
, FLOW_OFFLOAD_DIR_ORIGINAL
);
61 flow_offload_fill_dir(flow
, FLOW_OFFLOAD_DIR_REPLY
);
63 if (ct
->status
& IPS_SRC_NAT
)
64 __set_bit(NF_FLOW_SNAT
, &flow
->flags
);
65 if (ct
->status
& IPS_DST_NAT
)
66 __set_bit(NF_FLOW_DNAT
, &flow
->flags
);
75 EXPORT_SYMBOL_GPL(flow_offload_alloc
);
77 static int flow_offload_fill_route(struct flow_offload
*flow
,
78 const struct nf_flow_route
*route
,
79 enum flow_offload_tuple_dir dir
)
81 struct flow_offload_tuple
*flow_tuple
= &flow
->tuplehash
[dir
].tuple
;
82 struct dst_entry
*other_dst
= route
->tuple
[!dir
].dst
;
83 struct dst_entry
*dst
= route
->tuple
[dir
].dst
;
85 if (!dst_hold_safe(route
->tuple
[dir
].dst
))
88 switch (flow_tuple
->l3proto
) {
90 flow_tuple
->mtu
= ip_dst_mtu_maybe_forward(dst
, true);
93 flow_tuple
->mtu
= ip6_dst_mtu_forward(dst
);
97 flow_tuple
->iifidx
= other_dst
->dev
->ifindex
;
98 flow_tuple
->dst_cache
= dst
;
103 int flow_offload_route_init(struct flow_offload
*flow
,
104 const struct nf_flow_route
*route
)
108 err
= flow_offload_fill_route(flow
, route
, FLOW_OFFLOAD_DIR_ORIGINAL
);
112 err
= flow_offload_fill_route(flow
, route
, FLOW_OFFLOAD_DIR_REPLY
);
114 goto err_route_reply
;
116 flow
->type
= NF_FLOW_OFFLOAD_ROUTE
;
121 dst_release(route
->tuple
[FLOW_OFFLOAD_DIR_ORIGINAL
].dst
);
125 EXPORT_SYMBOL_GPL(flow_offload_route_init
);
127 static void flow_offload_fixup_tcp(struct ip_ct_tcp
*tcp
)
129 tcp
->state
= TCP_CONNTRACK_ESTABLISHED
;
130 tcp
->seen
[0].td_maxwin
= 0;
131 tcp
->seen
[1].td_maxwin
= 0;
/* Conntrack timeout granted when a flow returns to the classic path. */
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
137 static void flow_offload_fixup_ct_timeout(struct nf_conn
*ct
)
139 const struct nf_conntrack_l4proto
*l4proto
;
140 int l4num
= nf_ct_protonum(ct
);
141 unsigned int timeout
;
143 l4proto
= nf_ct_l4proto_find(l4num
);
147 if (l4num
== IPPROTO_TCP
)
148 timeout
= NF_FLOWTABLE_TCP_PICKUP_TIMEOUT
;
149 else if (l4num
== IPPROTO_UDP
)
150 timeout
= NF_FLOWTABLE_UDP_PICKUP_TIMEOUT
;
154 if (nf_flow_timeout_delta(ct
->timeout
) > (__s32
)timeout
)
155 ct
->timeout
= nfct_time_stamp
+ timeout
;
158 static void flow_offload_fixup_ct_state(struct nf_conn
*ct
)
160 if (nf_ct_protonum(ct
) == IPPROTO_TCP
)
161 flow_offload_fixup_tcp(&ct
->proto
.tcp
);
/* Full conntrack fixup on flow removal: state then timeout. */
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
	flow_offload_fixup_ct_state(ct);
	flow_offload_fixup_ct_timeout(ct);
}
170 static void flow_offload_route_release(struct flow_offload
*flow
)
172 dst_release(flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_cache
);
173 dst_release(flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_cache
);
176 void flow_offload_free(struct flow_offload
*flow
)
178 switch (flow
->type
) {
179 case NF_FLOW_OFFLOAD_ROUTE
:
180 flow_offload_route_release(flow
);
186 kfree_rcu(flow
, rcu_head
);
188 EXPORT_SYMBOL_GPL(flow_offload_free
);
190 static u32
flow_offload_hash(const void *data
, u32 len
, u32 seed
)
192 const struct flow_offload_tuple
*tuple
= data
;
194 return jhash(tuple
, offsetof(struct flow_offload_tuple
, __hash
), seed
);
197 static u32
flow_offload_hash_obj(const void *data
, u32 len
, u32 seed
)
199 const struct flow_offload_tuple_rhash
*tuplehash
= data
;
201 return jhash(&tuplehash
->tuple
, offsetof(struct flow_offload_tuple
, __hash
), seed
);
204 static int flow_offload_hash_cmp(struct rhashtable_compare_arg
*arg
,
207 const struct flow_offload_tuple
*tuple
= arg
->key
;
208 const struct flow_offload_tuple_rhash
*x
= ptr
;
210 if (memcmp(&x
->tuple
, tuple
, offsetof(struct flow_offload_tuple
, __hash
)))
216 static const struct rhashtable_params nf_flow_offload_rhash_params
= {
217 .head_offset
= offsetof(struct flow_offload_tuple_rhash
, node
),
218 .hashfn
= flow_offload_hash
,
219 .obj_hashfn
= flow_offload_hash_obj
,
220 .obj_cmpfn
= flow_offload_hash_cmp
,
221 .automatic_shrinking
= true,
224 int flow_offload_add(struct nf_flowtable
*flow_table
, struct flow_offload
*flow
)
228 flow
->timeout
= nf_flowtable_time_stamp
+ NF_FLOW_TIMEOUT
;
230 err
= rhashtable_insert_fast(&flow_table
->rhashtable
,
231 &flow
->tuplehash
[0].node
,
232 nf_flow_offload_rhash_params
);
236 err
= rhashtable_insert_fast(&flow_table
->rhashtable
,
237 &flow
->tuplehash
[1].node
,
238 nf_flow_offload_rhash_params
);
240 rhashtable_remove_fast(&flow_table
->rhashtable
,
241 &flow
->tuplehash
[0].node
,
242 nf_flow_offload_rhash_params
);
246 nf_ct_offload_timeout(flow
->ct
);
248 if (nf_flowtable_hw_offload(flow_table
)) {
249 __set_bit(NF_FLOW_HW
, &flow
->flags
);
250 nf_flow_offload_add(flow_table
, flow
);
255 EXPORT_SYMBOL_GPL(flow_offload_add
);
257 void flow_offload_refresh(struct nf_flowtable
*flow_table
,
258 struct flow_offload
*flow
)
260 flow
->timeout
= nf_flowtable_time_stamp
+ NF_FLOW_TIMEOUT
;
262 if (likely(!nf_flowtable_hw_offload(flow_table
) ||
263 !test_and_clear_bit(NF_FLOW_HW_REFRESH
, &flow
->flags
)))
266 nf_flow_offload_add(flow_table
, flow
);
268 EXPORT_SYMBOL_GPL(flow_offload_refresh
);
270 static inline bool nf_flow_has_expired(const struct flow_offload
*flow
)
272 return nf_flow_timeout_delta(flow
->timeout
) <= 0;
275 static void flow_offload_del(struct nf_flowtable
*flow_table
,
276 struct flow_offload
*flow
)
278 rhashtable_remove_fast(&flow_table
->rhashtable
,
279 &flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].node
,
280 nf_flow_offload_rhash_params
);
281 rhashtable_remove_fast(&flow_table
->rhashtable
,
282 &flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].node
,
283 nf_flow_offload_rhash_params
);
285 clear_bit(IPS_OFFLOAD_BIT
, &flow
->ct
->status
);
287 if (nf_flow_has_expired(flow
))
288 flow_offload_fixup_ct(flow
->ct
);
290 flow_offload_fixup_ct_timeout(flow
->ct
);
292 flow_offload_free(flow
);
295 void flow_offload_teardown(struct flow_offload
*flow
)
297 set_bit(NF_FLOW_TEARDOWN
, &flow
->flags
);
299 flow_offload_fixup_ct_state(flow
->ct
);
301 EXPORT_SYMBOL_GPL(flow_offload_teardown
);
303 struct flow_offload_tuple_rhash
*
304 flow_offload_lookup(struct nf_flowtable
*flow_table
,
305 struct flow_offload_tuple
*tuple
)
307 struct flow_offload_tuple_rhash
*tuplehash
;
308 struct flow_offload
*flow
;
311 tuplehash
= rhashtable_lookup(&flow_table
->rhashtable
, tuple
,
312 nf_flow_offload_rhash_params
);
316 dir
= tuplehash
->tuple
.dir
;
317 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
318 if (test_bit(NF_FLOW_TEARDOWN
, &flow
->flags
))
321 if (unlikely(nf_ct_is_dying(flow
->ct
)))
326 EXPORT_SYMBOL_GPL(flow_offload_lookup
);
329 nf_flow_table_iterate(struct nf_flowtable
*flow_table
,
330 void (*iter
)(struct flow_offload
*flow
, void *data
),
333 struct flow_offload_tuple_rhash
*tuplehash
;
334 struct rhashtable_iter hti
;
335 struct flow_offload
*flow
;
338 rhashtable_walk_enter(&flow_table
->rhashtable
, &hti
);
339 rhashtable_walk_start(&hti
);
341 while ((tuplehash
= rhashtable_walk_next(&hti
))) {
342 if (IS_ERR(tuplehash
)) {
343 if (PTR_ERR(tuplehash
) != -EAGAIN
) {
344 err
= PTR_ERR(tuplehash
);
349 if (tuplehash
->tuple
.dir
)
352 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[0]);
356 rhashtable_walk_stop(&hti
);
357 rhashtable_walk_exit(&hti
);
362 static void nf_flow_offload_gc_step(struct flow_offload
*flow
, void *data
)
364 struct nf_flowtable
*flow_table
= data
;
366 if (nf_flow_has_expired(flow
) || nf_ct_is_dying(flow
->ct
))
367 set_bit(NF_FLOW_TEARDOWN
, &flow
->flags
);
369 if (test_bit(NF_FLOW_TEARDOWN
, &flow
->flags
)) {
370 if (test_bit(NF_FLOW_HW
, &flow
->flags
)) {
371 if (!test_bit(NF_FLOW_HW_DYING
, &flow
->flags
))
372 nf_flow_offload_del(flow_table
, flow
);
373 else if (test_bit(NF_FLOW_HW_DEAD
, &flow
->flags
))
374 flow_offload_del(flow_table
, flow
);
376 flow_offload_del(flow_table
, flow
);
378 } else if (test_bit(NF_FLOW_HW
, &flow
->flags
)) {
379 nf_flow_offload_stats(flow_table
, flow
);
383 static void nf_flow_offload_work_gc(struct work_struct
*work
)
385 struct nf_flowtable
*flow_table
;
387 flow_table
= container_of(work
, struct nf_flowtable
, gc_work
.work
);
388 nf_flow_table_iterate(flow_table
, nf_flow_offload_gc_step
, flow_table
);
389 queue_delayed_work(system_power_efficient_wq
, &flow_table
->gc_work
, HZ
);
393 static int nf_flow_nat_port_tcp(struct sk_buff
*skb
, unsigned int thoff
,
394 __be16 port
, __be16 new_port
)
398 if (skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
401 tcph
= (void *)(skb_network_header(skb
) + thoff
);
402 inet_proto_csum_replace2(&tcph
->check
, skb
, port
, new_port
, false);
407 static int nf_flow_nat_port_udp(struct sk_buff
*skb
, unsigned int thoff
,
408 __be16 port
, __be16 new_port
)
412 if (skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
415 udph
= (void *)(skb_network_header(skb
) + thoff
);
416 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
417 inet_proto_csum_replace2(&udph
->check
, skb
, port
,
420 udph
->check
= CSUM_MANGLED_0
;
426 static int nf_flow_nat_port(struct sk_buff
*skb
, unsigned int thoff
,
427 u8 protocol
, __be16 port
, __be16 new_port
)
431 if (nf_flow_nat_port_tcp(skb
, thoff
, port
, new_port
) < 0)
435 if (nf_flow_nat_port_udp(skb
, thoff
, port
, new_port
) < 0)
443 int nf_flow_snat_port(const struct flow_offload
*flow
,
444 struct sk_buff
*skb
, unsigned int thoff
,
445 u8 protocol
, enum flow_offload_tuple_dir dir
)
447 struct flow_ports
*hdr
;
448 __be16 port
, new_port
;
450 if (skb_try_make_writable(skb
, thoff
+ sizeof(*hdr
)))
453 hdr
= (void *)(skb_network_header(skb
) + thoff
);
456 case FLOW_OFFLOAD_DIR_ORIGINAL
:
458 new_port
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_port
;
459 hdr
->source
= new_port
;
461 case FLOW_OFFLOAD_DIR_REPLY
:
463 new_port
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_port
;
464 hdr
->dest
= new_port
;
470 return nf_flow_nat_port(skb
, thoff
, protocol
, port
, new_port
);
472 EXPORT_SYMBOL_GPL(nf_flow_snat_port
);
474 int nf_flow_dnat_port(const struct flow_offload
*flow
,
475 struct sk_buff
*skb
, unsigned int thoff
,
476 u8 protocol
, enum flow_offload_tuple_dir dir
)
478 struct flow_ports
*hdr
;
479 __be16 port
, new_port
;
481 if (skb_try_make_writable(skb
, thoff
+ sizeof(*hdr
)))
484 hdr
= (void *)(skb_network_header(skb
) + thoff
);
487 case FLOW_OFFLOAD_DIR_ORIGINAL
:
489 new_port
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_port
;
490 hdr
->dest
= new_port
;
492 case FLOW_OFFLOAD_DIR_REPLY
:
494 new_port
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_port
;
495 hdr
->source
= new_port
;
501 return nf_flow_nat_port(skb
, thoff
, protocol
, port
, new_port
);
503 EXPORT_SYMBOL_GPL(nf_flow_dnat_port
);
505 int nf_flow_table_init(struct nf_flowtable
*flowtable
)
509 INIT_DEFERRABLE_WORK(&flowtable
->gc_work
, nf_flow_offload_work_gc
);
510 flow_block_init(&flowtable
->flow_block
);
511 init_rwsem(&flowtable
->flow_block_lock
);
513 err
= rhashtable_init(&flowtable
->rhashtable
,
514 &nf_flow_offload_rhash_params
);
518 queue_delayed_work(system_power_efficient_wq
,
519 &flowtable
->gc_work
, HZ
);
521 mutex_lock(&flowtable_lock
);
522 list_add(&flowtable
->list
, &flowtables
);
523 mutex_unlock(&flowtable_lock
);
527 EXPORT_SYMBOL_GPL(nf_flow_table_init
);
529 static void nf_flow_table_do_cleanup(struct flow_offload
*flow
, void *data
)
531 struct net_device
*dev
= data
;
534 flow_offload_teardown(flow
);
538 if (net_eq(nf_ct_net(flow
->ct
), dev_net(dev
)) &&
539 (flow
->tuplehash
[0].tuple
.iifidx
== dev
->ifindex
||
540 flow
->tuplehash
[1].tuple
.iifidx
== dev
->ifindex
))
541 flow_offload_teardown(flow
);
544 void nf_flow_table_gc_cleanup(struct nf_flowtable
*flowtable
,
545 struct net_device
*dev
)
547 nf_flow_table_iterate(flowtable
, nf_flow_table_do_cleanup
, dev
);
548 flush_delayed_work(&flowtable
->gc_work
);
549 nf_flow_table_offload_flush(flowtable
);
552 void nf_flow_table_cleanup(struct net_device
*dev
)
554 struct nf_flowtable
*flowtable
;
556 mutex_lock(&flowtable_lock
);
557 list_for_each_entry(flowtable
, &flowtables
, list
)
558 nf_flow_table_gc_cleanup(flowtable
, dev
);
559 mutex_unlock(&flowtable_lock
);
561 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup
);
563 void nf_flow_table_free(struct nf_flowtable
*flow_table
)
565 mutex_lock(&flowtable_lock
);
566 list_del(&flow_table
->list
);
567 mutex_unlock(&flowtable_lock
);
569 cancel_delayed_work_sync(&flow_table
->gc_work
);
570 nf_flow_table_iterate(flow_table
, nf_flow_table_do_cleanup
, NULL
);
571 nf_flow_table_iterate(flow_table
, nf_flow_offload_gc_step
, flow_table
);
572 nf_flow_table_offload_flush(flow_table
);
573 if (nf_flowtable_hw_offload(flow_table
))
574 nf_flow_table_iterate(flow_table
, nf_flow_offload_gc_step
,
576 rhashtable_destroy(&flow_table
->rhashtable
);
578 EXPORT_SYMBOL_GPL(nf_flow_table_free
);
580 static int __init
nf_flow_table_module_init(void)
582 return nf_flow_table_offload_init();
585 static void __exit
nf_flow_table_module_exit(void)
587 nf_flow_table_offload_exit();
module_init(nf_flow_table_module_init);
module_exit(nf_flow_table_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Netfilter flow table module");