1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/netfilter.h>
6 #include <linux/rhashtable.h>
8 #include <linux/ipv6.h>
9 #include <linux/netdevice.h>
12 #include <net/ip6_route.h>
13 #include <net/neighbour.h>
14 #include <net/netfilter/nf_flow_table.h>
15 #include <net/netfilter/nf_conntrack_acct.h>
16 /* For layer 4 checksum field offset. */
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
20 static int nf_flow_state_check(struct flow_offload
*flow
, int proto
,
21 struct sk_buff
*skb
, unsigned int thoff
)
25 if (proto
!= IPPROTO_TCP
)
28 tcph
= (void *)(skb_network_header(skb
) + thoff
);
29 if (unlikely(tcph
->fin
|| tcph
->rst
)) {
30 flow_offload_teardown(flow
);
37 static void nf_flow_nat_ip_tcp(struct sk_buff
*skb
, unsigned int thoff
,
38 __be32 addr
, __be32 new_addr
)
42 tcph
= (void *)(skb_network_header(skb
) + thoff
);
43 inet_proto_csum_replace4(&tcph
->check
, skb
, addr
, new_addr
, true);
46 static void nf_flow_nat_ip_udp(struct sk_buff
*skb
, unsigned int thoff
,
47 __be32 addr
, __be32 new_addr
)
51 udph
= (void *)(skb_network_header(skb
) + thoff
);
52 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
53 inet_proto_csum_replace4(&udph
->check
, skb
, addr
,
56 udph
->check
= CSUM_MANGLED_0
;
60 static void nf_flow_nat_ip_l4proto(struct sk_buff
*skb
, struct iphdr
*iph
,
61 unsigned int thoff
, __be32 addr
,
64 switch (iph
->protocol
) {
66 nf_flow_nat_ip_tcp(skb
, thoff
, addr
, new_addr
);
69 nf_flow_nat_ip_udp(skb
, thoff
, addr
, new_addr
);
74 static void nf_flow_snat_ip(const struct flow_offload
*flow
,
75 struct sk_buff
*skb
, struct iphdr
*iph
,
76 unsigned int thoff
, enum flow_offload_tuple_dir dir
)
78 __be32 addr
, new_addr
;
81 case FLOW_OFFLOAD_DIR_ORIGINAL
:
83 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v4
.s_addr
;
84 iph
->saddr
= new_addr
;
86 case FLOW_OFFLOAD_DIR_REPLY
:
88 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v4
.s_addr
;
89 iph
->daddr
= new_addr
;
92 csum_replace4(&iph
->check
, addr
, new_addr
);
94 nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
97 static void nf_flow_dnat_ip(const struct flow_offload
*flow
,
98 struct sk_buff
*skb
, struct iphdr
*iph
,
99 unsigned int thoff
, enum flow_offload_tuple_dir dir
)
101 __be32 addr
, new_addr
;
104 case FLOW_OFFLOAD_DIR_ORIGINAL
:
106 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v4
.s_addr
;
107 iph
->daddr
= new_addr
;
109 case FLOW_OFFLOAD_DIR_REPLY
:
111 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v4
.s_addr
;
112 iph
->saddr
= new_addr
;
115 csum_replace4(&iph
->check
, addr
, new_addr
);
117 nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
120 static void nf_flow_nat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
121 unsigned int thoff
, enum flow_offload_tuple_dir dir
,
124 if (test_bit(NF_FLOW_SNAT
, &flow
->flags
)) {
125 nf_flow_snat_port(flow
, skb
, thoff
, iph
->protocol
, dir
);
126 nf_flow_snat_ip(flow
, skb
, iph
, thoff
, dir
);
128 if (test_bit(NF_FLOW_DNAT
, &flow
->flags
)) {
129 nf_flow_dnat_port(flow
, skb
, thoff
, iph
->protocol
, dir
);
130 nf_flow_dnat_ip(flow
, skb
, iph
, thoff
, dir
);
134 static bool ip_has_options(unsigned int thoff
)
136 return thoff
!= sizeof(struct iphdr
);
139 static int nf_flow_tuple_ip(struct sk_buff
*skb
, const struct net_device
*dev
,
140 struct flow_offload_tuple
*tuple
, u32
*hdrsize
)
142 struct flow_ports
*ports
;
146 if (!pskb_may_pull(skb
, sizeof(*iph
)))
150 thoff
= iph
->ihl
* 4;
152 if (ip_is_fragment(iph
) ||
153 unlikely(ip_has_options(thoff
)))
156 switch (iph
->protocol
) {
158 *hdrsize
= sizeof(struct tcphdr
);
161 *hdrsize
= sizeof(struct udphdr
);
170 thoff
= iph
->ihl
* 4;
171 if (!pskb_may_pull(skb
, thoff
+ *hdrsize
))
175 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
177 tuple
->src_v4
.s_addr
= iph
->saddr
;
178 tuple
->dst_v4
.s_addr
= iph
->daddr
;
179 tuple
->src_port
= ports
->source
;
180 tuple
->dst_port
= ports
->dest
;
181 tuple
->l3proto
= AF_INET
;
182 tuple
->l4proto
= iph
->protocol
;
183 tuple
->iifidx
= dev
->ifindex
;
188 /* Based on ip_exceeds_mtu(). */
189 static bool nf_flow_exceeds_mtu(const struct sk_buff
*skb
, unsigned int mtu
)
194 if (skb_is_gso(skb
) && skb_gso_validate_network_len(skb
, mtu
))
200 static unsigned int nf_flow_xmit_xfrm(struct sk_buff
*skb
,
201 const struct nf_hook_state
*state
,
202 struct dst_entry
*dst
)
205 skb_dst_set_noref(skb
, dst
);
206 dst_output(state
->net
, state
->sk
, skb
);
210 static unsigned int nf_flow_queue_xmit(struct net
*net
, struct sk_buff
*skb
,
211 const struct flow_offload_tuple_rhash
*tuplehash
,
214 struct net_device
*outdev
;
216 outdev
= dev_get_by_index_rcu(net
, tuplehash
->tuple
.out
.ifidx
);
221 dev_hard_header(skb
, skb
->dev
, type
, tuplehash
->tuple
.out
.h_dest
,
222 tuplehash
->tuple
.out
.h_source
, skb
->len
);
229 nf_flow_offload_ip_hook(void *priv
, struct sk_buff
*skb
,
230 const struct nf_hook_state
*state
)
232 struct flow_offload_tuple_rhash
*tuplehash
;
233 struct nf_flowtable
*flow_table
= priv
;
234 struct flow_offload_tuple tuple
= {};
235 enum flow_offload_tuple_dir dir
;
236 struct flow_offload
*flow
;
237 struct net_device
*outdev
;
245 if (skb
->protocol
!= htons(ETH_P_IP
))
248 if (nf_flow_tuple_ip(skb
, state
->in
, &tuple
, &hdrsize
) < 0)
251 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
252 if (tuplehash
== NULL
)
255 dir
= tuplehash
->tuple
.dir
;
256 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
258 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
262 thoff
= iph
->ihl
* 4;
263 if (nf_flow_state_check(flow
, iph
->protocol
, skb
, thoff
))
266 if (tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
||
267 tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_XFRM
) {
268 rt
= (struct rtable
*)tuplehash
->tuple
.dst_cache
;
269 if (!dst_check(&rt
->dst
, 0)) {
270 flow_offload_teardown(flow
);
275 if (skb_try_make_writable(skb
, thoff
+ hdrsize
))
278 flow_offload_refresh(flow_table
, flow
);
281 nf_flow_nat_ip(flow
, skb
, thoff
, dir
, iph
);
283 ip_decrease_ttl(iph
);
286 if (flow_table
->flags
& NF_FLOWTABLE_COUNTER
)
287 nf_ct_acct_update(flow
->ct
, tuplehash
->tuple
.dir
, skb
->len
);
289 if (unlikely(tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_XFRM
)) {
290 memset(skb
->cb
, 0, sizeof(struct inet_skb_parm
));
291 IPCB(skb
)->iif
= skb
->dev
->ifindex
;
292 IPCB(skb
)->flags
= IPSKB_FORWARDED
;
293 return nf_flow_xmit_xfrm(skb
, state
, &rt
->dst
);
296 switch (tuplehash
->tuple
.xmit_type
) {
297 case FLOW_OFFLOAD_XMIT_NEIGH
:
298 outdev
= rt
->dst
.dev
;
300 nexthop
= rt_nexthop(rt
, flow
->tuplehash
[!dir
].tuple
.src_v4
.s_addr
);
301 skb_dst_set_noref(skb
, &rt
->dst
);
302 neigh_xmit(NEIGH_ARP_TABLE
, outdev
, &nexthop
, skb
);
305 case FLOW_OFFLOAD_XMIT_DIRECT
:
306 ret
= nf_flow_queue_xmit(state
->net
, skb
, tuplehash
, ETH_P_IP
);
308 flow_offload_teardown(flow
);
314 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook
);
316 static void nf_flow_nat_ipv6_tcp(struct sk_buff
*skb
, unsigned int thoff
,
317 struct in6_addr
*addr
,
318 struct in6_addr
*new_addr
,
319 struct ipv6hdr
*ip6h
)
323 tcph
= (void *)(skb_network_header(skb
) + thoff
);
324 inet_proto_csum_replace16(&tcph
->check
, skb
, addr
->s6_addr32
,
325 new_addr
->s6_addr32
, true);
328 static void nf_flow_nat_ipv6_udp(struct sk_buff
*skb
, unsigned int thoff
,
329 struct in6_addr
*addr
,
330 struct in6_addr
*new_addr
)
334 udph
= (void *)(skb_network_header(skb
) + thoff
);
335 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
336 inet_proto_csum_replace16(&udph
->check
, skb
, addr
->s6_addr32
,
337 new_addr
->s6_addr32
, true);
339 udph
->check
= CSUM_MANGLED_0
;
343 static void nf_flow_nat_ipv6_l4proto(struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
344 unsigned int thoff
, struct in6_addr
*addr
,
345 struct in6_addr
*new_addr
)
347 switch (ip6h
->nexthdr
) {
349 nf_flow_nat_ipv6_tcp(skb
, thoff
, addr
, new_addr
, ip6h
);
352 nf_flow_nat_ipv6_udp(skb
, thoff
, addr
, new_addr
);
357 static void nf_flow_snat_ipv6(const struct flow_offload
*flow
,
358 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
360 enum flow_offload_tuple_dir dir
)
362 struct in6_addr addr
, new_addr
;
365 case FLOW_OFFLOAD_DIR_ORIGINAL
:
367 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v6
;
368 ip6h
->saddr
= new_addr
;
370 case FLOW_OFFLOAD_DIR_REPLY
:
372 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v6
;
373 ip6h
->daddr
= new_addr
;
377 nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
380 static void nf_flow_dnat_ipv6(const struct flow_offload
*flow
,
381 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
383 enum flow_offload_tuple_dir dir
)
385 struct in6_addr addr
, new_addr
;
388 case FLOW_OFFLOAD_DIR_ORIGINAL
:
390 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v6
;
391 ip6h
->daddr
= new_addr
;
393 case FLOW_OFFLOAD_DIR_REPLY
:
395 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v6
;
396 ip6h
->saddr
= new_addr
;
400 nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
403 static void nf_flow_nat_ipv6(const struct flow_offload
*flow
,
405 enum flow_offload_tuple_dir dir
,
406 struct ipv6hdr
*ip6h
)
408 unsigned int thoff
= sizeof(*ip6h
);
410 if (test_bit(NF_FLOW_SNAT
, &flow
->flags
)) {
411 nf_flow_snat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
);
412 nf_flow_snat_ipv6(flow
, skb
, ip6h
, thoff
, dir
);
414 if (test_bit(NF_FLOW_DNAT
, &flow
->flags
)) {
415 nf_flow_dnat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
);
416 nf_flow_dnat_ipv6(flow
, skb
, ip6h
, thoff
, dir
);
420 static int nf_flow_tuple_ipv6(struct sk_buff
*skb
, const struct net_device
*dev
,
421 struct flow_offload_tuple
*tuple
, u32
*hdrsize
)
423 struct flow_ports
*ports
;
424 struct ipv6hdr
*ip6h
;
427 if (!pskb_may_pull(skb
, sizeof(*ip6h
)))
430 ip6h
= ipv6_hdr(skb
);
432 switch (ip6h
->nexthdr
) {
434 *hdrsize
= sizeof(struct tcphdr
);
437 *hdrsize
= sizeof(struct udphdr
);
443 if (ip6h
->hop_limit
<= 1)
446 thoff
= sizeof(*ip6h
);
447 if (!pskb_may_pull(skb
, thoff
+ *hdrsize
))
450 ip6h
= ipv6_hdr(skb
);
451 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
453 tuple
->src_v6
= ip6h
->saddr
;
454 tuple
->dst_v6
= ip6h
->daddr
;
455 tuple
->src_port
= ports
->source
;
456 tuple
->dst_port
= ports
->dest
;
457 tuple
->l3proto
= AF_INET6
;
458 tuple
->l4proto
= ip6h
->nexthdr
;
459 tuple
->iifidx
= dev
->ifindex
;
465 nf_flow_offload_ipv6_hook(void *priv
, struct sk_buff
*skb
,
466 const struct nf_hook_state
*state
)
468 struct flow_offload_tuple_rhash
*tuplehash
;
469 struct nf_flowtable
*flow_table
= priv
;
470 struct flow_offload_tuple tuple
= {};
471 enum flow_offload_tuple_dir dir
;
472 const struct in6_addr
*nexthop
;
473 struct flow_offload
*flow
;
474 struct net_device
*outdev
;
475 struct ipv6hdr
*ip6h
;
480 if (skb
->protocol
!= htons(ETH_P_IPV6
))
483 if (nf_flow_tuple_ipv6(skb
, state
->in
, &tuple
, &hdrsize
) < 0)
486 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
487 if (tuplehash
== NULL
)
490 dir
= tuplehash
->tuple
.dir
;
491 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
493 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
496 if (nf_flow_state_check(flow
, ipv6_hdr(skb
)->nexthdr
, skb
,
500 if (tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
||
501 tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_XFRM
) {
502 rt
= (struct rt6_info
*)tuplehash
->tuple
.dst_cache
;
503 if (!dst_check(&rt
->dst
, 0)) {
504 flow_offload_teardown(flow
);
509 if (skb_try_make_writable(skb
, sizeof(*ip6h
) + hdrsize
))
512 flow_offload_refresh(flow_table
, flow
);
514 ip6h
= ipv6_hdr(skb
);
515 nf_flow_nat_ipv6(flow
, skb
, dir
, ip6h
);
520 if (flow_table
->flags
& NF_FLOWTABLE_COUNTER
)
521 nf_ct_acct_update(flow
->ct
, tuplehash
->tuple
.dir
, skb
->len
);
523 if (unlikely(tuplehash
->tuple
.xmit_type
== FLOW_OFFLOAD_XMIT_XFRM
)) {
524 memset(skb
->cb
, 0, sizeof(struct inet6_skb_parm
));
525 IP6CB(skb
)->iif
= skb
->dev
->ifindex
;
526 IP6CB(skb
)->flags
= IP6SKB_FORWARDED
;
527 return nf_flow_xmit_xfrm(skb
, state
, &rt
->dst
);
530 switch (tuplehash
->tuple
.xmit_type
) {
531 case FLOW_OFFLOAD_XMIT_NEIGH
:
532 outdev
= rt
->dst
.dev
;
534 nexthop
= rt6_nexthop(rt
, &flow
->tuplehash
[!dir
].tuple
.src_v6
);
535 skb_dst_set_noref(skb
, &rt
->dst
);
536 neigh_xmit(NEIGH_ND_TABLE
, outdev
, nexthop
, skb
);
539 case FLOW_OFFLOAD_XMIT_DIRECT
:
540 ret
= nf_flow_queue_xmit(state
->net
, skb
, tuplehash
, ETH_P_IPV6
);
542 flow_offload_teardown(flow
);
548 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook
);