1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/netfilter.h>
6 #include <linux/rhashtable.h>
8 #include <linux/ipv6.h>
9 #include <linux/netdevice.h>
12 #include <net/ip6_route.h>
13 #include <net/neighbour.h>
14 #include <net/netfilter/nf_flow_table.h>
15 #include <net/netfilter/nf_conntrack_acct.h>
16 /* For layer 4 checksum field offset. */
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
20 static int nf_flow_state_check(struct flow_offload
*flow
, int proto
,
21 struct sk_buff
*skb
, unsigned int thoff
)
25 if (proto
!= IPPROTO_TCP
)
28 tcph
= (void *)(skb_network_header(skb
) + thoff
);
29 if (unlikely(tcph
->fin
|| tcph
->rst
)) {
30 flow_offload_teardown(flow
);
37 static int nf_flow_nat_ip_tcp(struct sk_buff
*skb
, unsigned int thoff
,
38 __be32 addr
, __be32 new_addr
)
42 if (skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
45 tcph
= (void *)(skb_network_header(skb
) + thoff
);
46 inet_proto_csum_replace4(&tcph
->check
, skb
, addr
, new_addr
, true);
51 static int nf_flow_nat_ip_udp(struct sk_buff
*skb
, unsigned int thoff
,
52 __be32 addr
, __be32 new_addr
)
56 if (skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
59 udph
= (void *)(skb_network_header(skb
) + thoff
);
60 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
61 inet_proto_csum_replace4(&udph
->check
, skb
, addr
,
64 udph
->check
= CSUM_MANGLED_0
;
70 static int nf_flow_nat_ip_l4proto(struct sk_buff
*skb
, struct iphdr
*iph
,
71 unsigned int thoff
, __be32 addr
,
74 switch (iph
->protocol
) {
76 if (nf_flow_nat_ip_tcp(skb
, thoff
, addr
, new_addr
) < 0)
80 if (nf_flow_nat_ip_udp(skb
, thoff
, addr
, new_addr
) < 0)
88 static int nf_flow_snat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
89 struct iphdr
*iph
, unsigned int thoff
,
90 enum flow_offload_tuple_dir dir
)
92 __be32 addr
, new_addr
;
95 case FLOW_OFFLOAD_DIR_ORIGINAL
:
97 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v4
.s_addr
;
98 iph
->saddr
= new_addr
;
100 case FLOW_OFFLOAD_DIR_REPLY
:
102 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v4
.s_addr
;
103 iph
->daddr
= new_addr
;
108 csum_replace4(&iph
->check
, addr
, new_addr
);
110 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
113 static int nf_flow_dnat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
114 struct iphdr
*iph
, unsigned int thoff
,
115 enum flow_offload_tuple_dir dir
)
117 __be32 addr
, new_addr
;
120 case FLOW_OFFLOAD_DIR_ORIGINAL
:
122 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v4
.s_addr
;
123 iph
->daddr
= new_addr
;
125 case FLOW_OFFLOAD_DIR_REPLY
:
127 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v4
.s_addr
;
128 iph
->saddr
= new_addr
;
133 csum_replace4(&iph
->check
, addr
, new_addr
);
135 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
138 static int nf_flow_nat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
139 unsigned int thoff
, enum flow_offload_tuple_dir dir
)
141 struct iphdr
*iph
= ip_hdr(skb
);
143 if (test_bit(NF_FLOW_SNAT
, &flow
->flags
) &&
144 (nf_flow_snat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
145 nf_flow_snat_ip(flow
, skb
, ip_hdr(skb
), thoff
, dir
) < 0))
149 if (test_bit(NF_FLOW_DNAT
, &flow
->flags
) &&
150 (nf_flow_dnat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
151 nf_flow_dnat_ip(flow
, skb
, ip_hdr(skb
), thoff
, dir
) < 0))
/* True when the IPv4 header carries options, i.e. the transport header
 * offset is larger than the minimal 20-byte header.  Flows with IP
 * options are not offloaded.
 */
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}
162 static int nf_flow_tuple_ip(struct sk_buff
*skb
, const struct net_device
*dev
,
163 struct flow_offload_tuple
*tuple
)
165 unsigned int thoff
, hdrsize
;
166 struct flow_ports
*ports
;
169 if (!pskb_may_pull(skb
, sizeof(*iph
)))
173 thoff
= iph
->ihl
* 4;
175 if (ip_is_fragment(iph
) ||
176 unlikely(ip_has_options(thoff
)))
179 switch (iph
->protocol
) {
181 hdrsize
= sizeof(struct tcphdr
);
184 hdrsize
= sizeof(struct udphdr
);
193 thoff
= iph
->ihl
* 4;
194 if (!pskb_may_pull(skb
, thoff
+ hdrsize
))
198 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
200 tuple
->src_v4
.s_addr
= iph
->saddr
;
201 tuple
->dst_v4
.s_addr
= iph
->daddr
;
202 tuple
->src_port
= ports
->source
;
203 tuple
->dst_port
= ports
->dest
;
204 tuple
->l3proto
= AF_INET
;
205 tuple
->l4proto
= iph
->protocol
;
206 tuple
->iifidx
= dev
->ifindex
;
211 /* Based on ip_exceeds_mtu(). */
212 static bool nf_flow_exceeds_mtu(const struct sk_buff
*skb
, unsigned int mtu
)
217 if (skb_is_gso(skb
) && skb_gso_validate_network_len(skb
, mtu
))
/* Revalidate the cached route before transmit.  Only xfrm-backed dsts
 * are re-checked here: their policies may change underneath the flow.
 * Returns 0 when the dst is still usable, -1 when it went stale.
 */
static int nf_flow_offload_dst_check(struct dst_entry *dst)
{
	if (unlikely(dst_xfrm(dst)))
		return dst_check(dst, 0) ? 0 : -1;

	return 0;
}
231 static unsigned int nf_flow_xmit_xfrm(struct sk_buff
*skb
,
232 const struct nf_hook_state
*state
,
233 struct dst_entry
*dst
)
236 skb_dst_set_noref(skb
, dst
);
237 dst_output(state
->net
, state
->sk
, skb
);
242 nf_flow_offload_ip_hook(void *priv
, struct sk_buff
*skb
,
243 const struct nf_hook_state
*state
)
245 struct flow_offload_tuple_rhash
*tuplehash
;
246 struct nf_flowtable
*flow_table
= priv
;
247 struct flow_offload_tuple tuple
= {};
248 enum flow_offload_tuple_dir dir
;
249 struct flow_offload
*flow
;
250 struct net_device
*outdev
;
256 if (skb
->protocol
!= htons(ETH_P_IP
))
259 if (nf_flow_tuple_ip(skb
, state
->in
, &tuple
) < 0)
262 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
263 if (tuplehash
== NULL
)
266 dir
= tuplehash
->tuple
.dir
;
267 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
268 rt
= (struct rtable
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
269 outdev
= rt
->dst
.dev
;
271 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
274 if (skb_try_make_writable(skb
, sizeof(*iph
)))
277 thoff
= ip_hdr(skb
)->ihl
* 4;
278 if (nf_flow_state_check(flow
, ip_hdr(skb
)->protocol
, skb
, thoff
))
281 flow_offload_refresh(flow_table
, flow
);
283 if (nf_flow_offload_dst_check(&rt
->dst
)) {
284 flow_offload_teardown(flow
);
288 if (nf_flow_nat_ip(flow
, skb
, thoff
, dir
) < 0)
292 ip_decrease_ttl(iph
);
295 if (flow_table
->flags
& NF_FLOWTABLE_COUNTER
)
296 nf_ct_acct_update(flow
->ct
, tuplehash
->tuple
.dir
, skb
->len
);
298 if (unlikely(dst_xfrm(&rt
->dst
))) {
299 memset(skb
->cb
, 0, sizeof(struct inet_skb_parm
));
300 IPCB(skb
)->iif
= skb
->dev
->ifindex
;
301 IPCB(skb
)->flags
= IPSKB_FORWARDED
;
302 return nf_flow_xmit_xfrm(skb
, state
, &rt
->dst
);
306 nexthop
= rt_nexthop(rt
, flow
->tuplehash
[!dir
].tuple
.src_v4
.s_addr
);
307 skb_dst_set_noref(skb
, &rt
->dst
);
308 neigh_xmit(NEIGH_ARP_TABLE
, outdev
, &nexthop
, skb
);
312 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook
);
314 static int nf_flow_nat_ipv6_tcp(struct sk_buff
*skb
, unsigned int thoff
,
315 struct in6_addr
*addr
,
316 struct in6_addr
*new_addr
)
320 if (skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
323 tcph
= (void *)(skb_network_header(skb
) + thoff
);
324 inet_proto_csum_replace16(&tcph
->check
, skb
, addr
->s6_addr32
,
325 new_addr
->s6_addr32
, true);
330 static int nf_flow_nat_ipv6_udp(struct sk_buff
*skb
, unsigned int thoff
,
331 struct in6_addr
*addr
,
332 struct in6_addr
*new_addr
)
336 if (skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
339 udph
= (void *)(skb_network_header(skb
) + thoff
);
340 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
341 inet_proto_csum_replace16(&udph
->check
, skb
, addr
->s6_addr32
,
342 new_addr
->s6_addr32
, true);
344 udph
->check
= CSUM_MANGLED_0
;
350 static int nf_flow_nat_ipv6_l4proto(struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
351 unsigned int thoff
, struct in6_addr
*addr
,
352 struct in6_addr
*new_addr
)
354 switch (ip6h
->nexthdr
) {
356 if (nf_flow_nat_ipv6_tcp(skb
, thoff
, addr
, new_addr
) < 0)
360 if (nf_flow_nat_ipv6_udp(skb
, thoff
, addr
, new_addr
) < 0)
368 static int nf_flow_snat_ipv6(const struct flow_offload
*flow
,
369 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
371 enum flow_offload_tuple_dir dir
)
373 struct in6_addr addr
, new_addr
;
376 case FLOW_OFFLOAD_DIR_ORIGINAL
:
378 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v6
;
379 ip6h
->saddr
= new_addr
;
381 case FLOW_OFFLOAD_DIR_REPLY
:
383 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v6
;
384 ip6h
->daddr
= new_addr
;
390 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
393 static int nf_flow_dnat_ipv6(const struct flow_offload
*flow
,
394 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
396 enum flow_offload_tuple_dir dir
)
398 struct in6_addr addr
, new_addr
;
401 case FLOW_OFFLOAD_DIR_ORIGINAL
:
403 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v6
;
404 ip6h
->daddr
= new_addr
;
406 case FLOW_OFFLOAD_DIR_REPLY
:
408 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v6
;
409 ip6h
->saddr
= new_addr
;
415 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
418 static int nf_flow_nat_ipv6(const struct flow_offload
*flow
,
420 enum flow_offload_tuple_dir dir
)
422 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
423 unsigned int thoff
= sizeof(*ip6h
);
425 if (test_bit(NF_FLOW_SNAT
, &flow
->flags
) &&
426 (nf_flow_snat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
427 nf_flow_snat_ipv6(flow
, skb
, ipv6_hdr(skb
), thoff
, dir
) < 0))
430 ip6h
= ipv6_hdr(skb
);
431 if (test_bit(NF_FLOW_DNAT
, &flow
->flags
) &&
432 (nf_flow_dnat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
433 nf_flow_dnat_ipv6(flow
, skb
, ipv6_hdr(skb
), thoff
, dir
) < 0))
439 static int nf_flow_tuple_ipv6(struct sk_buff
*skb
, const struct net_device
*dev
,
440 struct flow_offload_tuple
*tuple
)
442 unsigned int thoff
, hdrsize
;
443 struct flow_ports
*ports
;
444 struct ipv6hdr
*ip6h
;
446 if (!pskb_may_pull(skb
, sizeof(*ip6h
)))
449 ip6h
= ipv6_hdr(skb
);
451 switch (ip6h
->nexthdr
) {
453 hdrsize
= sizeof(struct tcphdr
);
456 hdrsize
= sizeof(struct udphdr
);
462 if (ip6h
->hop_limit
<= 1)
465 thoff
= sizeof(*ip6h
);
466 if (!pskb_may_pull(skb
, thoff
+ hdrsize
))
469 ip6h
= ipv6_hdr(skb
);
470 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
472 tuple
->src_v6
= ip6h
->saddr
;
473 tuple
->dst_v6
= ip6h
->daddr
;
474 tuple
->src_port
= ports
->source
;
475 tuple
->dst_port
= ports
->dest
;
476 tuple
->l3proto
= AF_INET6
;
477 tuple
->l4proto
= ip6h
->nexthdr
;
478 tuple
->iifidx
= dev
->ifindex
;
484 nf_flow_offload_ipv6_hook(void *priv
, struct sk_buff
*skb
,
485 const struct nf_hook_state
*state
)
487 struct flow_offload_tuple_rhash
*tuplehash
;
488 struct nf_flowtable
*flow_table
= priv
;
489 struct flow_offload_tuple tuple
= {};
490 enum flow_offload_tuple_dir dir
;
491 const struct in6_addr
*nexthop
;
492 struct flow_offload
*flow
;
493 struct net_device
*outdev
;
494 struct ipv6hdr
*ip6h
;
497 if (skb
->protocol
!= htons(ETH_P_IPV6
))
500 if (nf_flow_tuple_ipv6(skb
, state
->in
, &tuple
) < 0)
503 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
504 if (tuplehash
== NULL
)
507 dir
= tuplehash
->tuple
.dir
;
508 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
509 rt
= (struct rt6_info
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
510 outdev
= rt
->dst
.dev
;
512 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
515 if (nf_flow_state_check(flow
, ipv6_hdr(skb
)->nexthdr
, skb
,
519 flow_offload_refresh(flow_table
, flow
);
521 if (nf_flow_offload_dst_check(&rt
->dst
)) {
522 flow_offload_teardown(flow
);
526 if (skb_try_make_writable(skb
, sizeof(*ip6h
)))
529 if (nf_flow_nat_ipv6(flow
, skb
, dir
) < 0)
532 ip6h
= ipv6_hdr(skb
);
536 if (flow_table
->flags
& NF_FLOWTABLE_COUNTER
)
537 nf_ct_acct_update(flow
->ct
, tuplehash
->tuple
.dir
, skb
->len
);
539 if (unlikely(dst_xfrm(&rt
->dst
))) {
540 memset(skb
->cb
, 0, sizeof(struct inet6_skb_parm
));
541 IP6CB(skb
)->iif
= skb
->dev
->ifindex
;
542 IP6CB(skb
)->flags
= IP6SKB_FORWARDED
;
543 return nf_flow_xmit_xfrm(skb
, state
, &rt
->dst
);
547 nexthop
= rt6_nexthop(rt
, &flow
->tuplehash
[!dir
].tuple
.src_v6
);
548 skb_dst_set_noref(skb
, &rt
->dst
);
549 neigh_xmit(NEIGH_ND_TABLE
, outdev
, nexthop
, skb
);
553 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook
);