// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

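/* If the TCP connection is closing (FIN or RST seen), tear down the
 * flowtable entry so that this packet and the rest of the shutdown
 * handshake fall back to the classic conntrack path.
 */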
static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

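/* The IPv4 addresses are part of the TCP pseudo-header, so NAT must fix
 * up the L4 checksum in addition to the IP header checksum.
 */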
static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}

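/* Same as the TCP case, except a zero UDP checksum means "no checksum":
 * only fix it up when one is in use, and map a recomputed zero back to
 * CSUM_MANGLED_0 as required on the wire.
 */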
static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}

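/* Source NAT: rewrite the source address in the original direction (and
 * the destination address of replies) with the address recorded in the
 * opposite direction's tuple, then fix up the checksums.
 */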
static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

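/* Destination NAT: the mirror image of nf_flow_snat_ip(). */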
static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

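/* Apply port and address NAT according to the flow's SNAT/DNAT flags. */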
static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   unsigned int thoff, enum flow_offload_tuple_dir dir,
			   struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}

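/* An IP header longer than the bare struct iphdr carries options; such
 * packets stay on the slow path.
 */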
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

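/* Record the VLAN encapsulation in the lookup tuple: a hardware
 * accelerated tag first (if present), then a tag still in the payload.
 */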
static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);

		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
	}
}

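/* Extract the flowtable lookup tuple from an IPv4 packet. Only plain TCP
 * and UDP packets are candidates for the fast path; fragments, packets
 * with IP options and packets about to exceed their TTL are left to the
 * regular stack.
 */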
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple, u32 *hdrsize,
			    u32 offset)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += offset;

	switch (iph->protocol) {
	case IPPROTO_TCP:
		*hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		*hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + *hdrsize))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr	= iph->saddr;
	tuple->dst_v4.s_addr	= iph->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET;
	tuple->l4proto		= iph->protocol;
	tuple->iifidx		= dev->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

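/* Hand the packet to dst_output() so that the xfrm stack performs IPsec
 * encapsulation on the way out.
 */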
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

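/* Check whether a VLAN encapsulated packet carries the given protocol,
 * accumulating the encapsulation length into *offset.
 */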
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
				       u32 *offset)
{
	if (skb->protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veth;

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			return true;
		}
	}

	return false;
}

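/* Strip the VLAN encapsulation recorded in the tuple before the packet
 * is handed to the outgoing device.
 */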
static void nf_flow_encap_pop(struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		if (skb->protocol == htons(ETH_P_8021Q)) {
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
		}
	}
}

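/* Direct transmission: rebuild the Ethernet header from the addresses
 * cached in the tuple and queue the packet on the outgoing device.
 */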
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       const struct flow_offload_tuple_rhash *tuplehash,
				       unsigned short type)
{
	struct net_device *outdev;

	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
	if (!outdev)
		return NF_DROP;

	skb->dev = outdev;
	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
			tuplehash->tuple.out.h_source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}

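/* IPv4 fast path. Look up the flowtable with a tuple extracted from the
 * packet; on a hit, validate the cached route, apply NAT, decrement the
 * TTL and transmit directly, bypassing the classic forwarding path. Any
 * miss or failed check returns NF_ACCEPT to fall back to it.
 */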
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	u32 hdrsize, offset = 0;
	unsigned int thoff, mtu;
	struct rtable *rt;
	struct iphdr *iph;
	__be32 nexthop;
	int ret;

	if (skb->protocol != htons(ETH_P_IP) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return NF_ACCEPT;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	thoff = (iph->ihl * 4) + offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return NF_ACCEPT;

	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
		rt = (struct rtable *)tuplehash->tuple.dst_cache;
		if (!dst_check(&rt->dst, 0)) {
			flow_offload_teardown(flow);
			return NF_ACCEPT;
		}
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

	flow_offload_refresh(flow_table, flow);

	nf_flow_encap_pop(skb, tuplehash);
	thoff -= offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

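/* IPv6 counterparts of the NAT helpers above: the address is folded into
 * the L4 checksum with inet_proto_csum_replace16(), and the IPv6 header
 * itself has no checksum to update.
 */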
static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}

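/* Extract the flowtable lookup tuple from an IPv6 packet; as in the IPv4
 * case, only plain TCP and UDP packets qualify for the fast path.
 */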
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple, u32 *hdrsize,
			      u32 offset)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	thoff = sizeof(*ip6h) + offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);

	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		*hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		*hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + *hdrsize))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6		= ip6h->saddr;
	tuple->dst_v6		= ip6h->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET6;
	tuple->l4proto		= ip6h->nexthdr;
	tuple->iifidx		= dev->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

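/* IPv6 fast path, mirroring nf_flow_offload_ip_hook(): flowtable lookup,
 * route validation, NAT, hop limit decrement and direct transmission.
 */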
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	unsigned int thoff, mtu;
	u32 hdrsize, offset = 0;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;
	int ret;

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return NF_ACCEPT;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
	thoff = sizeof(*ip6h) + offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return NF_ACCEPT;

	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
		if (!dst_check(&rt->dst, 0)) {
			flow_offload_teardown(flow);
			return NF_ACCEPT;
		}
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

	flow_offload_refresh(flow_table, flow);

	nf_flow_encap_pop(skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);