// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
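
/* The fast path does no TCP state tracking of its own. On FIN or RST,
 * tear down the flow so this and subsequent packets fall back to the
 * classic forwarding path, where conntrack observes the shutdown.
 */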
static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (unlikely(tcph->fin || tcph->rst)) {
		flow_offload_teardown(flow);
		return -1;
	}

	return 0;
}

static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}
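
/* A zero UDP checksum means "checksum disabled" on IPv4: only fix it up
 * when one is in use (or about to be computed via CHECKSUM_PARTIAL), and
 * fold a recomputed zero to CSUM_MANGLED_0.
 */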
static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}
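
/* SNAT rewrites the source address of original-direction packets and the
 * destination address of replies; DNAT below is the mirror image. The IP
 * header checksum and the L4 pseudo-header checksum are fixed up
 * incrementally rather than recomputed.
 */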
static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   unsigned int thoff, enum flow_offload_tuple_dir dir,
			   struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}
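
/* The fast path only handles option-less IPv4 headers; anything carrying
 * options stays on the classic forwarding path.
 */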
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}
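
/* Record the encapsulation headers in the lookup tuple: a hardware
 * accelerated VLAN tag first, then an outer VLAN or PPPoE session header
 * taken from the mac header, so tagged traffic can match a flowtable
 * entry.
 */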
static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		break;
	}
}
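
/* Extract the lookup tuple from the packet. Fragments, IPv4 options,
 * non-TCP/UDP traffic and packets whose TTL would expire are left to the
 * classic forwarding path.
 */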
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple, u32 *hdrsize,
			    u32 offset)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += offset;

	switch (iph->protocol) {
	case IPPROTO_TCP:
		*hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		*hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + *hdrsize))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr	= iph->saddr;
	tuple->dst_v4.s_addr	= iph->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET;
	tuple->l4proto		= iph->protocol;
	tuple->iifidx		= dev->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}
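
/* The PPP protocol field sits right after the PPPoE session header. */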
static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
{
	__be16 proto;

	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
			     sizeof(struct pppoe_hdr)));
	switch (proto) {
	case htons(PPP_IP):
		return htons(ETH_P_IP);
	case htons(PPP_IPV6):
		return htons(ETH_P_IPV6);
	}

	return 0;
}

static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
				       u32 *offset)
{
	struct vlan_ethhdr *veth;

	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			return true;
		}
		break;
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb) == proto) {
			*offset += PPPOE_SES_HLEN;
			return true;
		}
		break;
	}

	return false;
}

static void nf_flow_encap_pop(struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		switch (skb->protocol) {
		case htons(ETH_P_8021Q):
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
			break;
		case htons(ETH_P_PPP_SES):
			skb->protocol = nf_flow_pppoe_proto(skb);
			skb_pull(skb, PPPOE_SES_HLEN);
			skb_reset_network_header(skb);
			break;
		}
	}
}
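
/* FLOW_OFFLOAD_XMIT_DIRECT: rebuild the Ethernet header from the
 * addresses cached in the tuple and queue the packet straight to the
 * output device, bypassing the neighbour layer.
 */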
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       const struct flow_offload_tuple_rhash *tuplehash,
				       unsigned short type)
{
	struct net_device *outdev;

	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
	if (!outdev)
		return NF_DROP;

	skb->dev = outdev;
	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
			tuplehash->tuple.out.h_source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}
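
/* IPv4 fast path, called from the netdev ingress hook: match the packet
 * against the flowtable, validate it, refresh the flow timeout, apply
 * NAT and the TTL decrement, then transmit via the neighbour layer,
 * directly, or through xfrm output.
 */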
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	u32 hdrsize, offset = 0;
	unsigned int thoff, mtu;
	struct rtable *rt;
	struct iphdr *iph;
	__be32 nexthop;
	int ret;

	if (skb->protocol != htons(ETH_P_IP) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return NF_ACCEPT;

	iph = (struct iphdr *)(skb_network_header(skb) + offset);
	thoff = (iph->ihl * 4) + offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return NF_ACCEPT;

	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
		rt = (struct rtable *)tuplehash->tuple.dst_cache;
		if (!dst_check(&rt->dst, 0)) {
			flow_offload_teardown(flow);
			return NF_ACCEPT;
		}
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

	flow_offload_refresh(flow_table, flow);

	nf_flow_encap_pop(skb, tuplehash);
	thoff -= offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
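
/* IPv6 counterparts of the NAT helpers above. IPv6 has no header
 * checksum, so only the L4 pseudo-header checksums need fixing up.
 */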
static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}
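
/* Same validation as nf_flow_tuple_ip: only plain TCP or UDP directly
 * after the IPv6 header (no extension headers), and the hop limit must
 * allow one more forward.
 */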
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple, u32 *hdrsize,
			      u32 offset)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	thoff = sizeof(*ip6h) + offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);

	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		*hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		*hdrsize = sizeof(struct udphdr);
		break;
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + *hdrsize))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6		= ip6h->saddr;
	tuple->dst_v6		= ip6h->daddr;
	tuple->src_port		= ports->source;
	tuple->dst_port		= ports->dest;
	tuple->l3proto		= AF_INET6;
	tuple->l4proto		= ip6h->nexthdr;
	tuple->iifidx		= dev->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}
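
/* IPv6 fast path, mirroring nf_flow_offload_ip_hook. The hop limit is
 * decremented directly since IPv6 has no header checksum to fix up.
 */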
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	unsigned int thoff, mtu;
	u32 hdrsize, offset = 0;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;
	int ret;

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return NF_ACCEPT;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
	thoff = sizeof(*ip6h) + offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return NF_ACCEPT;

	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
		if (!dst_check(&rt->dst, 0)) {
			flow_offload_teardown(flow);
			return NF_ACCEPT;
		}
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

	flow_offload_refresh(flow_table, flow);

	nf_flow_encap_pop(skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb->tstamp = 0;

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);