1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/netfilter.h>
6 #include <linux/rhashtable.h>
8 #include <linux/ipv6.h>
9 #include <linux/netdevice.h>
12 #include <net/ip6_route.h>
13 #include <net/neighbour.h>
14 #include <net/netfilter/nf_flow_table.h>
15 /* For layer 4 checksum field offset. */
16 #include <linux/tcp.h>
17 #include <linux/udp.h>
/*
 * nf_flow_state_check() - examine the TCP flags of a packet belonging to an
 * offloaded flow.
 *
 * @flow:  flow entry handed to flow_offload_teardown() when FIN or RST is set
 * @proto: layer 4 protocol number; compared against IPPROTO_TCP
 * @skb:   packet being inspected
 * @thoff: byte offset of the transport header from the network header
 *
 * NOTE(review): this extract appears to omit lines (braces, the declaration
 * of tcph and every return statement), so the value returned on each path is
 * not visible here - confirm against the complete file.
 */
19 static int nf_flow_state_check(struct flow_offload
*flow
, int proto
,
20 struct sk_buff
*skb
, unsigned int thoff
)
/* Branch taken for every protocol other than TCP. */
24 if (proto
!= IPPROTO_TCP
)
/* Ask for thoff + sizeof(*tcph) bytes before the header is dereferenced. */
27 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)))
/* thoff is applied as a byte offset from the network header. */
30 tcph
= (void *)(skb_network_header(skb
) + thoff
);
/* A set FIN or RST flag triggers teardown of the flow entry. */
31 if (unlikely(tcph
->fin
|| tcph
->rst
)) {
32 flow_offload_teardown(flow
);
/*
 * nf_flow_nat_ip_tcp() - adjust the TCP checksum for an IPv4 address change.
 *
 * @skb:      packet whose TCP header is updated
 * @thoff:    byte offset of the TCP header from the network header
 * @addr:     address value passed as the "old" operand of the checksum update
 * @new_addr: address value passed as the "new" operand of the checksum update
 *
 * NOTE(review): the declaration of tcph and the return statements are not
 * visible in this extract; the return value on each path must be confirmed
 * against the complete file.
 */
39 static int nf_flow_nat_ip_tcp(struct sk_buff
*skb
, unsigned int thoff
,
40 __be32 addr
, __be32 new_addr
)
/*
 * Both helpers are called with thoff + sizeof(*tcph): one before the header
 * is read, the other before it is written.
 */
44 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)) ||
45 skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
/* Locate the TCP header and patch its checksum field in place. */
48 tcph
= (void *)(skb_network_header(skb
) + thoff
);
49 inet_proto_csum_replace4(&tcph
->check
, skb
, addr
, new_addr
, true);
/*
 * nf_flow_nat_ip_udp() - adjust the UDP checksum for an IPv4 address change.
 *
 * @skb:      packet whose UDP header is updated
 * @thoff:    byte offset of the UDP header from the network header
 * @addr:     address value passed to the checksum update
 * @new_addr: replacement address (its use in the checksum call is on a line
 *            that is not visible in this extract)
 *
 * NOTE(review): the declaration of udph, the tail of the
 * inet_proto_csum_replace4() call, the condition guarding the
 * CSUM_MANGLED_0 assignment and all return statements are missing from this
 * extract - confirm against the complete file.
 */
54 static int nf_flow_nat_ip_udp(struct sk_buff
*skb
, unsigned int thoff
,
55 __be32 addr
, __be32 new_addr
)
/* Request thoff + sizeof(*udph) bytes readable and writable. */
59 if (!pskb_may_pull(skb
, thoff
+ sizeof(*udph
)) ||
60 skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
63 udph
= (void *)(skb_network_header(skb
) + thoff
);
/*
 * The checksum is only touched when the field is non-zero or the skb is
 * marked CHECKSUM_PARTIAL.
 */
64 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
65 inet_proto_csum_replace4(&udph
->check
, skb
, addr
,
/* NOTE(review): presumably conditional on the updated checksum - verify. */
68 udph
->check
= CSUM_MANGLED_0
;
/*
 * nf_flow_nat_ip_l4proto() - dispatch the layer 4 checksum fix-up for an IPv4
 * address change, selecting the helper from iph->protocol.
 *
 * @skb:   packet being rewritten
 * @iph:   IPv4 header; only ->protocol is read in the visible lines
 * @thoff: byte offset of the transport header from the network header
 * @addr:  address value forwarded to the per-protocol helper
 *
 * The visible calls also forward a new_addr argument; its parameter
 * declaration, the case labels and the return statements are not visible in
 * this extract.
 * NOTE(review): presumably the two calls sit under IPPROTO_TCP and
 * IPPROTO_UDP case labels - confirm against the complete file.
 */
74 static int nf_flow_nat_ip_l4proto(struct sk_buff
*skb
, struct iphdr
*iph
,
75 unsigned int thoff
, __be32 addr
,
78 switch (iph
->protocol
) {
/* TCP helper; a negative result takes the branch below. */
80 if (nf_flow_nat_ip_tcp(skb
, thoff
, addr
, new_addr
) < 0)
/* UDP helper; a negative result takes the branch below. */
84 if (nf_flow_nat_ip_udp(skb
, thoff
, addr
, new_addr
) < 0)
/*
 * nf_flow_snat_ip() - rewrite an IPv4 address for source NAT and fix up the
 * IP header and layer 4 checksums.
 *
 * @flow:  flow entry supplying the translated addresses
 * @skb:   packet being rewritten
 * @iph:   IPv4 header that is modified in place
 * @thoff: byte offset of the transport header from the network header
 * @dir:   direction of the packet within the flow
 *
 * Returns the result of nf_flow_nat_ip_l4proto() on the path visible here.
 *
 * NOTE(review): the switch expression, the assignments to addr (the value
 * being replaced), the break statements and any default case are not visible
 * in this extract - confirm against the complete file.
 */
92 static int nf_flow_snat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
93 struct iphdr
*iph
, unsigned int thoff
,
94 enum flow_offload_tuple_dir dir
)
96 __be32 addr
, new_addr
;
/* Original direction: source becomes the reply tuple's destination. */
99 case FLOW_OFFLOAD_DIR_ORIGINAL
:
101 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v4
.s_addr
;
102 iph
->saddr
= new_addr
;
/* Reply direction: destination becomes the original tuple's source. */
104 case FLOW_OFFLOAD_DIR_REPLY
:
106 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v4
.s_addr
;
107 iph
->daddr
= new_addr
;
/* Update the IPv4 header checksum, then the transport checksum. */
112 csum_replace4(&iph
->check
, addr
, new_addr
);
114 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
/*
 * nf_flow_dnat_ip() - rewrite an IPv4 address for destination NAT and fix up
 * the IP header and layer 4 checksums.
 *
 * @flow:  flow entry supplying the translated addresses
 * @skb:   packet being rewritten
 * @iph:   IPv4 header that is modified in place
 * @thoff: byte offset of the transport header from the network header
 * @dir:   direction of the packet within the flow
 *
 * Returns the result of nf_flow_nat_ip_l4proto() on the path visible here.
 *
 * NOTE(review): the switch expression, the assignments to addr (the value
 * being replaced), the break statements and any default case are not visible
 * in this extract - confirm against the complete file.
 */
117 static int nf_flow_dnat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
118 struct iphdr
*iph
, unsigned int thoff
,
119 enum flow_offload_tuple_dir dir
)
121 __be32 addr
, new_addr
;
/* Original direction: destination becomes the reply tuple's source. */
124 case FLOW_OFFLOAD_DIR_ORIGINAL
:
126 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v4
.s_addr
;
127 iph
->daddr
= new_addr
;
/* Reply direction: source becomes the original tuple's destination. */
129 case FLOW_OFFLOAD_DIR_REPLY
:
131 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v4
.s_addr
;
132 iph
->saddr
= new_addr
;
/* Update the IPv4 header checksum, then the transport checksum. */
137 csum_replace4(&iph
->check
, addr
, new_addr
);
139 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
/*
 * nf_flow_nat_ip() - apply the NAT rewrites requested by the flow's flags to
 * an IPv4 packet.
 *
 * @flow:  flow entry; ->flags selects SNAT and/or DNAT handling
 * @skb:   packet being rewritten
 * @thoff: byte offset of the transport header from the network header
 * @dir:   direction of the packet within the flow
 *
 * For each of FLOW_OFFLOAD_SNAT and FLOW_OFFLOAD_DNAT that is set, the port
 * rewrite helper runs first and the address rewrite helper runs only if the
 * port helper did not return a negative value.
 *
 * NOTE(review): the statements executed when a helper fails and the final
 * return are not visible in this extract.
 */
142 static int nf_flow_nat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
143 unsigned int thoff
, enum flow_offload_tuple_dir dir
)
145 struct iphdr
*iph
= ip_hdr(skb
);
/* Source NAT: ports first, then the address. */
147 if (flow
->flags
& FLOW_OFFLOAD_SNAT
&&
148 (nf_flow_snat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
149 nf_flow_snat_ip(flow
, skb
, iph
, thoff
, dir
) < 0))
/* Destination NAT: ports first, then the address. */
151 if (flow
->flags
& FLOW_OFFLOAD_DNAT
&&
152 (nf_flow_dnat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
153 nf_flow_dnat_ip(flow
, skb
, iph
, thoff
, dir
) < 0))
/*
 * ip_has_options() - report whether an IPv4 header length differs from the
 * size of a bare struct iphdr.
 *
 * @thoff: header length in bytes, as computed by the caller
 *
 * Returns true when @thoff is not equal to sizeof(struct iphdr).
 */
159 static bool ip_has_options(unsigned int thoff
)
161 return thoff
!= sizeof(struct iphdr
);
/*
 * nf_flow_tuple_ip() - fill a flow lookup tuple from an IPv4 packet.
 *
 * @skb:   packet to parse
 * @dev:   device whose ifindex is stored as the tuple's input interface
 * @tuple: output; addresses, ports, l3proto (AF_INET), l4proto and iifidx
 *         are written
 *
 * The visible checks cover: enough data for the IPv4 header, fragments,
 * headers whose length differs from struct iphdr, protocols other than
 * TCP/UDP, and enough data for the port pair.
 *
 * NOTE(review): the declarations of iph and thoff, the assignments to iph
 * and all return statements are missing from this extract, so the outcome of
 * each check is not visible here (presumably the packet is rejected).
 */
164 static int nf_flow_tuple_ip(struct sk_buff
*skb
, const struct net_device
*dev
,
165 struct flow_offload_tuple
*tuple
)
167 struct flow_ports
*ports
;
171 if (!pskb_may_pull(skb
, sizeof(*iph
)))
/* Header length in bytes: ihl counts 32-bit words. */
175 thoff
= iph
->ihl
* 4;
177 if (ip_is_fragment(iph
) ||
178 unlikely(ip_has_options(thoff
)))
/* Only TCP and UDP fall through to the port extraction below. */
181 if (iph
->protocol
!= IPPROTO_TCP
&&
182 iph
->protocol
!= IPPROTO_UDP
)
/*
 * NOTE(review): thoff is assigned the same expression a second time here;
 * this looks redundant in the visible lines - confirm before removing.
 */
188 thoff
= iph
->ihl
* 4;
189 if (!pskb_may_pull(skb
, thoff
+ sizeof(*ports
)))
/* The port pair is read from the start of the transport header. */
192 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
194 tuple
->src_v4
.s_addr
= iph
->saddr
;
195 tuple
->dst_v4
.s_addr
= iph
->daddr
;
196 tuple
->src_port
= ports
->source
;
197 tuple
->dst_port
= ports
->dest
;
198 tuple
->l3proto
= AF_INET
;
199 tuple
->l4proto
= iph
->protocol
;
200 tuple
->iifidx
= dev
->ifindex
;
205 /* Based on ip_exceeds_mtu(). */
/*
 * nf_flow_exceeds_mtu() - decide whether a packet is too large for @mtu.
 *
 * @skb: packet to check
 * @mtu: limit the packet is compared against
 *
 * Only the GSO test is visible in this extract: it is satisfied when the skb
 * is GSO and skb_gso_validate_network_len() accepts it for @mtu.
 * NOTE(review): the plain length comparison and all return statements are
 * not visible here - confirm against the complete file.
 */
206 static bool nf_flow_exceeds_mtu(const struct sk_buff
*skb
, unsigned int mtu
)
211 if (skb_is_gso(skb
) && skb_gso_validate_network_len(skb
, mtu
))
/*
 * nf_flow_offload_ip_hook() - IPv4 hook that forwards packets belonging to a
 * flow found in the flow table.
 *
 * @priv:  the struct nf_flowtable to look the packet up in
 * @skb:   packet being processed
 * @state: hook state; ->in is the input device used for the lookup tuple
 *
 * Visible sequence: check skb->protocol for ETH_P_IP, build the tuple, look
 * it up, fetch the cached route and its output device, check the MTU, make
 * the IPv4 header writable, run the TCP state check and NAT, refresh the
 * flow timeout, decrease the TTL and hand the packet to neigh_xmit() for the
 * route's next hop.
 *
 * NOTE(review): the return type, the declarations of rt, iph, thoff and
 * nexthop, the assignment of iph and every return statement are missing from
 * this extract - the verdict returned on each path must be confirmed against
 * the complete file.
 */
218 nf_flow_offload_ip_hook(void *priv
, struct sk_buff
*skb
,
219 const struct nf_hook_state
*state
)
221 struct flow_offload_tuple_rhash
*tuplehash
;
222 struct nf_flowtable
*flow_table
= priv
;
223 struct flow_offload_tuple tuple
= {};
224 enum flow_offload_tuple_dir dir
;
225 struct flow_offload
*flow
;
226 struct net_device
*outdev
;
/* Only IPv4 frames are handled by this hook. */
232 if (skb
->protocol
!= htons(ETH_P_IP
))
235 if (nf_flow_tuple_ip(skb
, state
->in
, &tuple
) < 0)
238 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
239 if (tuplehash
== NULL
)
/* Recover the flow entry and the route cached for this direction. */
242 dir
= tuplehash
->tuple
.dir
;
243 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
244 rt
= (struct rtable
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
245 outdev
= rt
->dst
.dev
;
247 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
/* The IPv4 header is modified below (NAT, TTL). */
250 if (skb_try_make_writable(skb
, sizeof(*iph
)))
253 thoff
= ip_hdr(skb
)->ihl
* 4;
254 if (nf_flow_state_check(flow
, ip_hdr(skb
)->protocol
, skb
, thoff
))
257 if (nf_flow_nat_ip(flow
, skb
, thoff
, dir
) < 0)
/* Push the flow's expiry forward from the current jiffies value. */
260 flow
->timeout
= (u32
)jiffies
+ NF_FLOW_TIMEOUT
;
262 ip_decrease_ttl(iph
);
/* Next hop is resolved against the opposite direction's source address. */
265 nexthop
= rt_nexthop(rt
, flow
->tuplehash
[!dir
].tuple
.src_v4
.s_addr
);
266 skb_dst_set_noref(skb
, &rt
->dst
);
267 neigh_xmit(NEIGH_ARP_TABLE
, outdev
, &nexthop
, skb
);
271 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook
);
/*
 * nf_flow_nat_ipv6_tcp() - adjust the TCP checksum for an IPv6 address
 * change.
 *
 * @skb:      packet whose TCP header is updated
 * @thoff:    byte offset of the TCP header from the network header
 * @addr:     address passed as the "old" operand of the checksum update
 * @new_addr: address passed as the "new" operand of the checksum update
 *
 * NOTE(review): the declaration of tcph and the return statements are not
 * visible in this extract.
 */
273 static int nf_flow_nat_ipv6_tcp(struct sk_buff
*skb
, unsigned int thoff
,
274 struct in6_addr
*addr
,
275 struct in6_addr
*new_addr
)
/* Request thoff + sizeof(*tcph) bytes readable and writable. */
279 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)) ||
280 skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
283 tcph
= (void *)(skb_network_header(skb
) + thoff
);
/* Both addresses are passed as their 32-bit word arrays. */
284 inet_proto_csum_replace16(&tcph
->check
, skb
, addr
->s6_addr32
,
285 new_addr
->s6_addr32
, true);
/*
 * nf_flow_nat_ipv6_udp() - adjust the UDP checksum for an IPv6 address
 * change.
 *
 * @skb:      packet whose UDP header is updated
 * @thoff:    byte offset of the UDP header from the network header
 * @addr:     address passed as the "old" operand of the checksum update
 * @new_addr: address passed as the "new" operand of the checksum update
 *
 * NOTE(review): the declaration of udph, the condition guarding the
 * CSUM_MANGLED_0 assignment and the return statements are not visible in
 * this extract.
 */
290 static int nf_flow_nat_ipv6_udp(struct sk_buff
*skb
, unsigned int thoff
,
291 struct in6_addr
*addr
,
292 struct in6_addr
*new_addr
)
/* Request thoff + sizeof(*udph) bytes readable and writable. */
296 if (!pskb_may_pull(skb
, thoff
+ sizeof(*udph
)) ||
297 skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
300 udph
= (void *)(skb_network_header(skb
) + thoff
);
/*
 * The checksum is only touched when the field is non-zero or the skb is
 * marked CHECKSUM_PARTIAL.
 */
301 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
302 inet_proto_csum_replace16(&udph
->check
, skb
, addr
->s6_addr32
,
303 new_addr
->s6_addr32
, true);
/* NOTE(review): presumably conditional on the updated checksum - verify. */
305 udph
->check
= CSUM_MANGLED_0
;
/*
 * nf_flow_nat_ipv6_l4proto() - dispatch the layer 4 checksum fix-up for an
 * IPv6 address change, selecting the helper from ip6h->nexthdr.
 *
 * @skb:      packet being rewritten
 * @ip6h:     IPv6 header; only ->nexthdr is read in the visible lines
 * @thoff:    byte offset of the transport header from the network header
 * @addr:     address forwarded to the per-protocol helper as the old value
 * @new_addr: address forwarded to the per-protocol helper as the new value
 *
 * NOTE(review): the case labels and return statements are not visible in
 * this extract; presumably the two calls sit under IPPROTO_TCP and
 * IPPROTO_UDP - confirm against the complete file.
 */
311 static int nf_flow_nat_ipv6_l4proto(struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
312 unsigned int thoff
, struct in6_addr
*addr
,
313 struct in6_addr
*new_addr
)
315 switch (ip6h
->nexthdr
) {
/* TCP helper; a negative result takes the branch below. */
317 if (nf_flow_nat_ipv6_tcp(skb
, thoff
, addr
, new_addr
) < 0)
/* UDP helper; a negative result takes the branch below. */
321 if (nf_flow_nat_ipv6_udp(skb
, thoff
, addr
, new_addr
) < 0)
/*
 * nf_flow_snat_ipv6() - rewrite an IPv6 address for source NAT and fix up the
 * layer 4 checksum.
 *
 * @flow: flow entry supplying the translated addresses
 * @skb:  packet being rewritten
 * @ip6h: IPv6 header that is modified in place
 * @dir:  direction of the packet within the flow
 *
 * Returns the result of nf_flow_nat_ipv6_l4proto() on the path visible here.
 *
 * NOTE(review): the thoff parameter line, the switch expression, the
 * assignments to addr (the value being replaced), the break statements and
 * any default case are not visible in this extract.
 */
329 static int nf_flow_snat_ipv6(const struct flow_offload
*flow
,
330 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
332 enum flow_offload_tuple_dir dir
)
334 struct in6_addr addr
, new_addr
;
/* Original direction: source becomes the reply tuple's destination. */
337 case FLOW_OFFLOAD_DIR_ORIGINAL
:
339 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v6
;
340 ip6h
->saddr
= new_addr
;
/* Reply direction: destination becomes the original tuple's source. */
342 case FLOW_OFFLOAD_DIR_REPLY
:
344 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v6
;
345 ip6h
->daddr
= new_addr
;
/* No IP header checksum update is visible here; only layer 4 is fixed up. */
351 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
/*
 * nf_flow_dnat_ipv6() - rewrite an IPv6 address for destination NAT and fix
 * up the layer 4 checksum.
 *
 * @flow: flow entry supplying the translated addresses
 * @skb:  packet being rewritten
 * @ip6h: IPv6 header that is modified in place
 * @dir:  direction of the packet within the flow
 *
 * Returns the result of nf_flow_nat_ipv6_l4proto() on the path visible here.
 *
 * NOTE(review): the thoff parameter line, the switch expression, the
 * assignments to addr (the value being replaced), the break statements and
 * any default case are not visible in this extract.
 */
354 static int nf_flow_dnat_ipv6(const struct flow_offload
*flow
,
355 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
357 enum flow_offload_tuple_dir dir
)
359 struct in6_addr addr
, new_addr
;
/* Original direction: destination becomes the reply tuple's source. */
362 case FLOW_OFFLOAD_DIR_ORIGINAL
:
364 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v6
;
365 ip6h
->daddr
= new_addr
;
/* Reply direction: source becomes the original tuple's destination. */
367 case FLOW_OFFLOAD_DIR_REPLY
:
369 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v6
;
370 ip6h
->saddr
= new_addr
;
/* No IP header checksum update is visible here; only layer 4 is fixed up. */
376 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
/*
 * nf_flow_nat_ipv6() - apply the NAT rewrites requested by the flow's flags
 * to an IPv6 packet.
 *
 * @flow: flow entry; ->flags selects SNAT and/or DNAT handling
 * @dir:  direction of the packet within the flow
 *
 * The transport header offset is fixed at sizeof(struct ipv6hdr). For each of
 * FLOW_OFFLOAD_SNAT and FLOW_OFFLOAD_DNAT that is set, the port rewrite
 * helper runs first and the address rewrite helper runs only if the port
 * helper did not return a negative value.
 *
 * NOTE(review): the skb parameter line, the statements executed when a
 * helper fails and the final return are not visible in this extract.
 */
379 static int nf_flow_nat_ipv6(const struct flow_offload
*flow
,
381 enum flow_offload_tuple_dir dir
)
383 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
384 unsigned int thoff
= sizeof(*ip6h
);
/* Source NAT: ports first, then the address. */
386 if (flow
->flags
& FLOW_OFFLOAD_SNAT
&&
387 (nf_flow_snat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
388 nf_flow_snat_ipv6(flow
, skb
, ip6h
, thoff
, dir
) < 0))
/* Destination NAT: ports first, then the address. */
390 if (flow
->flags
& FLOW_OFFLOAD_DNAT
&&
391 (nf_flow_dnat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
392 nf_flow_dnat_ipv6(flow
, skb
, ip6h
, thoff
, dir
) < 0))
/*
 * nf_flow_tuple_ipv6() - fill a flow lookup tuple from an IPv6 packet.
 *
 * @skb:   packet to parse
 * @dev:   device whose ifindex is stored as the tuple's input interface
 * @tuple: output; addresses, ports, l3proto (AF_INET6), l4proto and iifidx
 *         are written
 *
 * The visible checks cover: enough data for the IPv6 header, next headers
 * other than TCP/UDP, a hop limit of 1 or less, and enough data for the port
 * pair.
 *
 * NOTE(review): the declaration of thoff and all return statements are
 * missing from this extract, so the outcome of each check is not visible
 * here (presumably the packet is rejected).
 */
398 static int nf_flow_tuple_ipv6(struct sk_buff
*skb
, const struct net_device
*dev
,
399 struct flow_offload_tuple
*tuple
)
401 struct flow_ports
*ports
;
402 struct ipv6hdr
*ip6h
;
405 if (!pskb_may_pull(skb
, sizeof(*ip6h
)))
408 ip6h
= ipv6_hdr(skb
);
/* Only TCP and UDP directly after the fixed header are handled below. */
410 if (ip6h
->nexthdr
!= IPPROTO_TCP
&&
411 ip6h
->nexthdr
!= IPPROTO_UDP
)
414 if (ip6h
->hop_limit
<= 1)
/* Transport header is taken to start right after the fixed IPv6 header. */
417 thoff
= sizeof(*ip6h
);
418 if (!pskb_may_pull(skb
, thoff
+ sizeof(*ports
)))
421 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
423 tuple
->src_v6
= ip6h
->saddr
;
424 tuple
->dst_v6
= ip6h
->daddr
;
425 tuple
->src_port
= ports
->source
;
426 tuple
->dst_port
= ports
->dest
;
427 tuple
->l3proto
= AF_INET6
;
428 tuple
->l4proto
= ip6h
->nexthdr
;
429 tuple
->iifidx
= dev
->ifindex
;
/*
 * nf_flow_offload_ipv6_hook() - IPv6 hook that forwards packets belonging to
 * a flow found in the flow table.
 *
 * @priv:  the struct nf_flowtable to look the packet up in
 * @skb:   packet being processed
 * @state: hook state; ->in is the input device used for the lookup tuple
 *
 * Visible sequence: check skb->protocol for ETH_P_IPV6, build the tuple, look
 * it up, fetch the cached route and its output device, check the MTU, run
 * the TCP state check, make the IPv6 header writable, apply NAT, refresh the
 * flow timeout and hand the packet to neigh_xmit() for the route's next hop.
 *
 * NOTE(review): the return type, the declaration of rt, the tail of the
 * nf_flow_state_check() call, any hop limit update and every return
 * statement are missing from this extract - the verdict returned on each
 * path must be confirmed against the complete file.
 */
435 nf_flow_offload_ipv6_hook(void *priv
, struct sk_buff
*skb
,
436 const struct nf_hook_state
*state
)
438 struct flow_offload_tuple_rhash
*tuplehash
;
439 struct nf_flowtable
*flow_table
= priv
;
440 struct flow_offload_tuple tuple
= {};
441 enum flow_offload_tuple_dir dir
;
442 const struct in6_addr
*nexthop
;
443 struct flow_offload
*flow
;
444 struct net_device
*outdev
;
445 struct ipv6hdr
*ip6h
;
/* Only IPv6 frames are handled by this hook. */
448 if (skb
->protocol
!= htons(ETH_P_IPV6
))
451 if (nf_flow_tuple_ipv6(skb
, state
->in
, &tuple
) < 0)
454 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
455 if (tuplehash
== NULL
)
/* Recover the flow entry and the route cached for this direction. */
458 dir
= tuplehash
->tuple
.dir
;
459 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
460 rt
= (struct rt6_info
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
461 outdev
= rt
->dst
.dev
;
463 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
/* NOTE(review): the remaining argument(s) of this call are not visible. */
466 if (nf_flow_state_check(flow
, ipv6_hdr(skb
)->nexthdr
, skb
,
/* The IPv6 header is modified below (NAT). */
470 if (skb_try_make_writable(skb
, sizeof(*ip6h
)))
473 if (nf_flow_nat_ipv6(flow
, skb
, dir
) < 0)
/* Push the flow's expiry forward from the current jiffies value. */
476 flow
->timeout
= (u32
)jiffies
+ NF_FLOW_TIMEOUT
;
477 ip6h
= ipv6_hdr(skb
);
/* Next hop is resolved against the opposite direction's source address. */
481 nexthop
= rt6_nexthop(rt
, &flow
->tuplehash
[!dir
].tuple
.src_v6
);
482 skb_dst_set_noref(skb
, &rt
->dst
);
483 neigh_xmit(NEIGH_ND_TABLE
, outdev
, nexthop
, skb
);
487 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook
);