net/netfilter/nf_flow_table_ip.c
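
/* IPv4/IPv6 fast path for the netfilter flow table: extract a lookup
 * tuple from the packet, apply any recorded SNAT/DNAT mangling
 * (addresses, ports, layer 4 checksums), decrement the TTL/hop limit
 * and transmit directly to the cached route's next hop. Packets that
 * do not match an offloaded flow, or that need more care (fragments,
 * IP options, oversized packets), are returned to the normal slow
 * path via NF_ACCEPT.
 */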
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);

	return 0;
}

static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			      __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		/* A zero UDP checksum means "no checksum" on IPv4, so a
		 * recomputed zero must be folded to CSUM_MANGLED_0 (0xffff).
		 */
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}
52
53static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
54 unsigned int thoff, __be32 addr,
55 __be32 new_addr)
56{
57 switch (iph->protocol) {
58 case IPPROTO_TCP:
59 if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
60 return NF_DROP;
61 break;
62 case IPPROTO_UDP:
63 if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
64 return NF_DROP;
65 break;
66 }
67
68 return 0;
69}

static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   struct iphdr *iph, unsigned int thoff,
			   enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	default:
		return -1;
	}
	csum_replace4(&iph->check, addr, new_addr);

	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			  enum flow_offload_tuple_dir dir)
{
	struct iphdr *iph = ip_hdr(skb);
	unsigned int thoff = iph->ihl * 4;

	if (flow->flags & FLOW_OFFLOAD_SNAT &&
	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
		return -1;
	if (flow->flags & FLOW_OFFLOAD_DNAT &&
	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
		return -1;

	return 0;
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*iph)))
		return -1;

	iph = ip_hdr(skb);
	thoff = iph->ihl * 4;

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return -1;

	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET;
	tuple->l4proto = iph->protocol;
	tuple->iifidx = dev->ifindex;

	return 0;
}
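
/* Illustrative example (not from the source): for a TCP packet
 * 192.0.2.1:40000 -> 198.51.100.2:443 received on an interface with
 * ifindex 2, the tuple above ends up as src_v4 = 192.0.2.1,
 * dst_v4 = 198.51.100.2, src_port = 40000, dst_port = 443 (ports kept
 * in network byte order as read from the header), l3proto = AF_INET,
 * l4proto = IPPROTO_TCP, iifidx = 2.
 */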

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
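
/* Illustrative example: with tuple.mtu = 1500, a 1400 byte packet
 * passes; a 3000 byte non-GSO packet exceeds the MTU and is handed
 * back to the slow path; a 3000 byte GSO packet whose resulting
 * segments each fit within 1500 bytes is also let through.
 */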

unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	const struct rtable *rt;
	struct iphdr *iph;
	__be32 nexthop;

	if (skb->protocol != htons(ETH_P_IP))
		return NF_ACCEPT;

	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
	if (!outdev)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;

	/* Oversized packets with DF set must take the slow path so that
	 * an ICMP "fragmentation needed" error can be generated.
	 */
	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
	    (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
		return NF_ACCEPT;

	if (skb_try_make_writable(skb, sizeof(*iph)))
		return NF_DROP;

	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
	    nf_flow_nat_ip(flow, skb, dir) < 0)
		return NF_DROP;

	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	iph = ip_hdr(skb);
	ip_decrease_ttl(iph);

	skb->dev = outdev;
	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
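
/* Sketch (assumption, not part of this file): the flowtable
 * infrastructure attaches this hook at netdev ingress on each device
 * belonging to the flowtable. A registration would look roughly like:
 *
 *	static struct nf_hook_ops example_ops = {
 *		.hook		= nf_flow_offload_ip_hook,
 *		.pf		= NFPROTO_NETDEV,
 *		.hooknum	= NF_NETDEV_INGRESS,
 *		.priority	= 0,
 *	};
 *
 * with example_ops.priv pointing to the struct nf_flowtable to be
 * consulted; "example_ops" is a hypothetical name for illustration.
 */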

static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);

	return 0;
}

static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				    unsigned int thoff, struct in6_addr *addr,
				    struct in6_addr *new_addr)
{
	/* As in the IPv4 variant, callers test for a negative return
	 * value, so failure must not be reported as NF_DROP (0).
	 */
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
			return -1;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
			return -1;
		break;
	}

	return 0;
}

static int nf_flow_snat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb, struct ipv6hdr *ip6h,
			     unsigned int thoff,
			     enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_nat_ipv6(const struct flow_offload *flow,
			    struct sk_buff *skb,
			    enum flow_offload_tuple_dir dir)
{
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	unsigned int thoff = sizeof(*ip6h);

	if (flow->flags & FLOW_OFFLOAD_SNAT &&
	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
		return -1;
	if (flow->flags & FLOW_OFFLOAD_DNAT &&
	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
		return -1;

	return 0;
}

static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	if (!pskb_may_pull(skb, sizeof(*ip6h)))
		return -1;

	ip6h = ipv6_hdr(skb);

	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return -1;

	thoff = sizeof(*ip6h);
	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = ip6h->nexthdr;
	tuple->iifidx = dev->ifindex;

	return 0;
}

unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple tuple = {};
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct in6_addr *nexthop;
	struct ipv6hdr *ip6h;
	struct rt6_info *rt;

	if (skb->protocol != htons(ETH_P_IPV6))
		return NF_ACCEPT;

	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
		return NF_ACCEPT;

	tuplehash = flow_offload_lookup(flow_table, &tuple);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
	if (!outdev)
		return NF_ACCEPT;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;

	/* IPv6 routers never fragment: oversized packets go back to the
	 * slow path, which can emit an ICMPv6 "packet too big" error.
	 */
	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
		return NF_ACCEPT;

	if (skb_try_make_writable(skb, sizeof(*ip6h)))
		return NF_DROP;

	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
		return NF_DROP;

	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
	ip6h = ipv6_hdr(skb);
	ip6h->hop_limit--;

	skb->dev = outdev;
	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

	return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
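
/* Usage sketch (assumption, device names hypothetical): from userspace,
 * a flowtable that exercises these hooks can be configured with nft
 * along these lines:
 *
 *	table inet x {
 *		flowtable f {
 *			hook ingress priority 0; devices = { eth0, eth1 };
 *		}
 *		chain forward {
 *			type filter hook forward priority 0;
 *			ip protocol tcp flow offload @f
 *		}
 *	}
 *
 * Established TCP flows matching the rule are then added to the
 * flowtable, and subsequent packets take the fast path above.
 */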