]>
Commit | Line | Data |
---|---|---|
7d208687 FF |
1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | |
3 | #include <linux/module.h> | |
4 | #include <linux/netfilter.h> | |
5 | #include <linux/rhashtable.h> | |
6 | #include <linux/ip.h> | |
a908fdec | 7 | #include <linux/ipv6.h> |
7d208687 FF |
8 | #include <linux/netdevice.h> |
9 | #include <net/ip.h> | |
a908fdec FF |
10 | #include <net/ipv6.h> |
11 | #include <net/ip6_route.h> | |
7d208687 FF |
12 | #include <net/neighbour.h> |
13 | #include <net/netfilter/nf_flow_table.h> | |
14 | /* For layer 4 checksum field offset. */ | |
15 | #include <linux/tcp.h> | |
16 | #include <linux/udp.h> | |
17 | ||
33894c36 FF |
18 | static int nf_flow_state_check(struct flow_offload *flow, int proto, |
19 | struct sk_buff *skb, unsigned int thoff) | |
b6f27d32 FF |
20 | { |
21 | struct tcphdr *tcph; | |
22 | ||
33894c36 FF |
23 | if (proto != IPPROTO_TCP) |
24 | return 0; | |
25 | ||
b6f27d32 FF |
26 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) |
27 | return -1; | |
28 | ||
29 | tcph = (void *)(skb_network_header(skb) + thoff); | |
30 | if (unlikely(tcph->fin || tcph->rst)) { | |
31 | flow_offload_teardown(flow); | |
32 | return -1; | |
33 | } | |
34 | ||
35 | return 0; | |
36 | } | |
37 | ||
7d208687 FF |
38 | static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, |
39 | __be32 addr, __be32 new_addr) | |
40 | { | |
41 | struct tcphdr *tcph; | |
42 | ||
43 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || | |
44 | skb_try_make_writable(skb, thoff + sizeof(*tcph))) | |
45 | return -1; | |
46 | ||
47 | tcph = (void *)(skb_network_header(skb) + thoff); | |
48 | inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); | |
49 | ||
50 | return 0; | |
51 | } | |
52 | ||
53 | static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, | |
54 | __be32 addr, __be32 new_addr) | |
55 | { | |
56 | struct udphdr *udph; | |
57 | ||
58 | if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || | |
59 | skb_try_make_writable(skb, thoff + sizeof(*udph))) | |
60 | return -1; | |
61 | ||
62 | udph = (void *)(skb_network_header(skb) + thoff); | |
63 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
64 | inet_proto_csum_replace4(&udph->check, skb, addr, | |
65 | new_addr, true); | |
66 | if (!udph->check) | |
67 | udph->check = CSUM_MANGLED_0; | |
68 | } | |
69 | ||
70 | return 0; | |
71 | } | |
72 | ||
73 | static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, | |
74 | unsigned int thoff, __be32 addr, | |
75 | __be32 new_addr) | |
76 | { | |
77 | switch (iph->protocol) { | |
78 | case IPPROTO_TCP: | |
79 | if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) | |
80 | return NF_DROP; | |
81 | break; | |
82 | case IPPROTO_UDP: | |
83 | if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) | |
84 | return NF_DROP; | |
85 | break; | |
86 | } | |
87 | ||
88 | return 0; | |
89 | } | |
90 | ||
91 | static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
92 | struct iphdr *iph, unsigned int thoff, | |
93 | enum flow_offload_tuple_dir dir) | |
94 | { | |
95 | __be32 addr, new_addr; | |
96 | ||
97 | switch (dir) { | |
98 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
99 | addr = iph->saddr; | |
100 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; | |
101 | iph->saddr = new_addr; | |
102 | break; | |
103 | case FLOW_OFFLOAD_DIR_REPLY: | |
104 | addr = iph->daddr; | |
105 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; | |
106 | iph->daddr = new_addr; | |
107 | break; | |
108 | default: | |
109 | return -1; | |
110 | } | |
111 | csum_replace4(&iph->check, addr, new_addr); | |
112 | ||
113 | return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); | |
114 | } | |
115 | ||
116 | static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
117 | struct iphdr *iph, unsigned int thoff, | |
118 | enum flow_offload_tuple_dir dir) | |
119 | { | |
120 | __be32 addr, new_addr; | |
121 | ||
122 | switch (dir) { | |
123 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
124 | addr = iph->daddr; | |
125 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; | |
126 | iph->daddr = new_addr; | |
127 | break; | |
128 | case FLOW_OFFLOAD_DIR_REPLY: | |
129 | addr = iph->saddr; | |
130 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; | |
131 | iph->saddr = new_addr; | |
132 | break; | |
133 | default: | |
134 | return -1; | |
135 | } | |
136 | csum_replace4(&iph->check, addr, new_addr); | |
137 | ||
138 | return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); | |
139 | } | |
140 | ||
141 | static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
b6f27d32 | 142 | unsigned int thoff, enum flow_offload_tuple_dir dir) |
7d208687 FF |
143 | { |
144 | struct iphdr *iph = ip_hdr(skb); | |
7d208687 FF |
145 | |
146 | if (flow->flags & FLOW_OFFLOAD_SNAT && | |
147 | (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || | |
148 | nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) | |
149 | return -1; | |
150 | if (flow->flags & FLOW_OFFLOAD_DNAT && | |
151 | (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || | |
152 | nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) | |
153 | return -1; | |
154 | ||
155 | return 0; | |
156 | } | |
157 | ||
/*
 * Report whether an IPv4 header of @thoff bytes differs in length from a
 * bare struct iphdr.
 */
static bool ip_has_options(unsigned int thoff)
{
	if (thoff == sizeof(struct iphdr))
		return false;

	return true;
}
162 | ||
163 | static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, | |
164 | struct flow_offload_tuple *tuple) | |
165 | { | |
166 | struct flow_ports *ports; | |
167 | unsigned int thoff; | |
168 | struct iphdr *iph; | |
169 | ||
170 | if (!pskb_may_pull(skb, sizeof(*iph))) | |
171 | return -1; | |
172 | ||
173 | iph = ip_hdr(skb); | |
174 | thoff = iph->ihl * 4; | |
175 | ||
176 | if (ip_is_fragment(iph) || | |
177 | unlikely(ip_has_options(thoff))) | |
178 | return -1; | |
179 | ||
180 | if (iph->protocol != IPPROTO_TCP && | |
181 | iph->protocol != IPPROTO_UDP) | |
182 | return -1; | |
183 | ||
33cc3c0c TY |
184 | if (iph->ttl <= 1) |
185 | return -1; | |
186 | ||
7d208687 FF |
187 | thoff = iph->ihl * 4; |
188 | if (!pskb_may_pull(skb, thoff + sizeof(*ports))) | |
189 | return -1; | |
190 | ||
191 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); | |
192 | ||
193 | tuple->src_v4.s_addr = iph->saddr; | |
194 | tuple->dst_v4.s_addr = iph->daddr; | |
195 | tuple->src_port = ports->source; | |
196 | tuple->dst_port = ports->dest; | |
197 | tuple->l3proto = AF_INET; | |
198 | tuple->l4proto = iph->protocol; | |
199 | tuple->iifidx = dev->ifindex; | |
200 | ||
201 | return 0; | |
202 | } | |
203 | ||
204 | /* Based on ip_exceeds_mtu(). */ | |
205 | static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | |
206 | { | |
207 | if (skb->len <= mtu) | |
208 | return false; | |
209 | ||
7d208687 FF |
210 | if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) |
211 | return false; | |
212 | ||
213 | return true; | |
214 | } | |
215 | ||
216 | unsigned int | |
217 | nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, | |
218 | const struct nf_hook_state *state) | |
219 | { | |
220 | struct flow_offload_tuple_rhash *tuplehash; | |
221 | struct nf_flowtable *flow_table = priv; | |
222 | struct flow_offload_tuple tuple = {}; | |
223 | enum flow_offload_tuple_dir dir; | |
224 | struct flow_offload *flow; | |
225 | struct net_device *outdev; | |
2a79fd39 | 226 | struct rtable *rt; |
b6f27d32 | 227 | unsigned int thoff; |
7d208687 FF |
228 | struct iphdr *iph; |
229 | __be32 nexthop; | |
230 | ||
231 | if (skb->protocol != htons(ETH_P_IP)) | |
232 | return NF_ACCEPT; | |
233 | ||
234 | if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) | |
235 | return NF_ACCEPT; | |
236 | ||
237 | tuplehash = flow_offload_lookup(flow_table, &tuple); | |
238 | if (tuplehash == NULL) | |
239 | return NF_ACCEPT; | |
240 | ||
7d208687 FF |
241 | dir = tuplehash->tuple.dir; |
242 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); | |
2a79fd39 | 243 | rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; |
227e1e4d | 244 | outdev = rt->dst.dev; |
7d208687 | 245 | |
3aeb51d7 FF |
246 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && |
247 | (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) | |
7d208687 FF |
248 | return NF_ACCEPT; |
249 | ||
250 | if (skb_try_make_writable(skb, sizeof(*iph))) | |
251 | return NF_DROP; | |
252 | ||
b6f27d32 | 253 | thoff = ip_hdr(skb)->ihl * 4; |
33894c36 | 254 | if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) |
b6f27d32 FF |
255 | return NF_ACCEPT; |
256 | ||
28c5ed2f | 257 | if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) |
7d208687 FF |
258 | return NF_DROP; |
259 | ||
260 | flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; | |
261 | iph = ip_hdr(skb); | |
262 | ip_decrease_ttl(iph); | |
263 | ||
264 | skb->dev = outdev; | |
265 | nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); | |
2a79fd39 | 266 | skb_dst_set_noref(skb, &rt->dst); |
7d208687 FF |
267 | neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); |
268 | ||
269 | return NF_STOLEN; | |
270 | } | |
271 | EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); | |
a908fdec FF |
272 | |
273 | static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, | |
274 | struct in6_addr *addr, | |
275 | struct in6_addr *new_addr) | |
276 | { | |
277 | struct tcphdr *tcph; | |
278 | ||
279 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || | |
280 | skb_try_make_writable(skb, thoff + sizeof(*tcph))) | |
281 | return -1; | |
282 | ||
283 | tcph = (void *)(skb_network_header(skb) + thoff); | |
284 | inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, | |
285 | new_addr->s6_addr32, true); | |
286 | ||
287 | return 0; | |
288 | } | |
289 | ||
290 | static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, | |
291 | struct in6_addr *addr, | |
292 | struct in6_addr *new_addr) | |
293 | { | |
294 | struct udphdr *udph; | |
295 | ||
296 | if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || | |
297 | skb_try_make_writable(skb, thoff + sizeof(*udph))) | |
298 | return -1; | |
299 | ||
300 | udph = (void *)(skb_network_header(skb) + thoff); | |
301 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
302 | inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, | |
303 | new_addr->s6_addr32, true); | |
304 | if (!udph->check) | |
305 | udph->check = CSUM_MANGLED_0; | |
306 | } | |
307 | ||
308 | return 0; | |
309 | } | |
310 | ||
311 | static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, | |
312 | unsigned int thoff, struct in6_addr *addr, | |
313 | struct in6_addr *new_addr) | |
314 | { | |
315 | switch (ip6h->nexthdr) { | |
316 | case IPPROTO_TCP: | |
317 | if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) | |
318 | return NF_DROP; | |
319 | break; | |
320 | case IPPROTO_UDP: | |
321 | if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) | |
322 | return NF_DROP; | |
323 | break; | |
324 | } | |
325 | ||
326 | return 0; | |
327 | } | |
328 | ||
329 | static int nf_flow_snat_ipv6(const struct flow_offload *flow, | |
330 | struct sk_buff *skb, struct ipv6hdr *ip6h, | |
331 | unsigned int thoff, | |
332 | enum flow_offload_tuple_dir dir) | |
333 | { | |
334 | struct in6_addr addr, new_addr; | |
335 | ||
336 | switch (dir) { | |
337 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
338 | addr = ip6h->saddr; | |
339 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; | |
340 | ip6h->saddr = new_addr; | |
341 | break; | |
342 | case FLOW_OFFLOAD_DIR_REPLY: | |
343 | addr = ip6h->daddr; | |
344 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; | |
345 | ip6h->daddr = new_addr; | |
346 | break; | |
347 | default: | |
348 | return -1; | |
349 | } | |
350 | ||
351 | return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); | |
352 | } | |
353 | ||
354 | static int nf_flow_dnat_ipv6(const struct flow_offload *flow, | |
355 | struct sk_buff *skb, struct ipv6hdr *ip6h, | |
356 | unsigned int thoff, | |
357 | enum flow_offload_tuple_dir dir) | |
358 | { | |
359 | struct in6_addr addr, new_addr; | |
360 | ||
361 | switch (dir) { | |
362 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
363 | addr = ip6h->daddr; | |
364 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; | |
365 | ip6h->daddr = new_addr; | |
366 | break; | |
367 | case FLOW_OFFLOAD_DIR_REPLY: | |
368 | addr = ip6h->saddr; | |
369 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; | |
370 | ip6h->saddr = new_addr; | |
371 | break; | |
372 | default: | |
373 | return -1; | |
374 | } | |
375 | ||
376 | return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); | |
377 | } | |
378 | ||
379 | static int nf_flow_nat_ipv6(const struct flow_offload *flow, | |
380 | struct sk_buff *skb, | |
381 | enum flow_offload_tuple_dir dir) | |
382 | { | |
383 | struct ipv6hdr *ip6h = ipv6_hdr(skb); | |
384 | unsigned int thoff = sizeof(*ip6h); | |
385 | ||
386 | if (flow->flags & FLOW_OFFLOAD_SNAT && | |
387 | (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || | |
388 | nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) | |
389 | return -1; | |
390 | if (flow->flags & FLOW_OFFLOAD_DNAT && | |
391 | (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || | |
392 | nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) | |
393 | return -1; | |
394 | ||
395 | return 0; | |
396 | } | |
397 | ||
398 | static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, | |
399 | struct flow_offload_tuple *tuple) | |
400 | { | |
401 | struct flow_ports *ports; | |
402 | struct ipv6hdr *ip6h; | |
403 | unsigned int thoff; | |
404 | ||
405 | if (!pskb_may_pull(skb, sizeof(*ip6h))) | |
406 | return -1; | |
407 | ||
408 | ip6h = ipv6_hdr(skb); | |
409 | ||
410 | if (ip6h->nexthdr != IPPROTO_TCP && | |
411 | ip6h->nexthdr != IPPROTO_UDP) | |
412 | return -1; | |
413 | ||
33cc3c0c TY |
414 | if (ip6h->hop_limit <= 1) |
415 | return -1; | |
416 | ||
a908fdec FF |
417 | thoff = sizeof(*ip6h); |
418 | if (!pskb_may_pull(skb, thoff + sizeof(*ports))) | |
419 | return -1; | |
420 | ||
421 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); | |
422 | ||
423 | tuple->src_v6 = ip6h->saddr; | |
424 | tuple->dst_v6 = ip6h->daddr; | |
425 | tuple->src_port = ports->source; | |
426 | tuple->dst_port = ports->dest; | |
427 | tuple->l3proto = AF_INET6; | |
428 | tuple->l4proto = ip6h->nexthdr; | |
429 | tuple->iifidx = dev->ifindex; | |
430 | ||
431 | return 0; | |
432 | } | |
433 | ||
434 | unsigned int | |
435 | nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, | |
436 | const struct nf_hook_state *state) | |
437 | { | |
438 | struct flow_offload_tuple_rhash *tuplehash; | |
439 | struct nf_flowtable *flow_table = priv; | |
440 | struct flow_offload_tuple tuple = {}; | |
441 | enum flow_offload_tuple_dir dir; | |
442 | struct flow_offload *flow; | |
443 | struct net_device *outdev; | |
444 | struct in6_addr *nexthop; | |
445 | struct ipv6hdr *ip6h; | |
446 | struct rt6_info *rt; | |
447 | ||
448 | if (skb->protocol != htons(ETH_P_IPV6)) | |
449 | return NF_ACCEPT; | |
450 | ||
451 | if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) | |
452 | return NF_ACCEPT; | |
453 | ||
454 | tuplehash = flow_offload_lookup(flow_table, &tuple); | |
455 | if (tuplehash == NULL) | |
456 | return NF_ACCEPT; | |
457 | ||
a908fdec FF |
458 | dir = tuplehash->tuple.dir; |
459 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); | |
460 | rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; | |
227e1e4d | 461 | outdev = rt->dst.dev; |
a908fdec FF |
462 | |
463 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) | |
464 | return NF_ACCEPT; | |
465 | ||
33894c36 FF |
466 | if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb, |
467 | sizeof(*ip6h))) | |
b6f27d32 FF |
468 | return NF_ACCEPT; |
469 | ||
a908fdec FF |
470 | if (skb_try_make_writable(skb, sizeof(*ip6h))) |
471 | return NF_DROP; | |
472 | ||
28c5ed2f | 473 | if (nf_flow_nat_ipv6(flow, skb, dir) < 0) |
a908fdec FF |
474 | return NF_DROP; |
475 | ||
476 | flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; | |
477 | ip6h = ipv6_hdr(skb); | |
478 | ip6h->hop_limit--; | |
479 | ||
480 | skb->dev = outdev; | |
481 | nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); | |
2a79fd39 | 482 | skb_dst_set_noref(skb, &rt->dst); |
a908fdec FF |
483 | neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); |
484 | ||
485 | return NF_STOLEN; | |
486 | } | |
487 | EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); |