]>
Commit | Line | Data |
---|---|---|
7d208687 FF |
1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | |
3 | #include <linux/module.h> | |
4 | #include <linux/netfilter.h> | |
5 | #include <linux/rhashtable.h> | |
6 | #include <linux/ip.h> | |
a908fdec | 7 | #include <linux/ipv6.h> |
7d208687 FF |
8 | #include <linux/netdevice.h> |
9 | #include <net/ip.h> | |
a908fdec FF |
10 | #include <net/ipv6.h> |
11 | #include <net/ip6_route.h> | |
7d208687 FF |
12 | #include <net/neighbour.h> |
13 | #include <net/netfilter/nf_flow_table.h> | |
14 | /* For layer 4 checksum field offset. */ | |
15 | #include <linux/tcp.h> | |
16 | #include <linux/udp.h> | |
17 | ||
18 | static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, | |
19 | __be32 addr, __be32 new_addr) | |
20 | { | |
21 | struct tcphdr *tcph; | |
22 | ||
23 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || | |
24 | skb_try_make_writable(skb, thoff + sizeof(*tcph))) | |
25 | return -1; | |
26 | ||
27 | tcph = (void *)(skb_network_header(skb) + thoff); | |
28 | inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); | |
29 | ||
30 | return 0; | |
31 | } | |
32 | ||
33 | static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, | |
34 | __be32 addr, __be32 new_addr) | |
35 | { | |
36 | struct udphdr *udph; | |
37 | ||
38 | if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || | |
39 | skb_try_make_writable(skb, thoff + sizeof(*udph))) | |
40 | return -1; | |
41 | ||
42 | udph = (void *)(skb_network_header(skb) + thoff); | |
43 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
44 | inet_proto_csum_replace4(&udph->check, skb, addr, | |
45 | new_addr, true); | |
46 | if (!udph->check) | |
47 | udph->check = CSUM_MANGLED_0; | |
48 | } | |
49 | ||
50 | return 0; | |
51 | } | |
52 | ||
53 | static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, | |
54 | unsigned int thoff, __be32 addr, | |
55 | __be32 new_addr) | |
56 | { | |
57 | switch (iph->protocol) { | |
58 | case IPPROTO_TCP: | |
59 | if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) | |
60 | return NF_DROP; | |
61 | break; | |
62 | case IPPROTO_UDP: | |
63 | if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) | |
64 | return NF_DROP; | |
65 | break; | |
66 | } | |
67 | ||
68 | return 0; | |
69 | } | |
70 | ||
71 | static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
72 | struct iphdr *iph, unsigned int thoff, | |
73 | enum flow_offload_tuple_dir dir) | |
74 | { | |
75 | __be32 addr, new_addr; | |
76 | ||
77 | switch (dir) { | |
78 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
79 | addr = iph->saddr; | |
80 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; | |
81 | iph->saddr = new_addr; | |
82 | break; | |
83 | case FLOW_OFFLOAD_DIR_REPLY: | |
84 | addr = iph->daddr; | |
85 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; | |
86 | iph->daddr = new_addr; | |
87 | break; | |
88 | default: | |
89 | return -1; | |
90 | } | |
91 | csum_replace4(&iph->check, addr, new_addr); | |
92 | ||
93 | return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); | |
94 | } | |
95 | ||
96 | static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
97 | struct iphdr *iph, unsigned int thoff, | |
98 | enum flow_offload_tuple_dir dir) | |
99 | { | |
100 | __be32 addr, new_addr; | |
101 | ||
102 | switch (dir) { | |
103 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
104 | addr = iph->daddr; | |
105 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; | |
106 | iph->daddr = new_addr; | |
107 | break; | |
108 | case FLOW_OFFLOAD_DIR_REPLY: | |
109 | addr = iph->saddr; | |
110 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; | |
111 | iph->saddr = new_addr; | |
112 | break; | |
113 | default: | |
114 | return -1; | |
115 | } | |
116 | csum_replace4(&iph->check, addr, new_addr); | |
117 | ||
118 | return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); | |
119 | } | |
120 | ||
121 | static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, | |
122 | enum flow_offload_tuple_dir dir) | |
123 | { | |
124 | struct iphdr *iph = ip_hdr(skb); | |
125 | unsigned int thoff = iph->ihl * 4; | |
126 | ||
127 | if (flow->flags & FLOW_OFFLOAD_SNAT && | |
128 | (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || | |
129 | nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) | |
130 | return -1; | |
131 | if (flow->flags & FLOW_OFFLOAD_DNAT && | |
132 | (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || | |
133 | nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) | |
134 | return -1; | |
135 | ||
136 | return 0; | |
137 | } | |
138 | ||
139 | static bool ip_has_options(unsigned int thoff) | |
140 | { | |
141 | return thoff != sizeof(struct iphdr); | |
142 | } | |
143 | ||
144 | static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, | |
145 | struct flow_offload_tuple *tuple) | |
146 | { | |
147 | struct flow_ports *ports; | |
148 | unsigned int thoff; | |
149 | struct iphdr *iph; | |
150 | ||
151 | if (!pskb_may_pull(skb, sizeof(*iph))) | |
152 | return -1; | |
153 | ||
154 | iph = ip_hdr(skb); | |
155 | thoff = iph->ihl * 4; | |
156 | ||
157 | if (ip_is_fragment(iph) || | |
158 | unlikely(ip_has_options(thoff))) | |
159 | return -1; | |
160 | ||
161 | if (iph->protocol != IPPROTO_TCP && | |
162 | iph->protocol != IPPROTO_UDP) | |
163 | return -1; | |
164 | ||
165 | thoff = iph->ihl * 4; | |
166 | if (!pskb_may_pull(skb, thoff + sizeof(*ports))) | |
167 | return -1; | |
168 | ||
169 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); | |
170 | ||
171 | tuple->src_v4.s_addr = iph->saddr; | |
172 | tuple->dst_v4.s_addr = iph->daddr; | |
173 | tuple->src_port = ports->source; | |
174 | tuple->dst_port = ports->dest; | |
175 | tuple->l3proto = AF_INET; | |
176 | tuple->l4proto = iph->protocol; | |
177 | tuple->iifidx = dev->ifindex; | |
178 | ||
179 | return 0; | |
180 | } | |
181 | ||
182 | /* Based on ip_exceeds_mtu(). */ | |
183 | static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | |
184 | { | |
185 | if (skb->len <= mtu) | |
186 | return false; | |
187 | ||
7d208687 FF |
188 | if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) |
189 | return false; | |
190 | ||
191 | return true; | |
192 | } | |
193 | ||
194 | unsigned int | |
195 | nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, | |
196 | const struct nf_hook_state *state) | |
197 | { | |
198 | struct flow_offload_tuple_rhash *tuplehash; | |
199 | struct nf_flowtable *flow_table = priv; | |
200 | struct flow_offload_tuple tuple = {}; | |
201 | enum flow_offload_tuple_dir dir; | |
202 | struct flow_offload *flow; | |
203 | struct net_device *outdev; | |
204 | const struct rtable *rt; | |
205 | struct iphdr *iph; | |
206 | __be32 nexthop; | |
207 | ||
208 | if (skb->protocol != htons(ETH_P_IP)) | |
209 | return NF_ACCEPT; | |
210 | ||
211 | if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) | |
212 | return NF_ACCEPT; | |
213 | ||
214 | tuplehash = flow_offload_lookup(flow_table, &tuple); | |
215 | if (tuplehash == NULL) | |
216 | return NF_ACCEPT; | |
217 | ||
218 | outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); | |
219 | if (!outdev) | |
220 | return NF_ACCEPT; | |
221 | ||
222 | dir = tuplehash->tuple.dir; | |
223 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); | |
224 | rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; | |
225 | ||
3aeb51d7 FF |
226 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && |
227 | (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) | |
7d208687 FF |
228 | return NF_ACCEPT; |
229 | ||
230 | if (skb_try_make_writable(skb, sizeof(*iph))) | |
231 | return NF_DROP; | |
232 | ||
233 | if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && | |
234 | nf_flow_nat_ip(flow, skb, dir) < 0) | |
235 | return NF_DROP; | |
236 | ||
237 | flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; | |
238 | iph = ip_hdr(skb); | |
239 | ip_decrease_ttl(iph); | |
240 | ||
241 | skb->dev = outdev; | |
242 | nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); | |
243 | neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); | |
244 | ||
245 | return NF_STOLEN; | |
246 | } | |
247 | EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); | |
a908fdec FF |
248 | |
249 | static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, | |
250 | struct in6_addr *addr, | |
251 | struct in6_addr *new_addr) | |
252 | { | |
253 | struct tcphdr *tcph; | |
254 | ||
255 | if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || | |
256 | skb_try_make_writable(skb, thoff + sizeof(*tcph))) | |
257 | return -1; | |
258 | ||
259 | tcph = (void *)(skb_network_header(skb) + thoff); | |
260 | inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, | |
261 | new_addr->s6_addr32, true); | |
262 | ||
263 | return 0; | |
264 | } | |
265 | ||
266 | static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, | |
267 | struct in6_addr *addr, | |
268 | struct in6_addr *new_addr) | |
269 | { | |
270 | struct udphdr *udph; | |
271 | ||
272 | if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || | |
273 | skb_try_make_writable(skb, thoff + sizeof(*udph))) | |
274 | return -1; | |
275 | ||
276 | udph = (void *)(skb_network_header(skb) + thoff); | |
277 | if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { | |
278 | inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, | |
279 | new_addr->s6_addr32, true); | |
280 | if (!udph->check) | |
281 | udph->check = CSUM_MANGLED_0; | |
282 | } | |
283 | ||
284 | return 0; | |
285 | } | |
286 | ||
287 | static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, | |
288 | unsigned int thoff, struct in6_addr *addr, | |
289 | struct in6_addr *new_addr) | |
290 | { | |
291 | switch (ip6h->nexthdr) { | |
292 | case IPPROTO_TCP: | |
293 | if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) | |
294 | return NF_DROP; | |
295 | break; | |
296 | case IPPROTO_UDP: | |
297 | if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) | |
298 | return NF_DROP; | |
299 | break; | |
300 | } | |
301 | ||
302 | return 0; | |
303 | } | |
304 | ||
305 | static int nf_flow_snat_ipv6(const struct flow_offload *flow, | |
306 | struct sk_buff *skb, struct ipv6hdr *ip6h, | |
307 | unsigned int thoff, | |
308 | enum flow_offload_tuple_dir dir) | |
309 | { | |
310 | struct in6_addr addr, new_addr; | |
311 | ||
312 | switch (dir) { | |
313 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
314 | addr = ip6h->saddr; | |
315 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; | |
316 | ip6h->saddr = new_addr; | |
317 | break; | |
318 | case FLOW_OFFLOAD_DIR_REPLY: | |
319 | addr = ip6h->daddr; | |
320 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; | |
321 | ip6h->daddr = new_addr; | |
322 | break; | |
323 | default: | |
324 | return -1; | |
325 | } | |
326 | ||
327 | return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); | |
328 | } | |
329 | ||
330 | static int nf_flow_dnat_ipv6(const struct flow_offload *flow, | |
331 | struct sk_buff *skb, struct ipv6hdr *ip6h, | |
332 | unsigned int thoff, | |
333 | enum flow_offload_tuple_dir dir) | |
334 | { | |
335 | struct in6_addr addr, new_addr; | |
336 | ||
337 | switch (dir) { | |
338 | case FLOW_OFFLOAD_DIR_ORIGINAL: | |
339 | addr = ip6h->daddr; | |
340 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; | |
341 | ip6h->daddr = new_addr; | |
342 | break; | |
343 | case FLOW_OFFLOAD_DIR_REPLY: | |
344 | addr = ip6h->saddr; | |
345 | new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; | |
346 | ip6h->saddr = new_addr; | |
347 | break; | |
348 | default: | |
349 | return -1; | |
350 | } | |
351 | ||
352 | return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); | |
353 | } | |
354 | ||
355 | static int nf_flow_nat_ipv6(const struct flow_offload *flow, | |
356 | struct sk_buff *skb, | |
357 | enum flow_offload_tuple_dir dir) | |
358 | { | |
359 | struct ipv6hdr *ip6h = ipv6_hdr(skb); | |
360 | unsigned int thoff = sizeof(*ip6h); | |
361 | ||
362 | if (flow->flags & FLOW_OFFLOAD_SNAT && | |
363 | (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || | |
364 | nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) | |
365 | return -1; | |
366 | if (flow->flags & FLOW_OFFLOAD_DNAT && | |
367 | (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || | |
368 | nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) | |
369 | return -1; | |
370 | ||
371 | return 0; | |
372 | } | |
373 | ||
374 | static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, | |
375 | struct flow_offload_tuple *tuple) | |
376 | { | |
377 | struct flow_ports *ports; | |
378 | struct ipv6hdr *ip6h; | |
379 | unsigned int thoff; | |
380 | ||
381 | if (!pskb_may_pull(skb, sizeof(*ip6h))) | |
382 | return -1; | |
383 | ||
384 | ip6h = ipv6_hdr(skb); | |
385 | ||
386 | if (ip6h->nexthdr != IPPROTO_TCP && | |
387 | ip6h->nexthdr != IPPROTO_UDP) | |
388 | return -1; | |
389 | ||
390 | thoff = sizeof(*ip6h); | |
391 | if (!pskb_may_pull(skb, thoff + sizeof(*ports))) | |
392 | return -1; | |
393 | ||
394 | ports = (struct flow_ports *)(skb_network_header(skb) + thoff); | |
395 | ||
396 | tuple->src_v6 = ip6h->saddr; | |
397 | tuple->dst_v6 = ip6h->daddr; | |
398 | tuple->src_port = ports->source; | |
399 | tuple->dst_port = ports->dest; | |
400 | tuple->l3proto = AF_INET6; | |
401 | tuple->l4proto = ip6h->nexthdr; | |
402 | tuple->iifidx = dev->ifindex; | |
403 | ||
404 | return 0; | |
405 | } | |
406 | ||
407 | unsigned int | |
408 | nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, | |
409 | const struct nf_hook_state *state) | |
410 | { | |
411 | struct flow_offload_tuple_rhash *tuplehash; | |
412 | struct nf_flowtable *flow_table = priv; | |
413 | struct flow_offload_tuple tuple = {}; | |
414 | enum flow_offload_tuple_dir dir; | |
415 | struct flow_offload *flow; | |
416 | struct net_device *outdev; | |
417 | struct in6_addr *nexthop; | |
418 | struct ipv6hdr *ip6h; | |
419 | struct rt6_info *rt; | |
420 | ||
421 | if (skb->protocol != htons(ETH_P_IPV6)) | |
422 | return NF_ACCEPT; | |
423 | ||
424 | if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) | |
425 | return NF_ACCEPT; | |
426 | ||
427 | tuplehash = flow_offload_lookup(flow_table, &tuple); | |
428 | if (tuplehash == NULL) | |
429 | return NF_ACCEPT; | |
430 | ||
431 | outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); | |
432 | if (!outdev) | |
433 | return NF_ACCEPT; | |
434 | ||
435 | dir = tuplehash->tuple.dir; | |
436 | flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); | |
437 | rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; | |
438 | ||
439 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) | |
440 | return NF_ACCEPT; | |
441 | ||
442 | if (skb_try_make_writable(skb, sizeof(*ip6h))) | |
443 | return NF_DROP; | |
444 | ||
445 | if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && | |
446 | nf_flow_nat_ipv6(flow, skb, dir) < 0) | |
447 | return NF_DROP; | |
448 | ||
449 | flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; | |
450 | ip6h = ipv6_hdr(skb); | |
451 | ip6h->hop_limit--; | |
452 | ||
453 | skb->dev = outdev; | |
454 | nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); | |
455 | neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); | |
456 | ||
457 | return NF_STOLEN; | |
458 | } | |
459 | EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); |