]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - net/netfilter/ipvs/ip_vs_xmit.c
ipvs: no need to reroute anymore on DNAT over loopback
[mirror_ubuntu-zesty-kernel.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
24 */
25
26 #define KMSG_COMPONENT "IPVS"
27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
29 #include <linux/kernel.h>
30 #include <linux/slab.h>
31 #include <linux/tcp.h> /* for tcphdr */
32 #include <net/ip.h>
33 #include <net/tcp.h> /* for csum_tcpudp_magic */
34 #include <net/udp.h>
35 #include <net/icmp.h> /* for icmp_send */
36 #include <net/route.h> /* for ip_route_output */
37 #include <net/ipv6.h>
38 #include <net/ip6_route.h>
39 #include <net/addrconf.h>
40 #include <linux/icmpv6.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv4.h>
43
44 #include <net/ip_vs.h>
45
/* Route-lookup mode flags for __ip_vs_get_out_rt() /
 * __ip_vs_get_out_rt_v6(); combined as a bitmask in rt_mode.
 */
enum {
	IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
	IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
				* local
				*/
	IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
};
55
56 /*
57 * Destination cache to speed up outgoing route lookup
58 */
59 static inline void
60 __ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie)
61 {
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
66 dest->dst_cookie = dst_cookie;
67 dst_release(old_dst);
68 }
69
/*
 * Return the destination's cached route with a reference taken for
 * the caller, or NULL when nothing is cached or the cached entry is
 * obsolete (in which case the stale entry is dropped from the cache).
 * Caller is expected to hold dest->dst_lock.
 */
static inline struct dst_entry *
__ip_vs_dst_check(struct ip_vs_dest *dest)
{
	struct dst_entry *dst = dest->dst_cache;

	if (!dst)
		return NULL;
	/* Revalidate obsolete entries against the cookie stored when the
	 * route was cached (see __ip_vs_dst_set callers).
	 */
	if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) {
		dest->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}
	dst_hold(dst);	/* reference handed to the caller */
	return dst;
}
85
86 static inline bool
87 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
88 {
89 if (IP6CB(skb)->frag_max_size) {
90 /* frag_max_size tell us that, this packet have been
91 * defragmented by netfilter IPv6 conntrack module.
92 */
93 if (IP6CB(skb)->frag_max_size > mtu)
94 return true; /* largest fragment violate MTU */
95 }
96 else if (skb->len > mtu && !skb_is_gso(skb)) {
97 return true; /* Packet size violate MTU size */
98 }
99 return false;
100 }
101
102 /* Get route to daddr, update *saddr, optionally bind route to saddr */
/* Get route to daddr, update *saddr, optionally bind route to saddr.
 * *saddr is in/out: in IP_VS_RT_MODE_CONNECT mode the cached source
 * address is tried first and the source actually resolved is written
 * back.  Returns the route, or NULL on lookup failure.
 */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
				       int rt_mode, __be32 *saddr)
{
	struct flowi4 fl4;
	struct rtable *rt;
	int loop = 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
			   FLOWI_FLAG_KNOWN_NH : 0;

retry:
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		/* Invalid saddr ? */
		if (PTR_ERR(rt) == -EINVAL && *saddr &&
		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
			/* The remembered source may have gone away;
			 * retry once letting routing pick a new one.
			 */
			*saddr = 0;
			flowi4_update_output(&fl4, 0, 0, daddr, 0);
			goto retry;
		}
		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
		return NULL;
	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
		/* First lookup resolved a source: redo the lookup bound
		 * to it so the route is tied to that source address.
		 * loop prevents bouncing between the two retry paths.
		 */
		ip_rt_put(rt);
		*saddr = fl4.saddr;
		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
		loop++;
		goto retry;
	}
	*saddr = fl4.saddr;
	return rt;
}
138
139 /* Get route to destination or remote server */
140 static struct rtable *
141 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
142 __be32 daddr, int rt_mode, __be32 *ret_saddr)
143 {
144 struct net *net = dev_net(skb_dst(skb)->dev);
145 struct rtable *rt; /* Route to the other host */
146 struct rtable *ort; /* Original route */
147 int local;
148
149 if (dest) {
150 spin_lock(&dest->dst_lock);
151 rt = (struct rtable *) __ip_vs_dst_check(dest);
152 if (!rt) {
153 rt = do_output_route4(net, dest->addr.ip, rt_mode,
154 &dest->dst_saddr.ip);
155 if (!rt) {
156 spin_unlock(&dest->dst_lock);
157 return NULL;
158 }
159 __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0);
160 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
161 &dest->addr.ip, &dest->dst_saddr.ip,
162 atomic_read(&rt->dst.__refcnt));
163 }
164 daddr = dest->addr.ip;
165 if (ret_saddr)
166 *ret_saddr = dest->dst_saddr.ip;
167 spin_unlock(&dest->dst_lock);
168 } else {
169 __be32 saddr = htonl(INADDR_ANY);
170
171 /* For such unconfigured boxes avoid many route lookups
172 * for performance reasons because we do not remember saddr
173 */
174 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
175 rt = do_output_route4(net, daddr, rt_mode, &saddr);
176 if (!rt)
177 return NULL;
178 if (ret_saddr)
179 *ret_saddr = saddr;
180 }
181
182 local = rt->rt_flags & RTCF_LOCAL;
183 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
184 rt_mode)) {
185 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
186 (rt->rt_flags & RTCF_LOCAL) ?
187 "local":"non-local", &daddr);
188 ip_rt_put(rt);
189 return NULL;
190 }
191 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
192 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
193 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
194 "requires NAT method, dest: %pI4\n",
195 &ip_hdr(skb)->daddr, &daddr);
196 ip_rt_put(rt);
197 return NULL;
198 }
199 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
200 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
201 "to non-local address, dest: %pI4\n",
202 &ip_hdr(skb)->saddr, &daddr);
203 ip_rt_put(rt);
204 return NULL;
205 }
206
207 return rt;
208 }
209
210 #ifdef CONFIG_IP_VS_IPV6
211
212 static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
213 {
214 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
215 }
216
/* Look up an IPv6 output route to daddr.
 * If ret_saddr is non-NULL, also resolve a source address for the flow
 * (and, when do_xfrm is set, pass the flow through the xfrm/IPsec
 * lookup) and store it there.  Returns the dst entry or NULL on error.
 */
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			/* NOTE(review): dst set to NULL here so out_err's
			 * dst_release() is a no-op — presumably xfrm_lookup
			 * consumed the route on error; verify for this
			 * kernel version.
			 */
			dst = NULL;
			goto out_err;
		}
	}
	*ret_saddr = fl6.saddr;
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}
250
251 /*
252 * Get route to destination or remote server
253 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int local;

	if (dest) {
		/* Reuse the per-destination cached route under dst_lock */
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			/* Remember the fib node serial number so later
			 * cache hits can detect routing table changes
			 * (checked in __ip_vs_dst_check).
			 */
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest->dst_saddr.in6;
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	/* Reject route types the caller did not allow in rt_mode */
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
			     local ? "local":"non-local", daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* Redirect of a remote daddr to a local address needs RDR mode */
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
	      __ip_vs_is_local_route6(ort))) {
		IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
			     "requires NAT method, dest: %pI6c\n",
			     &ipv6_hdr(skb)->daddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	/* Never send loopback-sourced packets off the box */
	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
				    IPV6_ADDR_LOOPBACK)) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
			     "to non-local address, dest: %pI6c\n",
			     &ipv6_hdr(skb)->saddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}

	return rt;
}
324 #endif
325
326
327 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
328 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
329 struct ip_vs_conn *cp)
330 {
331 int ret = NF_ACCEPT;
332
333 skb->ipvs_property = 1;
334 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
335 ret = ip_vs_confirm_conntrack(skb);
336 if (ret == NF_ACCEPT) {
337 nf_reset(skb);
338 skb_forward_csum(skb);
339 }
340 return ret;
341 }
342
343 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
344 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
345 struct ip_vs_conn *cp, int local)
346 {
347 int ret = NF_STOLEN;
348
349 skb->ipvs_property = 1;
350 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
351 ip_vs_notrack(skb);
352 else
353 ip_vs_update_conntrack(skb, cp, 1);
354 if (!local) {
355 skb_forward_csum(skb);
356 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
357 dst_output);
358 } else
359 ret = NF_ACCEPT;
360 return ret;
361 }
362
363 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
364 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
365 struct ip_vs_conn *cp, int local)
366 {
367 int ret = NF_STOLEN;
368
369 skb->ipvs_property = 1;
370 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
371 ip_vs_notrack(skb);
372 if (!local) {
373 skb_forward_csum(skb);
374 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
375 dst_output);
376 } else
377 ret = NF_ACCEPT;
378 return ret;
379 }
380
381
382 /*
383 * NULL transmitter (do nothing except return NF_ACCEPT)
384 */
385 int
386 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
387 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
388 {
389 /* we do not touch skb and do not need pskb ptr */
390 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
391 }
392
393
394 /*
395 * Bypass transmitter
396 * Let packets bypass the destination when the destination is not
397 * available, it may be only used in transparent cache cluster.
398 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	/* Route by the packet's own daddr — no real server involved */
	rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
				NULL);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
453
454 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 counterpart of ip_vs_bypass_xmit() */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	/* Route by the packet's own daddr — no real server involved */
	rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
				   IP_VS_RT_MODE_NON_LOCAL);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		/* icmpv6_send needs a device for local clients */
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		/* only send ICMP too big on first fragment */
		if (!iph->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
514 #endif
515
516 /*
517 * NAT transmitter (only for outside-to-inside nat forwarding)
518 * Not used for related ICMP
519 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);
	int local, rc;			/* rc: NF_STOLEN or NF_ACCEPT (local) */

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		/* learn the client port from the first packet */
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR, NULL)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
				 "ip_vs_nat_xmit(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet: protocol handler rewrites ports/payload,
	 * then the destination address is rewritten here */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error_put;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	if (!local) {
		/* drop old route */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else
		/* local delivery keeps the original route */
		ip_rt_put(rt);

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);

	LeaveFunction(10);
	return rc;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
627
628 #ifdef CONFIG_IP_VS_IPV6
629 int
630 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
631 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
632 {
633 struct rt6_info *rt; /* Route to the other host */
634 int mtu;
635 int local, rc;
636
637 EnterFunction(10);
638
639 /* check if it is a connection of no-client-port */
640 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
641 __be16 _pt, *p;
642 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
643 if (p == NULL)
644 goto tx_error;
645 ip_vs_conn_fill_cport(cp, *p);
646 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
647 }
648
649 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
650 0, (IP_VS_RT_MODE_LOCAL |
651 IP_VS_RT_MODE_NON_LOCAL |
652 IP_VS_RT_MODE_RDR))))
653 goto tx_error_icmp;
654 local = __ip_vs_is_local_route6(rt);
655 /*
656 * Avoid duplicate tuple in reply direction for NAT traffic
657 * to local address when connection is sync-ed
658 */
659 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
660 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
661 enum ip_conntrack_info ctinfo;
662 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
663
664 if (ct && !nf_ct_is_untracked(ct)) {
665 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
666 "ip_vs_nat_xmit_v6(): "
667 "stopping DNAT to local address");
668 goto tx_error_put;
669 }
670 }
671 #endif
672
673 /* From world but DNAT to loopback address? */
674 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
675 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
676 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
677 "ip_vs_nat_xmit_v6(): "
678 "stopping DNAT to loopback address");
679 goto tx_error_put;
680 }
681
682 /* MTU checking */
683 mtu = dst_mtu(&rt->dst);
684 if (__mtu_check_toobig_v6(skb, mtu)) {
685 if (!skb->dev) {
686 struct net *net = dev_net(skb_dst(skb)->dev);
687
688 skb->dev = net->loopback_dev;
689 }
690 /* only send ICMP too big on first fragment */
691 if (!iph->fragoffs)
692 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
693 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
694 "ip_vs_nat_xmit_v6(): frag needed for");
695 goto tx_error_put;
696 }
697
698 /* copy-on-write the packet before mangling it */
699 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
700 goto tx_error_put;
701
702 if (skb_cow(skb, rt->dst.dev->hard_header_len))
703 goto tx_error_put;
704
705 /* mangle the packet */
706 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
707 goto tx_error;
708 ipv6_hdr(skb)->daddr = cp->daddr.in6;
709
710 if (!local || !skb->dev) {
711 /* drop the old route when skb is not shared */
712 skb_dst_drop(skb);
713 skb_dst_set(skb, &rt->dst);
714 } else {
715 /* destined to loopback, do we need to change route? */
716 dst_release(&rt->dst);
717 }
718
719 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
720
721 /* FIXME: when application helper enlarges the packet and the length
722 is larger than the MTU of outgoing device, there will be still
723 MTU problem. */
724
725 /* Another hack: avoid icmp_send in ip_fragment */
726 skb->local_df = 1;
727
728 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
729
730 LeaveFunction(10);
731 return rc;
732
733 tx_error_icmp:
734 dst_link_failure(skb);
735 tx_error:
736 LeaveFunction(10);
737 kfree_skb(skb);
738 return NF_STOLEN;
739 tx_error_put:
740 dst_release(&rt->dst);
741 goto tx_error;
742 }
743 #endif
744
745
746 /*
747 * IP Tunneling transmitter
748 *
749 * This function encapsulates the packet in a new IP packet, its
750 * destination will be set to cp->daddr. Most code of this function
751 * is taken from ipip.c.
752 *
753 * It is used in VS/TUN cluster. The load balancer selects a real
754 * server from a cluster based on a scheduling algorithm,
755 * encapsulates the request packet and forwards it to the selected
756 * server. For example, all real servers are configured with
757 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
758 * the encapsulated packet, it will decapsulate the packet, processe
759 * the request and return the response packets directly to the client
760 * without passing the load balancer. This can greatly increase the
761 * scalability of virtual server.
762 *
763 * Used for ANY protocol
764 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_CONNECT, &saddr)))
		goto tx_error_icmp;
	/* Local destination: no encapsulation, continue traversal */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	/* Payload MTU is the path MTU minus the outer IPIP header */
	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
	if (mtu < 68) {		/* 68 = minimum IPv4 MTU */
		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
		goto tx_error_put;
	}
	if (rt_is_output_route(skb_rtable(skb)))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	/* Copy DF, reset fragment offset and MF */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);	/* header moved with the copy */
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 * Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
882
883 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 counterpart of ip_vs_tunnel_xmit(): encapsulate in IPv6-in-IPv6 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
					 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
						     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	/* Local destination: no encapsulation, continue traversal */
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	/* Payload MTU is the path MTU minus the outer IPv6 header */
	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	if (mtu < IPV6_MIN_MTU) {
		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
			     IPV6_MIN_MTU);
		goto tx_error_put;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	/* MTU checking: Notice that 'mtu' have been adjusted before hand */
	if (__mtu_check_toobig_v6(skb, mtu)) {
		/* icmpv6_send needs a device for local clients */
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		/* only send ICMP too big on first fragment */
		if (!ipvsh->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);	/* header moved with the copy */
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 * Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	/* inner packet (header + payload) becomes the outer payload */
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr = cp->daddr.in6;
	iph->saddr = saddr;
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
1000 #endif
1001
1002
1003 /*
1004 * Direct Routing transmitter
1005 * Used for ANY protocol
1006 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	/* KNOWN_NH: route to the real server address as next hop,
	 * the packet itself is left unmodified */
	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_KNOWN_NH, NULL)))
		goto tx_error_icmp;
	/* Local destination: nothing to route, continue traversal */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
1066
1067 #ifdef CONFIG_IP_VS_IPV6
/* IPv6 counterpart of ip_vs_dr_xmit() */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, (IP_VS_RT_MODE_LOCAL |
					     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	/* Local destination: nothing to route, continue traversal */
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		/* icmpv6_send needs a device for local clients */
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		/* only send ICMP too big on first fragment */
		if (!iph->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
1131 #endif
1132
1133
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 *
 *	Handles an ICMP error packet related to connection @cp.  For
 *	non-masquerading forwarding methods (TUN, DR, LOCALNODE) the packet
 *	needs no address translation and is simply re-dispatched through
 *	cp->packet_xmit.  For VS/NAT the embedded addresses are mangled via
 *	ip_vs_nat_icmp() and the packet is routed to cp->daddr.
 *	@offset: length of headers that must be made writable before
 *	mangling (presumably up to the end of the embedded headers — confirm
 *	against the ip_vs_in_icmp caller).
 *	@hooknum: originating netfilter hook; LOCALNODE redirect is refused
 *	when called from FORWARD.
 *	Returns an NF_* verdict; NF_STOLEN means the skb was consumed.
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *iph)
{
	struct rtable *rt;			/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, iph);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      rt_mode, NULL)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI4\n",
			  __func__, &cp->daddr.ip);
		goto tx_error_put;
	}

	/* MTU checking: only refuse when DF is set and the skb is not GSO
	 * (GSO segments are sized later). */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	/* ensure headroom for the output device's link-layer header */
	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	if (!local) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else
		/* local delivery keeps the existing route; we only needed
		 * the lookup result, so drop its reference */
		ip_rt_put(rt);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
1247
#ifdef CONFIG_IP_VS_IPV6
/*
 *	ICMPv6 packet transmitter, the IPv6 counterpart of ip_vs_icmp_xmit():
 *	non-masquerading methods are re-dispatched unchanged through
 *	cp->packet_xmit; VS/NAT mangles the embedded addresses with
 *	ip_vs_nat_icmp_v6() and routes the packet to cp->daddr.in6.
 *	Returns an NF_* verdict; NF_STOLEN means the skb was consumed.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *iph)
{
	struct rt6_info *rt;			/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, iph);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, rt_mode)))
		goto tx_error_icmp;

	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		if (!skb->dev) {
			/* LOCAL_OUT: skb->dev is not set yet; give
			 * icmpv6_send() a device to work with. */
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		/* only send ICMP too big on first fragment */
		if (!iph->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	/* ensure headroom for the output device's link-layer header */
	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* NOTE(review): unlike the IPv4 variant, the route is also replaced
	 * for local destinations when skb->dev is unset (LOCAL_OUT case) —
	 * presumably so local delivery still has a valid dst; confirm
	 * against upstream history before changing. */
	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
#endif