]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/netfilter/ipvs/ip_vs_xmit.c
ipv4: fix path MTU discovery with connection tracking
[mirror_ubuntu-zesty-kernel.git] / net / netfilter / ipvs / ip_vs_xmit.c
CommitLineData
1da177e4
LT
1/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 *
1da177e4
LT
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
cb59155f
JA
14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
1da177e4
LT
24 */
25
9aada7ac
HE
26#define KMSG_COMPONENT "IPVS"
27#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28
1da177e4 29#include <linux/kernel.h>
5a0e3ad6 30#include <linux/slab.h>
1da177e4 31#include <linux/tcp.h> /* for tcphdr */
c439cb2e 32#include <net/ip.h>
1da177e4
LT
33#include <net/tcp.h> /* for csum_tcpudp_magic */
34#include <net/udp.h>
35#include <net/icmp.h> /* for icmp_send */
36#include <net/route.h> /* for ip_route_output */
38cdcc9a
JV
37#include <net/ipv6.h>
38#include <net/ip6_route.h>
714f095f 39#include <net/addrconf.h>
38cdcc9a 40#include <linux/icmpv6.h>
1da177e4
LT
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv4.h>
43
44#include <net/ip_vs.h>
45
17a8f8e3
CG
/* Routing-mode flags; combined as a bitmask in the rt_mode arguments. */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
};
1da177e4
LT
54
55/*
56 * Destination cache to speed up outgoing route lookup
57 */
58static inline void
714f095f
HS
59__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
60 u32 dst_cookie)
1da177e4
LT
61{
62 struct dst_entry *old_dst;
63
64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst;
66 dest->dst_rtos = rtos;
714f095f 67 dest->dst_cookie = dst_cookie;
1da177e4
LT
68 dst_release(old_dst);
69}
70
71static inline struct dst_entry *
714f095f 72__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
1da177e4
LT
73{
74 struct dst_entry *dst = dest->dst_cache;
75
76 if (!dst)
77 return NULL;
714f095f
HS
78 if ((dst->obsolete || rtos != dest->dst_rtos) &&
79 dst->ops->check(dst, dest->dst_cookie) == NULL) {
1da177e4
LT
80 dest->dst_cache = NULL;
81 dst_release(dst);
82 return NULL;
83 }
84 dst_hold(dst);
85 return dst;
86}
87
f2edb9f7
JA
88/* Get route to daddr, update *saddr, optionally bind route to saddr */
89static struct rtable *do_output_route4(struct net *net, __be32 daddr,
90 u32 rtos, int rt_mode, __be32 *saddr)
91{
92 struct flowi4 fl4;
93 struct rtable *rt;
94 int loop = 0;
95
96 memset(&fl4, 0, sizeof(fl4));
97 fl4.daddr = daddr;
98 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
99 fl4.flowi4_tos = rtos;
100
101retry:
102 rt = ip_route_output_key(net, &fl4);
103 if (IS_ERR(rt)) {
104 /* Invalid saddr ? */
105 if (PTR_ERR(rt) == -EINVAL && *saddr &&
106 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
107 *saddr = 0;
108 flowi4_update_output(&fl4, 0, rtos, daddr, 0);
109 goto retry;
110 }
111 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
112 return NULL;
113 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
114 ip_rt_put(rt);
115 *saddr = fl4.saddr;
116 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
117 loop++;
118 goto retry;
119 }
120 *saddr = fl4.saddr;
121 return rt;
122}
123
17a8f8e3 124/* Get route to destination or remote server */
ad1b30b1 125static struct rtable *
fc604767 126__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
c92f5ca2 127 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
1da177e4 128{
fc604767 129 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 130 struct rtable *rt; /* Route to the other host */
fc604767
JA
131 struct rtable *ort; /* Original route */
132 int local;
1da177e4
LT
133
134 if (dest) {
135 spin_lock(&dest->dst_lock);
136 if (!(rt = (struct rtable *)
714f095f 137 __ip_vs_dst_check(dest, rtos))) {
f2edb9f7
JA
138 rt = do_output_route4(net, dest->addr.ip, rtos,
139 rt_mode, &dest->dst_saddr.ip);
140 if (!rt) {
1da177e4 141 spin_unlock(&dest->dst_lock);
1da177e4
LT
142 return NULL;
143 }
714f095f 144 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
c92f5ca2
JA
145 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
146 "rtos=%X\n",
147 &dest->addr.ip, &dest->dst_saddr.ip,
d8d1f30b 148 atomic_read(&rt->dst.__refcnt), rtos);
1da177e4 149 }
44e3125c 150 daddr = dest->addr.ip;
c92f5ca2
JA
151 if (ret_saddr)
152 *ret_saddr = dest->dst_saddr.ip;
1da177e4
LT
153 spin_unlock(&dest->dst_lock);
154 } else {
f2edb9f7 155 __be32 saddr = htonl(INADDR_ANY);
c92f5ca2 156
f2edb9f7
JA
157 /* For such unconfigured boxes avoid many route lookups
158 * for performance reasons because we do not remember saddr
159 */
160 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
161 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
162 if (!rt)
1da177e4 163 return NULL;
c92f5ca2 164 if (ret_saddr)
f2edb9f7 165 *ret_saddr = saddr;
1da177e4
LT
166 }
167
fc604767 168 local = rt->rt_flags & RTCF_LOCAL;
17a8f8e3
CG
169 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
170 rt_mode)) {
fc604767
JA
171 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
172 (rt->rt_flags & RTCF_LOCAL) ?
44e3125c 173 "local":"non-local", &daddr);
fc604767
JA
174 ip_rt_put(rt);
175 return NULL;
176 }
17a8f8e3
CG
177 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
178 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
fc604767
JA
179 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
180 "requires NAT method, dest: %pI4\n",
44e3125c 181 &ip_hdr(skb)->daddr, &daddr);
fc604767
JA
182 ip_rt_put(rt);
183 return NULL;
184 }
185 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
186 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
187 "to non-local address, dest: %pI4\n",
44e3125c 188 &ip_hdr(skb)->saddr, &daddr);
fc604767
JA
189 ip_rt_put(rt);
190 return NULL;
191 }
192
1da177e4
LT
193 return rt;
194}
195
fc604767
JA
196/* Reroute packet to local IPv4 stack after DNAT */
197static int
198__ip_vs_reroute_locally(struct sk_buff *skb)
199{
200 struct rtable *rt = skb_rtable(skb);
201 struct net_device *dev = rt->dst.dev;
202 struct net *net = dev_net(dev);
203 struct iphdr *iph = ip_hdr(skb);
204
c7537967 205 if (rt_is_input_route(rt)) {
fc604767
JA
206 unsigned long orefdst = skb->_skb_refdst;
207
208 if (ip_route_input(skb, iph->daddr, iph->saddr,
209 iph->tos, skb->dev))
210 return 0;
211 refdst_drop(orefdst);
212 } else {
9d6ec938
DM
213 struct flowi4 fl4 = {
214 .daddr = iph->daddr,
215 .saddr = iph->saddr,
216 .flowi4_tos = RT_TOS(iph->tos),
217 .flowi4_mark = skb->mark,
fc604767 218 };
fc604767 219
9d6ec938 220 rt = ip_route_output_key(net, &fl4);
b23dd4fe 221 if (IS_ERR(rt))
fc604767
JA
222 return 0;
223 if (!(rt->rt_flags & RTCF_LOCAL)) {
224 ip_rt_put(rt);
225 return 0;
226 }
227 /* Drop old route. */
228 skb_dst_drop(skb);
229 skb_dst_set(skb, &rt->dst);
230 }
231 return 1;
232}
233
38cdcc9a 234#ifdef CONFIG_IP_VS_IPV6
714f095f 235
fc604767
JA
236static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
237{
d1918542 238 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
fc604767
JA
239}
240
714f095f
HS
241static struct dst_entry *
242__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
243 struct in6_addr *ret_saddr, int do_xfrm)
244{
245 struct dst_entry *dst;
4c9483b2
DM
246 struct flowi6 fl6 = {
247 .daddr = *daddr,
714f095f
HS
248 };
249
4c9483b2 250 dst = ip6_route_output(net, NULL, &fl6);
714f095f
HS
251 if (dst->error)
252 goto out_err;
253 if (!ret_saddr)
254 return dst;
4c9483b2 255 if (ipv6_addr_any(&fl6.saddr) &&
714f095f 256 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
4c9483b2 257 &fl6.daddr, 0, &fl6.saddr) < 0)
714f095f 258 goto out_err;
452edd59 259 if (do_xfrm) {
4c9483b2 260 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
452edd59
DM
261 if (IS_ERR(dst)) {
262 dst = NULL;
263 goto out_err;
264 }
265 }
4e3fd7a0 266 *ret_saddr = fl6.saddr;
714f095f
HS
267 return dst;
268
269out_err:
270 dst_release(dst);
271 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
272 return NULL;
273}
274
fc604767
JA
275/*
276 * Get route to destination or remote server
fc604767 277 */
38cdcc9a 278static struct rt6_info *
fc604767
JA
279__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
280 struct in6_addr *daddr, struct in6_addr *ret_saddr,
281 int do_xfrm, int rt_mode)
38cdcc9a 282{
fc604767 283 struct net *net = dev_net(skb_dst(skb)->dev);
38cdcc9a 284 struct rt6_info *rt; /* Route to the other host */
fc604767 285 struct rt6_info *ort; /* Original route */
714f095f 286 struct dst_entry *dst;
fc604767 287 int local;
38cdcc9a
JV
288
289 if (dest) {
290 spin_lock(&dest->dst_lock);
714f095f 291 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
38cdcc9a 292 if (!rt) {
714f095f 293 u32 cookie;
38cdcc9a 294
714f095f 295 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
c92f5ca2 296 &dest->dst_saddr.in6,
714f095f
HS
297 do_xfrm);
298 if (!dst) {
38cdcc9a 299 spin_unlock(&dest->dst_lock);
38cdcc9a
JV
300 return NULL;
301 }
714f095f
HS
302 rt = (struct rt6_info *) dst;
303 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
304 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
305 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
c92f5ca2 306 &dest->addr.in6, &dest->dst_saddr.in6,
d8d1f30b 307 atomic_read(&rt->dst.__refcnt));
38cdcc9a 308 }
714f095f 309 if (ret_saddr)
4e3fd7a0 310 *ret_saddr = dest->dst_saddr.in6;
38cdcc9a
JV
311 spin_unlock(&dest->dst_lock);
312 } else {
fc604767 313 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
714f095f 314 if (!dst)
38cdcc9a 315 return NULL;
714f095f 316 rt = (struct rt6_info *) dst;
38cdcc9a
JV
317 }
318
fc604767 319 local = __ip_vs_is_local_route6(rt);
e58b3442
DM
320 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
321 rt_mode)) {
fc604767
JA
322 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
323 local ? "local":"non-local", daddr);
324 dst_release(&rt->dst);
325 return NULL;
326 }
e58b3442 327 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
fc604767
JA
328 !((ort = (struct rt6_info *) skb_dst(skb)) &&
329 __ip_vs_is_local_route6(ort))) {
330 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
331 "requires NAT method, dest: %pI6\n",
332 &ipv6_hdr(skb)->daddr, daddr);
333 dst_release(&rt->dst);
334 return NULL;
335 }
336 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
337 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
338 IPV6_ADDR_LOOPBACK)) {
339 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
340 "to non-local address, dest: %pI6\n",
341 &ipv6_hdr(skb)->saddr, daddr);
342 dst_release(&rt->dst);
343 return NULL;
344 }
345
38cdcc9a
JV
346 return rt;
347}
348#endif
349
1da177e4
LT
350
351/*
352 * Release dest->dst_cache before a dest is removed
353 */
354void
355ip_vs_dst_reset(struct ip_vs_dest *dest)
356{
357 struct dst_entry *old_dst;
358
359 old_dst = dest->dst_cache;
360 dest->dst_cache = NULL;
361 dst_release(old_dst);
f2edb9f7 362 dest->dst_saddr.ip = 0;
1da177e4
LT
363}
364
f4bc17cd
JA
/*
 * Prepare a tunnelled skb for sending and evaluate to a verdict.
 *
 * The skb is marked as IPVS property.  The verdict is NF_ACCEPT unless
 * the connection has IP_VS_CONN_F_NFCT set, in which case it is
 * whatever ip_vs_confirm_conntrack() returns.  Only for NF_ACCEPT are
 * nf_reset() and skb_forward_csum() applied to the skb.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
378
/*
 * Send a NAT-ed skb.
 *
 * Marks the skb as IPVS property, then either updates conntrack (for
 * connections with IP_VS_CONN_F_NFCT) or calls ip_vs_notrack().
 * NOTE: when @local is true this macro RETURNS NF_ACCEPT from the
 * calling function; otherwise the skb is passed to the LOCAL_OUT hook
 * with dst_output() as continuation.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)			\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
392
/*
 * Send an skb that was not NAT-ed.
 *
 * Marks the skb as IPVS property and calls ip_vs_notrack() unless the
 * connection has IP_VS_CONN_F_NFCT set.
 * NOTE: when @local is true this macro RETURNS NF_ACCEPT from the
 * calling function; otherwise the skb is passed to the LOCAL_OUT hook
 * with dst_output() as continuation.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))		\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
404
405
406/*
407 * NULL transmitter (do nothing except return NF_ACCEPT)
408 */
409int
410ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
411 struct ip_vs_protocol *pp)
412{
413 /* we do not touch skb and do not need pskb ptr */
fc604767 414 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1da177e4
LT
415}
416
417
418/*
419 * Bypass transmitter
420 * Let packets bypass the destination when the destination is not
421 * available, it may be only used in transparent cache cluster.
422 */
423int
424ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
425 struct ip_vs_protocol *pp)
426{
427 struct rtable *rt; /* Route to the other host */
eddc9ec5 428 struct iphdr *iph = ip_hdr(skb);
1da177e4 429 int mtu;
1da177e4
LT
430
431 EnterFunction(10);
432
17a8f8e3 433 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
c92f5ca2 434 IP_VS_RT_MODE_NON_LOCAL, NULL)))
1da177e4 435 goto tx_error_icmp;
1da177e4
LT
436
437 /* MTU checking */
d8d1f30b 438 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
439 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
440 !skb_is_gso(skb)) {
1da177e4
LT
441 ip_rt_put(rt);
442 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 443 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
444 goto tx_error;
445 }
446
447 /*
448 * Call ip_send_check because we are not sure it is called
449 * after ip_defrag. Is copy-on-write needed?
450 */
451 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
452 ip_rt_put(rt);
453 return NF_STOLEN;
454 }
eddc9ec5 455 ip_send_check(ip_hdr(skb));
1da177e4
LT
456
457 /* drop old route */
adf30907 458 skb_dst_drop(skb);
d8d1f30b 459 skb_dst_set(skb, &rt->dst);
1da177e4
LT
460
461 /* Another hack: avoid icmp_send in ip_fragment */
462 skb->local_df = 1;
463
fc604767 464 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1da177e4
LT
465
466 LeaveFunction(10);
467 return NF_STOLEN;
468
469 tx_error_icmp:
470 dst_link_failure(skb);
471 tx_error:
472 kfree_skb(skb);
473 LeaveFunction(10);
474 return NF_STOLEN;
475}
476
b3cdd2a7
JV
477#ifdef CONFIG_IP_VS_IPV6
478int
479ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
480 struct ip_vs_protocol *pp)
481{
482 struct rt6_info *rt; /* Route to the other host */
483 struct ipv6hdr *iph = ipv6_hdr(skb);
484 int mtu;
b3cdd2a7
JV
485
486 EnterFunction(10);
487
e58b3442
DM
488 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
489 IP_VS_RT_MODE_NON_LOCAL)))
b3cdd2a7 490 goto tx_error_icmp;
b3cdd2a7
JV
491
492 /* MTU checking */
d8d1f30b 493 mtu = dst_mtu(&rt->dst);
8f1b03a4 494 if (skb->len > mtu && !skb_is_gso(skb)) {
cb59155f
JA
495 if (!skb->dev) {
496 struct net *net = dev_net(skb_dst(skb)->dev);
497
498 skb->dev = net->loopback_dev;
499 }
3ffe533c 500 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
cb59155f 501 dst_release(&rt->dst);
1e3e238e 502 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
503 goto tx_error;
504 }
505
506 /*
507 * Call ip_send_check because we are not sure it is called
508 * after ip_defrag. Is copy-on-write needed?
509 */
510 skb = skb_share_check(skb, GFP_ATOMIC);
511 if (unlikely(skb == NULL)) {
d8d1f30b 512 dst_release(&rt->dst);
b3cdd2a7
JV
513 return NF_STOLEN;
514 }
515
516 /* drop old route */
adf30907 517 skb_dst_drop(skb);
d8d1f30b 518 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
519
520 /* Another hack: avoid icmp_send in ip_fragment */
521 skb->local_df = 1;
522
fc604767 523 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
b3cdd2a7
JV
524
525 LeaveFunction(10);
526 return NF_STOLEN;
527
528 tx_error_icmp:
529 dst_link_failure(skb);
530 tx_error:
531 kfree_skb(skb);
532 LeaveFunction(10);
533 return NF_STOLEN;
534}
535#endif
1da177e4
LT
536
537/*
538 * NAT transmitter (only for outside-to-inside nat forwarding)
539 * Not used for related ICMP
540 */
541int
542ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
543 struct ip_vs_protocol *pp)
544{
545 struct rtable *rt; /* Route to the other host */
546 int mtu;
eddc9ec5 547 struct iphdr *iph = ip_hdr(skb);
fc604767 548 int local;
1da177e4
LT
549
550 EnterFunction(10);
551
552 /* check if it is a connection of no-client-port */
553 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
014d730d 554 __be16 _pt, *p;
1da177e4
LT
555 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
556 if (p == NULL)
557 goto tx_error;
558 ip_vs_conn_fill_cport(cp, *p);
559 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
560 }
561
fc604767 562 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3
CG
563 RT_TOS(iph->tos),
564 IP_VS_RT_MODE_LOCAL |
565 IP_VS_RT_MODE_NON_LOCAL |
c92f5ca2 566 IP_VS_RT_MODE_RDR, NULL)))
1da177e4 567 goto tx_error_icmp;
fc604767
JA
568 local = rt->rt_flags & RTCF_LOCAL;
569 /*
570 * Avoid duplicate tuple in reply direction for NAT traffic
571 * to local address when connection is sync-ed
572 */
c0cd1156 573#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
574 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
575 enum ip_conntrack_info ctinfo;
576 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
577
578 if (ct && !nf_ct_is_untracked(ct)) {
0d79641a
JA
579 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
580 "ip_vs_nat_xmit(): "
fc604767
JA
581 "stopping DNAT to local address");
582 goto tx_error_put;
583 }
584 }
585#endif
586
587 /* From world but DNAT to loopback address? */
c92f5ca2 588 if (local && ipv4_is_loopback(cp->daddr.ip) &&
c7537967 589 rt_is_input_route(skb_rtable(skb))) {
0d79641a 590 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
fc604767
JA
591 "stopping DNAT to loopback address");
592 goto tx_error_put;
593 }
1da177e4
LT
594
595 /* MTU checking */
d8d1f30b 596 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
597 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
598 !skb_is_gso(skb)) {
1da177e4 599 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
0d79641a
JA
600 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
601 "ip_vs_nat_xmit(): frag needed for");
fc604767 602 goto tx_error_put;
1da177e4
LT
603 }
604
605 /* copy-on-write the packet before mangling it */
af1e1cf0 606 if (!skb_make_writable(skb, sizeof(struct iphdr)))
1da177e4
LT
607 goto tx_error_put;
608
d8d1f30b 609 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
610 goto tx_error_put;
611
1da177e4 612 /* mangle the packet */
3db05fea 613 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
fc604767 614 goto tx_error_put;
e7ade46a 615 ip_hdr(skb)->daddr = cp->daddr.ip;
eddc9ec5 616 ip_send_check(ip_hdr(skb));
1da177e4 617
fc604767
JA
618 if (!local) {
619 /* drop old route */
620 skb_dst_drop(skb);
621 skb_dst_set(skb, &rt->dst);
622 } else {
623 ip_rt_put(rt);
624 /*
625 * Some IPv4 replies get local address from routes,
626 * not from iph, so while we DNAT after routing
627 * we need this second input/output route.
628 */
629 if (!__ip_vs_reroute_locally(skb))
630 goto tx_error;
631 }
632
0d79641a 633 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
1da177e4
LT
634
635 /* FIXME: when application helper enlarges the packet and the length
636 is larger than the MTU of outgoing device, there will be still
637 MTU problem. */
638
639 /* Another hack: avoid icmp_send in ip_fragment */
640 skb->local_df = 1;
641
fc604767 642 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1da177e4
LT
643
644 LeaveFunction(10);
645 return NF_STOLEN;
646
647 tx_error_icmp:
648 dst_link_failure(skb);
649 tx_error:
1da177e4 650 kfree_skb(skb);
f4bc17cd 651 LeaveFunction(10);
1da177e4
LT
652 return NF_STOLEN;
653 tx_error_put:
654 ip_rt_put(rt);
655 goto tx_error;
656}
657
b3cdd2a7
JV
658#ifdef CONFIG_IP_VS_IPV6
659int
660ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
661 struct ip_vs_protocol *pp)
662{
663 struct rt6_info *rt; /* Route to the other host */
664 int mtu;
fc604767 665 int local;
b3cdd2a7
JV
666
667 EnterFunction(10);
668
669 /* check if it is a connection of no-client-port */
670 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
671 __be16 _pt, *p;
672 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
673 sizeof(_pt), &_pt);
674 if (p == NULL)
675 goto tx_error;
676 ip_vs_conn_fill_cport(cp, *p);
677 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
678 }
679
fc604767 680 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
e58b3442
DM
681 0, (IP_VS_RT_MODE_LOCAL |
682 IP_VS_RT_MODE_NON_LOCAL |
683 IP_VS_RT_MODE_RDR))))
b3cdd2a7 684 goto tx_error_icmp;
fc604767
JA
685 local = __ip_vs_is_local_route6(rt);
686 /*
687 * Avoid duplicate tuple in reply direction for NAT traffic
688 * to local address when connection is sync-ed
689 */
c0cd1156 690#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
691 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
692 enum ip_conntrack_info ctinfo;
693 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
694
695 if (ct && !nf_ct_is_untracked(ct)) {
0d79641a 696 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
fc604767
JA
697 "ip_vs_nat_xmit_v6(): "
698 "stopping DNAT to local address");
699 goto tx_error_put;
700 }
701 }
702#endif
703
704 /* From world but DNAT to loopback address? */
705 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
706 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
0d79641a 707 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
fc604767
JA
708 "ip_vs_nat_xmit_v6(): "
709 "stopping DNAT to loopback address");
710 goto tx_error_put;
711 }
b3cdd2a7
JV
712
713 /* MTU checking */
d8d1f30b 714 mtu = dst_mtu(&rt->dst);
8f1b03a4 715 if (skb->len > mtu && !skb_is_gso(skb)) {
cb59155f
JA
716 if (!skb->dev) {
717 struct net *net = dev_net(skb_dst(skb)->dev);
718
719 skb->dev = net->loopback_dev;
720 }
3ffe533c 721 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
0d79641a 722 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
b3cdd2a7 723 "ip_vs_nat_xmit_v6(): frag needed for");
fc604767 724 goto tx_error_put;
b3cdd2a7
JV
725 }
726
727 /* copy-on-write the packet before mangling it */
728 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
729 goto tx_error_put;
730
d8d1f30b 731 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
732 goto tx_error_put;
733
b3cdd2a7
JV
734 /* mangle the packet */
735 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
736 goto tx_error;
4e3fd7a0 737 ipv6_hdr(skb)->daddr = cp->daddr.in6;
fc604767
JA
738
739 if (!local || !skb->dev) {
740 /* drop the old route when skb is not shared */
741 skb_dst_drop(skb);
742 skb_dst_set(skb, &rt->dst);
743 } else {
744 /* destined to loopback, do we need to change route? */
745 dst_release(&rt->dst);
746 }
b3cdd2a7 747
0d79641a 748 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
b3cdd2a7
JV
749
750 /* FIXME: when application helper enlarges the packet and the length
751 is larger than the MTU of outgoing device, there will be still
752 MTU problem. */
753
754 /* Another hack: avoid icmp_send in ip_fragment */
755 skb->local_df = 1;
756
fc604767 757 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
b3cdd2a7
JV
758
759 LeaveFunction(10);
760 return NF_STOLEN;
761
762tx_error_icmp:
763 dst_link_failure(skb);
764tx_error:
765 LeaveFunction(10);
766 kfree_skb(skb);
767 return NF_STOLEN;
768tx_error_put:
d8d1f30b 769 dst_release(&rt->dst);
b3cdd2a7
JV
770 goto tx_error;
771}
772#endif
773
1da177e4
LT
774
775/*
776 * IP Tunneling transmitter
777 *
778 * This function encapsulates the packet in a new IP packet, its
779 * destination will be set to cp->daddr. Most code of this function
780 * is taken from ipip.c.
781 *
782 * It is used in VS/TUN cluster. The load balancer selects a real
783 * server from a cluster based on a scheduling algorithm,
784 * encapsulates the request packet and forwards it to the selected
785 * server. For example, all real servers are configured with
786 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
787 * the encapsulated packet, it will decapsulate the packet, processe
788 * the request and return the response packets directly to the client
789 * without passing the load balancer. This can greatly increase the
790 * scalability of virtual server.
791 *
792 * Used for ANY protocol
793 */
794int
795ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
796 struct ip_vs_protocol *pp)
797{
3654e611 798 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
1da177e4 799 struct rtable *rt; /* Route to the other host */
c92f5ca2 800 __be32 saddr; /* Source for tunnel */
1da177e4 801 struct net_device *tdev; /* Device to other host */
eddc9ec5 802 struct iphdr *old_iph = ip_hdr(skb);
1da177e4 803 u8 tos = old_iph->tos;
f2edb9f7 804 __be16 df;
1da177e4 805 struct iphdr *iph; /* Our new IP header */
c2636b4d 806 unsigned int max_headroom; /* The extra header space needed */
1da177e4 807 int mtu;
f4bc17cd 808 int ret;
1da177e4
LT
809
810 EnterFunction(10);
811
fc604767 812 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3 813 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
f2edb9f7
JA
814 IP_VS_RT_MODE_NON_LOCAL |
815 IP_VS_RT_MODE_CONNECT,
c92f5ca2 816 &saddr)))
1da177e4 817 goto tx_error_icmp;
fc604767
JA
818 if (rt->rt_flags & RTCF_LOCAL) {
819 ip_rt_put(rt);
820 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
821 }
1da177e4 822
d8d1f30b 823 tdev = rt->dst.dev;
1da177e4 824
d8d1f30b 825 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
1da177e4 826 if (mtu < 68) {
1e3e238e 827 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
fc604767 828 goto tx_error_put;
1da177e4 829 }
f2edb9f7 830 if (rt_is_output_route(skb_rtable(skb)))
6700c270 831 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1da177e4 832
f2edb9f7 833 /* Copy DF, reset fragment offset and MF */
3654e611 834 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
1da177e4 835
3654e611 836 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
1da177e4 837 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 838 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 839 goto tx_error_put;
1da177e4
LT
840 }
841
842 /*
843 * Okay, now see if we can stuff it in the buffer as-is.
844 */
845 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
846
847 if (skb_headroom(skb) < max_headroom
848 || skb_cloned(skb) || skb_shared(skb)) {
849 struct sk_buff *new_skb =
850 skb_realloc_headroom(skb, max_headroom);
851 if (!new_skb) {
852 ip_rt_put(rt);
853 kfree_skb(skb);
1e3e238e 854 IP_VS_ERR_RL("%s(): no memory\n", __func__);
1da177e4
LT
855 return NF_STOLEN;
856 }
5d0ba55b 857 consume_skb(skb);
1da177e4 858 skb = new_skb;
eddc9ec5 859 old_iph = ip_hdr(skb);
1da177e4
LT
860 }
861
714f095f 862 skb->transport_header = skb->network_header;
1da177e4
LT
863
864 /* fix old IP header checksum */
865 ip_send_check(old_iph);
866
e2d1bca7
ACM
867 skb_push(skb, sizeof(struct iphdr));
868 skb_reset_network_header(skb);
1da177e4
LT
869 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
870
871 /* drop old route */
adf30907 872 skb_dst_drop(skb);
d8d1f30b 873 skb_dst_set(skb, &rt->dst);
1da177e4
LT
874
875 /*
876 * Push down and install the IPIP header.
877 */
eddc9ec5 878 iph = ip_hdr(skb);
1da177e4
LT
879 iph->version = 4;
880 iph->ihl = sizeof(struct iphdr)>>2;
881 iph->frag_off = df;
882 iph->protocol = IPPROTO_IPIP;
883 iph->tos = tos;
c92f5ca2
JA
884 iph->daddr = cp->daddr.ip;
885 iph->saddr = saddr;
1da177e4 886 iph->ttl = old_iph->ttl;
d8d1f30b 887 ip_select_ident(iph, &rt->dst, NULL);
1da177e4
LT
888
889 /* Another hack: avoid icmp_send in ip_fragment */
890 skb->local_df = 1;
891
f4bc17cd
JA
892 ret = IP_VS_XMIT_TUNNEL(skb, cp);
893 if (ret == NF_ACCEPT)
894 ip_local_out(skb);
895 else if (ret == NF_DROP)
896 kfree_skb(skb);
1da177e4
LT
897
898 LeaveFunction(10);
899
900 return NF_STOLEN;
901
902 tx_error_icmp:
903 dst_link_failure(skb);
904 tx_error:
905 kfree_skb(skb);
906 LeaveFunction(10);
907 return NF_STOLEN;
fc604767
JA
908tx_error_put:
909 ip_rt_put(rt);
910 goto tx_error;
1da177e4
LT
911}
912
b3cdd2a7
JV
913#ifdef CONFIG_IP_VS_IPV6
914int
915ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
916 struct ip_vs_protocol *pp)
917{
918 struct rt6_info *rt; /* Route to the other host */
714f095f 919 struct in6_addr saddr; /* Source for tunnel */
b3cdd2a7
JV
920 struct net_device *tdev; /* Device to other host */
921 struct ipv6hdr *old_iph = ipv6_hdr(skb);
b3cdd2a7
JV
922 struct ipv6hdr *iph; /* Our new IP header */
923 unsigned int max_headroom; /* The extra header space needed */
924 int mtu;
f4bc17cd 925 int ret;
b3cdd2a7
JV
926
927 EnterFunction(10);
928
fc604767 929 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
e58b3442
DM
930 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
931 IP_VS_RT_MODE_NON_LOCAL))))
b3cdd2a7 932 goto tx_error_icmp;
fc604767
JA
933 if (__ip_vs_is_local_route6(rt)) {
934 dst_release(&rt->dst);
935 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
936 }
b3cdd2a7 937
d8d1f30b 938 tdev = rt->dst.dev;
b3cdd2a7 939
d8d1f30b 940 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
714f095f 941 if (mtu < IPV6_MIN_MTU) {
714f095f
HS
942 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
943 IPV6_MIN_MTU);
fc604767 944 goto tx_error_put;
b3cdd2a7 945 }
adf30907 946 if (skb_dst(skb))
6700c270 947 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
b3cdd2a7 948
8f1b03a4
SH
949 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
950 !skb_is_gso(skb)) {
cb59155f
JA
951 if (!skb->dev) {
952 struct net *net = dev_net(skb_dst(skb)->dev);
953
954 skb->dev = net->loopback_dev;
955 }
3ffe533c 956 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 957 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 958 goto tx_error_put;
b3cdd2a7
JV
959 }
960
961 /*
962 * Okay, now see if we can stuff it in the buffer as-is.
963 */
964 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
965
966 if (skb_headroom(skb) < max_headroom
967 || skb_cloned(skb) || skb_shared(skb)) {
968 struct sk_buff *new_skb =
969 skb_realloc_headroom(skb, max_headroom);
970 if (!new_skb) {
d8d1f30b 971 dst_release(&rt->dst);
b3cdd2a7 972 kfree_skb(skb);
1e3e238e 973 IP_VS_ERR_RL("%s(): no memory\n", __func__);
b3cdd2a7
JV
974 return NF_STOLEN;
975 }
5d0ba55b 976 consume_skb(skb);
b3cdd2a7
JV
977 skb = new_skb;
978 old_iph = ipv6_hdr(skb);
979 }
980
714f095f 981 skb->transport_header = skb->network_header;
b3cdd2a7
JV
982
983 skb_push(skb, sizeof(struct ipv6hdr));
984 skb_reset_network_header(skb);
985 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
986
987 /* drop old route */
adf30907 988 skb_dst_drop(skb);
d8d1f30b 989 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
990
991 /*
992 * Push down and install the IPIP header.
993 */
994 iph = ipv6_hdr(skb);
995 iph->version = 6;
996 iph->nexthdr = IPPROTO_IPV6;
b7b45f47
HH
997 iph->payload_len = old_iph->payload_len;
998 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
b3cdd2a7
JV
999 iph->priority = old_iph->priority;
1000 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
4e3fd7a0
AD
1001 iph->daddr = cp->daddr.in6;
1002 iph->saddr = saddr;
b3cdd2a7
JV
1003 iph->hop_limit = old_iph->hop_limit;
1004
1005 /* Another hack: avoid icmp_send in ip_fragment */
1006 skb->local_df = 1;
1007
f4bc17cd
JA
1008 ret = IP_VS_XMIT_TUNNEL(skb, cp);
1009 if (ret == NF_ACCEPT)
1010 ip6_local_out(skb);
1011 else if (ret == NF_DROP)
1012 kfree_skb(skb);
b3cdd2a7
JV
1013
1014 LeaveFunction(10);
1015
1016 return NF_STOLEN;
1017
1018tx_error_icmp:
1019 dst_link_failure(skb);
1020tx_error:
1021 kfree_skb(skb);
1022 LeaveFunction(10);
1023 return NF_STOLEN;
fc604767
JA
1024tx_error_put:
1025 dst_release(&rt->dst);
1026 goto tx_error;
b3cdd2a7
JV
1027}
1028#endif
1029
1da177e4
LT
1030
1031/*
1032 * Direct Routing transmitter
1033 * Used for ANY protocol
1034 */
1035int
1036ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1037 struct ip_vs_protocol *pp)
1038{
1039 struct rtable *rt; /* Route to the other host */
eddc9ec5 1040 struct iphdr *iph = ip_hdr(skb);
1da177e4
LT
1041 int mtu;
1042
1043 EnterFunction(10);
1044
fc604767 1045 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3
CG
1046 RT_TOS(iph->tos),
1047 IP_VS_RT_MODE_LOCAL |
c92f5ca2 1048 IP_VS_RT_MODE_NON_LOCAL, NULL)))
1da177e4 1049 goto tx_error_icmp;
fc604767
JA
1050 if (rt->rt_flags & RTCF_LOCAL) {
1051 ip_rt_put(rt);
1052 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1053 }
1da177e4
LT
1054
1055 /* MTU checking */
d8d1f30b 1056 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
1057 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1058 !skb_is_gso(skb)) {
1da177e4
LT
1059 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1060 ip_rt_put(rt);
1e3e238e 1061 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1da177e4
LT
1062 goto tx_error;
1063 }
1064
1065 /*
1066 * Call ip_send_check because we are not sure it is called
1067 * after ip_defrag. Is copy-on-write needed?
1068 */
1069 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1070 ip_rt_put(rt);
1071 return NF_STOLEN;
1072 }
eddc9ec5 1073 ip_send_check(ip_hdr(skb));
1da177e4
LT
1074
1075 /* drop old route */
adf30907 1076 skb_dst_drop(skb);
d8d1f30b 1077 skb_dst_set(skb, &rt->dst);
1da177e4
LT
1078
1079 /* Another hack: avoid icmp_send in ip_fragment */
1080 skb->local_df = 1;
1081
fc604767 1082 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1da177e4
LT
1083
1084 LeaveFunction(10);
1085 return NF_STOLEN;
1086
1087 tx_error_icmp:
1088 dst_link_failure(skb);
1089 tx_error:
1090 kfree_skb(skb);
1091 LeaveFunction(10);
1092 return NF_STOLEN;
1093}
1094
b3cdd2a7
JV
1095#ifdef CONFIG_IP_VS_IPV6
1096int
1097ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1098 struct ip_vs_protocol *pp)
1099{
1100 struct rt6_info *rt; /* Route to the other host */
1101 int mtu;
1102
1103 EnterFunction(10);
1104
fc604767 1105 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
e58b3442
DM
1106 0, (IP_VS_RT_MODE_LOCAL |
1107 IP_VS_RT_MODE_NON_LOCAL))))
b3cdd2a7 1108 goto tx_error_icmp;
fc604767
JA
1109 if (__ip_vs_is_local_route6(rt)) {
1110 dst_release(&rt->dst);
1111 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1112 }
b3cdd2a7
JV
1113
1114 /* MTU checking */
d8d1f30b 1115 mtu = dst_mtu(&rt->dst);
b3cdd2a7 1116 if (skb->len > mtu) {
cb59155f
JA
1117 if (!skb->dev) {
1118 struct net *net = dev_net(skb_dst(skb)->dev);
1119
1120 skb->dev = net->loopback_dev;
1121 }
3ffe533c 1122 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
d8d1f30b 1123 dst_release(&rt->dst);
1e3e238e 1124 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
b3cdd2a7
JV
1125 goto tx_error;
1126 }
1127
1128 /*
1129 * Call ip_send_check because we are not sure it is called
1130 * after ip_defrag. Is copy-on-write needed?
1131 */
1132 skb = skb_share_check(skb, GFP_ATOMIC);
1133 if (unlikely(skb == NULL)) {
d8d1f30b 1134 dst_release(&rt->dst);
b3cdd2a7
JV
1135 return NF_STOLEN;
1136 }
1137
1138 /* drop old route */
adf30907 1139 skb_dst_drop(skb);
d8d1f30b 1140 skb_dst_set(skb, &rt->dst);
b3cdd2a7
JV
1141
1142 /* Another hack: avoid icmp_send in ip_fragment */
1143 skb->local_df = 1;
1144
fc604767 1145 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
b3cdd2a7
JV
1146
1147 LeaveFunction(10);
1148 return NF_STOLEN;
1149
1150tx_error_icmp:
1151 dst_link_failure(skb);
1152tx_error:
1153 kfree_skb(skb);
1154 LeaveFunction(10);
1155 return NF_STOLEN;
1156}
1157#endif
1158
1da177e4
LT
1159
1160/*
1161 * ICMP packet transmitter
1162 * called by the ip_vs_in_icmp
1163 */
1164int
1165ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
c92f5ca2 1166 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1da177e4
LT
1167{
1168 struct rtable *rt; /* Route to the other host */
1169 int mtu;
1170 int rc;
fc604767 1171 int local;
c92f5ca2 1172 int rt_mode;
1da177e4
LT
1173
1174 EnterFunction(10);
1175
1176 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1177 forwarded directly here, because there is no need to
1178 translate address/port back */
1179 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1180 if (cp->packet_xmit)
1181 rc = cp->packet_xmit(skb, cp, pp);
1182 else
1183 rc = NF_ACCEPT;
1184 /* do not touch skb anymore */
1185 atomic_inc(&cp->in_pkts);
1da177e4
LT
1186 goto out;
1187 }
1188
1189 /*
1190 * mangle and send the packet here (only for VS/NAT)
1191 */
1192
c92f5ca2
JA
1193 /* LOCALNODE from FORWARD hook is not supported */
1194 rt_mode = (hooknum != NF_INET_FORWARD) ?
1195 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1196 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
fc604767 1197 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
17a8f8e3 1198 RT_TOS(ip_hdr(skb)->tos),
c92f5ca2 1199 rt_mode, NULL)))
1da177e4 1200 goto tx_error_icmp;
fc604767
JA
1201 local = rt->rt_flags & RTCF_LOCAL;
1202
1203 /*
1204 * Avoid duplicate tuple in reply direction for NAT traffic
1205 * to local address when connection is sync-ed
1206 */
c0cd1156 1207#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
1208 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1209 enum ip_conntrack_info ctinfo;
1210 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1211
1212 if (ct && !nf_ct_is_untracked(ct)) {
1213 IP_VS_DBG(10, "%s(): "
1214 "stopping DNAT to local address %pI4\n",
1215 __func__, &cp->daddr.ip);
1216 goto tx_error_put;
1217 }
1218 }
1219#endif
1220
1221 /* From world but DNAT to loopback address? */
c92f5ca2 1222 if (local && ipv4_is_loopback(cp->daddr.ip) &&
c7537967 1223 rt_is_input_route(skb_rtable(skb))) {
fc604767
JA
1224 IP_VS_DBG(1, "%s(): "
1225 "stopping DNAT to loopback %pI4\n",
1226 __func__, &cp->daddr.ip);
1227 goto tx_error_put;
1228 }
1da177e4
LT
1229
1230 /* MTU checking */
d8d1f30b 1231 mtu = dst_mtu(&rt->dst);
8f1b03a4
SH
1232 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1233 !skb_is_gso(skb)) {
1da177e4 1234 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1e3e238e 1235 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 1236 goto tx_error_put;
1da177e4
LT
1237 }
1238
1239 /* copy-on-write the packet before mangling it */
af1e1cf0 1240 if (!skb_make_writable(skb, offset))
1da177e4
LT
1241 goto tx_error_put;
1242
d8d1f30b 1243 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1da177e4
LT
1244 goto tx_error_put;
1245
1da177e4
LT
1246 ip_vs_nat_icmp(skb, pp, cp, 0);
1247
fc604767
JA
1248 if (!local) {
1249 /* drop the old route when skb is not shared */
1250 skb_dst_drop(skb);
1251 skb_dst_set(skb, &rt->dst);
1252 } else {
1253 ip_rt_put(rt);
1254 /*
1255 * Some IPv4 replies get local address from routes,
1256 * not from iph, so while we DNAT after routing
1257 * we need this second input/output route.
1258 */
1259 if (!__ip_vs_reroute_locally(skb))
1260 goto tx_error;
1261 }
1262
1da177e4
LT
1263 /* Another hack: avoid icmp_send in ip_fragment */
1264 skb->local_df = 1;
1265
fc604767 1266 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1da177e4
LT
1267
1268 rc = NF_STOLEN;
1269 goto out;
1270
1271 tx_error_icmp:
1272 dst_link_failure(skb);
1273 tx_error:
1274 dev_kfree_skb(skb);
1275 rc = NF_STOLEN;
1276 out:
1277 LeaveFunction(10);
1278 return rc;
1279 tx_error_put:
1280 ip_rt_put(rt);
1281 goto tx_error;
1282}
b3cdd2a7
JV
1283
1284#ifdef CONFIG_IP_VS_IPV6
1285int
1286ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
c92f5ca2 1287 struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
b3cdd2a7
JV
1288{
1289 struct rt6_info *rt; /* Route to the other host */
1290 int mtu;
1291 int rc;
fc604767 1292 int local;
c92f5ca2 1293 int rt_mode;
b3cdd2a7
JV
1294
1295 EnterFunction(10);
1296
1297 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1298 forwarded directly here, because there is no need to
1299 translate address/port back */
1300 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1301 if (cp->packet_xmit)
1302 rc = cp->packet_xmit(skb, cp, pp);
1303 else
1304 rc = NF_ACCEPT;
1305 /* do not touch skb anymore */
1306 atomic_inc(&cp->in_pkts);
1307 goto out;
1308 }
1309
1310 /*
1311 * mangle and send the packet here (only for VS/NAT)
1312 */
1313
c92f5ca2
JA
1314 /* LOCALNODE from FORWARD hook is not supported */
1315 rt_mode = (hooknum != NF_INET_FORWARD) ?
1316 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1317 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
fc604767 1318 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
c92f5ca2 1319 0, rt_mode)))
b3cdd2a7
JV
1320 goto tx_error_icmp;
1321
fc604767
JA
1322 local = __ip_vs_is_local_route6(rt);
1323 /*
1324 * Avoid duplicate tuple in reply direction for NAT traffic
1325 * to local address when connection is sync-ed
1326 */
c0cd1156 1327#if IS_ENABLED(CONFIG_NF_CONNTRACK)
fc604767
JA
1328 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1329 enum ip_conntrack_info ctinfo;
1330 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1331
1332 if (ct && !nf_ct_is_untracked(ct)) {
1333 IP_VS_DBG(10, "%s(): "
1334 "stopping DNAT to local address %pI6\n",
1335 __func__, &cp->daddr.in6);
1336 goto tx_error_put;
1337 }
1338 }
1339#endif
1340
1341 /* From world but DNAT to loopback address? */
1342 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1343 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1344 IP_VS_DBG(1, "%s(): "
1345 "stopping DNAT to loopback %pI6\n",
1346 __func__, &cp->daddr.in6);
1347 goto tx_error_put;
1348 }
1349
b3cdd2a7 1350 /* MTU checking */
d8d1f30b 1351 mtu = dst_mtu(&rt->dst);
8f1b03a4 1352 if (skb->len > mtu && !skb_is_gso(skb)) {
cb59155f
JA
1353 if (!skb->dev) {
1354 struct net *net = dev_net(skb_dst(skb)->dev);
1355
1356 skb->dev = net->loopback_dev;
1357 }
3ffe533c 1358 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1e3e238e 1359 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
fc604767 1360 goto tx_error_put;
b3cdd2a7
JV
1361 }
1362
1363 /* copy-on-write the packet before mangling it */
1364 if (!skb_make_writable(skb, offset))
1365 goto tx_error_put;
1366
d8d1f30b 1367 if (skb_cow(skb, rt->dst.dev->hard_header_len))
b3cdd2a7
JV
1368 goto tx_error_put;
1369
b3cdd2a7
JV
1370 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1371
fc604767
JA
1372 if (!local || !skb->dev) {
1373 /* drop the old route when skb is not shared */
1374 skb_dst_drop(skb);
1375 skb_dst_set(skb, &rt->dst);
1376 } else {
1377 /* destined to loopback, do we need to change route? */
1378 dst_release(&rt->dst);
1379 }
1380
b3cdd2a7
JV
1381 /* Another hack: avoid icmp_send in ip_fragment */
1382 skb->local_df = 1;
1383
fc604767 1384 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
b3cdd2a7
JV
1385
1386 rc = NF_STOLEN;
1387 goto out;
1388
1389tx_error_icmp:
1390 dst_link_failure(skb);
1391tx_error:
1392 dev_kfree_skb(skb);
1393 rc = NF_STOLEN;
1394out:
1395 LeaveFunction(10);
1396 return rc;
1397tx_error_put:
d8d1f30b 1398 dst_release(&rt->dst);
b3cdd2a7
JV
1399 goto tx_error;
1400}
1401#endif