]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: fix panic when forwarding a pkt with no in6 dev
[mirror_ubuntu-jammy-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * IPv6 output functions
1ab1457c 4 * Linux INET6 implementation
1da177e4
LT
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 8 *
1da177e4
LT
9 * Based on linux/net/ipv4/ip_output.c
10 *
1da177e4
LT
11 * Changes:
12 * A.N.Kuznetsov : airthmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 19 * Imran Patel : frag id should be in NBO
1da177e4
LT
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
1da177e4 25#include <linux/errno.h>
ef76bc23 26#include <linux/kernel.h>
1da177e4
LT
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
b59f45d0 35#include <linux/module.h>
5a0e3ad6 36#include <linux/slab.h>
1da177e4 37
33b48679 38#include <linux/bpf-cgroup.h>
1da177e4
LT
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
7bc570c8 54#include <linux/mroute6.h>
ca254490 55#include <net/l3mdev.h>
14972cbd 56#include <net/lwtunnel.h>
571912c6 57#include <net/ip_tunnels.h>
1da177e4 58
7d8c6e39 59static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
e415ed3a 63 struct inet6_dev *idev = ip6_dst_idev(dst);
5796015f 64 unsigned int hh_len = LL_RESERVED_SPACE(dev);
e415ed3a
VA
65 const struct in6_addr *daddr, *nexthop;
66 struct ipv6hdr *hdr;
f6b72b62 67 struct neighbour *neigh;
6fd6ce20 68 int ret;
1da177e4 69
5796015f 70 /* Be paranoid, rather than too clever. */
e415ed3a
VA
71 if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
72 skb = skb_expand_head(skb, hh_len);
5796015f 73 if (!skb) {
e415ed3a 74 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
5796015f
VA
75 return -ENOMEM;
76 }
77 }
78
e415ed3a
VA
79 hdr = ipv6_hdr(skb);
80 daddr = &hdr->daddr;
81 if (ipv6_addr_is_multicast(daddr)) {
7026b1dd 82 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
8571ab47 83 ((mroute6_is_socket(net, skb) &&
bd91b8bf 84 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
e415ed3a 85 ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
1da177e4
LT
86 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
87
88 /* Do not check for IFF_ALLMULTI; multicast routing
89 is not supported in any case.
90 */
91 if (newskb)
b2e0b385 92 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 93 net, sk, newskb, NULL, newskb->dev,
95603e22 94 dev_loopback_xmit);
1da177e4 95
e415ed3a 96 if (hdr->hop_limit == 0) {
78126c41 97 IP6_INC_STATS(net, idev,
3bd653c8 98 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
99 kfree_skb(skb);
100 return 0;
101 }
102 }
103
78126c41 104 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
e415ed3a 105 if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
dd408515
HFS
106 !(dev->flags & IFF_LOOPBACK)) {
107 kfree_skb(skb);
108 return 0;
109 }
1da177e4
LT
110 }
111
14972cbd
RP
112 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
113 int res = lwtunnel_xmit(skb);
114
115 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
116 return res;
117 }
118
6fd6ce20 119 rcu_read_lock_bh();
e415ed3a
VA
120 nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
121 neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
6fd6ce20 122 if (unlikely(!neigh))
e415ed3a 123 neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
6fd6ce20 124 if (!IS_ERR(neigh)) {
4ff06203 125 sock_confirm_neigh(skb, neigh);
0353f282 126 ret = neigh_output(neigh, skb, false);
6fd6ce20
YH
127 rcu_read_unlock_bh();
128 return ret;
129 }
130 rcu_read_unlock_bh();
05e3aa09 131
e415ed3a 132 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
133 kfree_skb(skb);
134 return -EINVAL;
1da177e4
LT
135}
136
b210de4f
AL
137static int
138ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
139 struct sk_buff *skb, unsigned int mtu)
140{
141 struct sk_buff *segs, *nskb;
142 netdev_features_t features;
143 int ret = 0;
144
145 /* Please see corresponding comment in ip_finish_output_gso
146 * describing the cases where GSO segment length exceeds the
147 * egress MTU.
148 */
149 features = netif_skb_features(skb);
150 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
151 if (IS_ERR_OR_NULL(segs)) {
152 kfree_skb(skb);
153 return -ENOMEM;
154 }
155
156 consume_skb(skb);
157
158 skb_list_walk_safe(segs, segs, nskb) {
159 int err;
160
161 skb_mark_not_on_list(segs);
162 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
163 if (err && ret == 0)
164 ret = err;
165 }
166
167 return ret;
168}
169
956fe219 170static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 171{
b210de4f
AL
172 unsigned int mtu;
173
09ee9dba
TB
174#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
175 /* Policy lookup after SNAT yielded a new policy */
176 if (skb_dst(skb)->xfrm) {
e829e0d5 177 IP6CB(skb)->flags |= IP6SKB_REROUTED;
09ee9dba
TB
178 return dst_output(net, sk, skb);
179 }
180#endif
181
b210de4f
AL
182 mtu = ip6_skb_dst_mtu(skb);
183 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
184 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
185
186 if ((skb->len > mtu && !skb_is_gso(skb)) ||
9037c357
JP
187 dst_allfrag(skb_dst(skb)) ||
188 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 189 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 190 else
7d8c6e39 191 return ip6_finish_output2(net, sk, skb);
9e508490
JE
192}
193
956fe219 194static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
195{
196 int ret;
197
198 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
199 switch (ret) {
200 case NET_XMIT_SUCCESS:
201 return __ip6_finish_output(net, sk, skb);
202 case NET_XMIT_CN:
203 return __ip6_finish_output(net, sk, skb) ? : ret;
204 default:
205 kfree_skb(skb);
206 return ret;
207 }
208}
209
ede2059d 210int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 211{
28f8bfd1 212 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
adf30907 213 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 214
97a7a37a
CF
215 skb->protocol = htons(ETH_P_IPV6);
216 skb->dev = dev;
217
778d80be 218 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 219 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
220 kfree_skb(skb);
221 return 0;
222 }
223
29a26a56 224 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
28f8bfd1 225 net, sk, skb, indev, dev,
9c6eb28a
JE
226 ip6_finish_output,
227 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4 228}
6585d7dc 229EXPORT_SYMBOL(ip6_output);
1da177e4 230
e9191ffb 231bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
232{
233 if (!np->autoflowlabel_set)
234 return ip6_default_np_autolabel(net);
235 else
236 return np->autoflowlabel;
237}
238
1da177e4 239/*
1c1e9d2b
ED
240 * xmit an sk_buff (used by TCP, SCTP and DCCP)
241 * Note : socket lock is not held for SYNACK packets, but might be modified
242 * by calls to skb_set_owner_w() and ipv6_local_error(),
243 * which are using proper atomic operations or spinlocks.
1da177e4 244 */
1c1e9d2b 245int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
4f6570d7 246 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
1da177e4 247{
3bd653c8 248 struct net *net = sock_net(sk);
1c1e9d2b 249 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 250 struct in6_addr *first_hop = &fl6->daddr;
adf30907 251 struct dst_entry *dst = skb_dst(skb);
0c9f227b
VA
252 struct net_device *dev = dst->dev;
253 struct inet6_dev *idev = ip6_dst_idev(dst);
66033f47 254 unsigned int head_room;
1da177e4 255 struct ipv6hdr *hdr;
4c9483b2 256 u8 proto = fl6->flowi6_proto;
1da177e4 257 int seg_len = skb->len;
e651f03a 258 int hlimit = -1;
1da177e4
LT
259 u32 mtu;
260
0c9f227b 261 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
66033f47
SB
262 if (opt)
263 head_room += opt->opt_nflen + opt->opt_flen;
264
0c9f227b
VA
265 if (unlikely(head_room > skb_headroom(skb))) {
266 skb = skb_expand_head(skb, head_room);
267 if (!skb) {
268 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
66033f47 269 return -ENOBUFS;
1da177e4 270 }
66033f47
SB
271 }
272
273 if (opt) {
274 seg_len += opt->opt_nflen + opt->opt_flen;
275
1da177e4
LT
276 if (opt->opt_flen)
277 ipv6_push_frag_opts(skb, opt, &proto);
66033f47 278
1da177e4 279 if (opt->opt_nflen)
613fa3ca
DL
280 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
281 &fl6->saddr);
1da177e4
LT
282 }
283
e2d1bca7
ACM
284 skb_push(skb, sizeof(struct ipv6hdr));
285 skb_reset_network_header(skb);
0660e03f 286 hdr = ipv6_hdr(skb);
1da177e4
LT
287
288 /*
289 * Fill in the IPv6 header
290 */
b903d324 291 if (np)
1da177e4
LT
292 hlimit = np->hop_limit;
293 if (hlimit < 0)
6b75d090 294 hlimit = ip6_dst_hoplimit(dst);
1da177e4 295
cb1ce2ef 296 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 297 ip6_autoflowlabel(net, np), fl6));
41a1f8ea 298
1da177e4
LT
299 hdr->payload_len = htons(seg_len);
300 hdr->nexthdr = proto;
301 hdr->hop_limit = hlimit;
302
4e3fd7a0
AD
303 hdr->saddr = fl6->saddr;
304 hdr->daddr = *first_hop;
1da177e4 305
9c9c9ad5 306 skb->protocol = htons(ETH_P_IPV6);
4f6570d7 307 skb->priority = priority;
92e55f41 308 skb->mark = mark;
a2c2064f 309
1da177e4 310 mtu = dst_mtu(dst);
60ff7467 311 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
0c9f227b 312 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
313
314 /* if egress device is enslaved to an L3 master device pass the
315 * skb to its handler for processing
316 */
317 skb = l3mdev_ip6_out((struct sock *)sk, skb);
318 if (unlikely(!skb))
319 return 0;
320
1c1e9d2b
ED
321 /* hooks should never assume socket lock is held.
322 * we promote our socket to non const
323 */
29a26a56 324 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
0c9f227b 325 net, (struct sock *)sk, skb, NULL, dev,
13206b6b 326 dst_output);
1da177e4
LT
327 }
328
0c9f227b 329 skb->dev = dev;
1c1e9d2b
ED
330 /* ipv6_local_error() does not require socket lock,
331 * we promote our socket to non const
332 */
333 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
334
0c9f227b 335 IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
336 kfree_skb(skb);
337 return -EMSGSIZE;
338}
7159039a
YH
339EXPORT_SYMBOL(ip6_xmit);
340
1da177e4
LT
341static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
342{
343 struct ip6_ra_chain *ra;
344 struct sock *last = NULL;
345
346 read_lock(&ip6_ra_lock);
347 for (ra = ip6_ra_chain; ra; ra = ra->next) {
348 struct sock *sk = ra->sk;
0bd1b59b
AM
349 if (sk && ra->sel == sel &&
350 (!sk->sk_bound_dev_if ||
351 sk->sk_bound_dev_if == skb->dev->ifindex)) {
9036b2fe
FR
352 struct ipv6_pinfo *np = inet6_sk(sk);
353
354 if (np && np->rtalert_isolate &&
355 !net_eq(sock_net(sk), dev_net(skb->dev))) {
356 continue;
357 }
1da177e4
LT
358 if (last) {
359 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
360 if (skb2)
361 rawv6_rcv(last, skb2);
362 }
363 last = sk;
364 }
365 }
366
367 if (last) {
368 rawv6_rcv(last, skb);
369 read_unlock(&ip6_ra_lock);
370 return 1;
371 }
372 read_unlock(&ip6_ra_lock);
373 return 0;
374}
375
e21e0b5f
VN
376static int ip6_forward_proxy_check(struct sk_buff *skb)
377{
0660e03f 378 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 379 u8 nexthdr = hdr->nexthdr;
75f2811c 380 __be16 frag_off;
e21e0b5f
VN
381 int offset;
382
383 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 384 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
385 if (offset < 0)
386 return 0;
387 } else
388 offset = sizeof(struct ipv6hdr);
389
390 if (nexthdr == IPPROTO_ICMPV6) {
391 struct icmp6hdr *icmp6;
392
d56f90a7
ACM
393 if (!pskb_may_pull(skb, (skb_network_header(skb) +
394 offset + 1 - skb->data)))
e21e0b5f
VN
395 return 0;
396
d56f90a7 397 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
398
399 switch (icmp6->icmp6_type) {
400 case NDISC_ROUTER_SOLICITATION:
401 case NDISC_ROUTER_ADVERTISEMENT:
402 case NDISC_NEIGHBOUR_SOLICITATION:
403 case NDISC_NEIGHBOUR_ADVERTISEMENT:
404 case NDISC_REDIRECT:
405 /* For reaction involving unicast neighbor discovery
406 * message destined to the proxied address, pass it to
407 * input function.
408 */
409 return 1;
410 default:
411 break;
412 }
413 }
414
74553b09
VN
415 /*
416 * The proxying router can't forward traffic sent to a link-local
417 * address, so signal the sender and discard the packet. This
418 * behavior is clarified by the MIPv6 specification.
419 */
420 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
421 dst_link_failure(skb);
422 return -1;
423 }
424
e21e0b5f
VN
425 return 0;
426}
427
0c4b51f0
EB
428static inline int ip6_forward_finish(struct net *net, struct sock *sk,
429 struct sk_buff *skb)
1da177e4 430{
71a1c915
JB
431 struct dst_entry *dst = skb_dst(skb);
432
433 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
434 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
435
f839a6c9
IS
436#ifdef CONFIG_NET_SWITCHDEV
437 if (skb->offload_l3_fwd_mark) {
438 consume_skb(skb);
439 return 0;
440 }
441#endif
442
8203e2d8 443 skb->tstamp = 0;
13206b6b 444 return dst_output(net, sk, skb);
1da177e4
LT
445}
446
fe6cc55f
FW
447static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
448{
418a3156 449 if (skb->len <= mtu)
fe6cc55f
FW
450 return false;
451
60ff7467 452 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
453 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
454 return true;
455
60ff7467 456 if (skb->ignore_df)
418a3156
FW
457 return false;
458
779b7931 459 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
fe6cc55f
FW
460 return false;
461
462 return true;
463}
464
1da177e4
LT
465int ip6_forward(struct sk_buff *skb)
466{
adf30907 467 struct dst_entry *dst = skb_dst(skb);
0660e03f 468 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 469 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 470 struct net *net = dev_net(dst->dev);
0857d6f8 471 struct inet6_dev *idev;
14f3ad6f 472 u32 mtu;
1ab1457c 473
0857d6f8 474 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
56e9a295
AR
475 if (unlikely(!idev))
476 idev = __in6_dev_get_safely(skb->dev);
477
53b7997f 478 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
479 goto error;
480
090f1166
LR
481 if (skb->pkt_type != PACKET_HOST)
482 goto drop;
483
9ef2e965
HFS
484 if (unlikely(skb->sk))
485 goto drop;
486
4497b076
BH
487 if (skb_warn_if_lro(skb))
488 goto drop;
489
ccd27f05 490 if (!net->ipv6.devconf_all->disable_policy &&
2ff1db7e 491 (!idev || !idev->cnf.disable_policy) &&
ccd27f05 492 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
bdb7cc64 493 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
494 goto drop;
495 }
496
35fc92a9 497 skb_forward_csum(skb);
1da177e4
LT
498
499 /*
500 * We DO NOT make any processing on
501 * RA packets, pushing them to user level AS IS
502 * without ane WARRANTY that application will be able
503 * to interpret them. The reason is that we
504 * cannot make anything clever here.
505 *
506 * We are not end-node, so that if packet contains
507 * AH/ESP, we cannot make anything.
508 * Defragmentation also would be mistake, RA packets
509 * cannot be fragmented, because there is no warranty
510 * that different fragments will go along one path. --ANK
511 */
ab4eb353
YH
512 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
513 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
514 return 0;
515 }
516
517 /*
518 * check and decrement ttl
519 */
520 if (hdr->hop_limit <= 1) {
3ffe533c 521 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
bdb7cc64 522 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
523
524 kfree_skb(skb);
525 return -ETIMEDOUT;
526 }
527
fbea49e1 528 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 529 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 530 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09 531 int proxied = ip6_forward_proxy_check(skb);
46c7655f
KP
532 if (proxied > 0) {
533 hdr->hop_limit--;
e21e0b5f 534 return ip6_input(skb);
46c7655f 535 } else if (proxied < 0) {
bdb7cc64 536 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
74553b09
VN
537 goto drop;
538 }
e21e0b5f
VN
539 }
540
1da177e4 541 if (!xfrm6_route_forward(skb)) {
bdb7cc64 542 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
543 goto drop;
544 }
adf30907 545 dst = skb_dst(skb);
1da177e4
LT
546
547 /* IPv6 specs say nothing about it, but it is clear that we cannot
548 send redirects to source routed frames.
1e5dc146 549 We don't send redirects to frames decapsulated from IPsec.
1da177e4 550 */
2f17becf
SS
551 if (IP6CB(skb)->iif == dst->dev->ifindex &&
552 opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 553 struct in6_addr *target = NULL;
fbfe95a4 554 struct inet_peer *peer;
1da177e4 555 struct rt6_info *rt;
1da177e4
LT
556
557 /*
558 * incoming and outgoing devices are the same
559 * send a redirect.
560 */
561
562 rt = (struct rt6_info *) dst;
c45a3dfb
DM
563 if (rt->rt6i_flags & RTF_GATEWAY)
564 target = &rt->rt6i_gateway;
1da177e4
LT
565 else
566 target = &hdr->daddr;
567
fd0273d7 568 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 569
1da177e4
LT
570 /* Limit redirects both by destination (here)
571 and by source (inside ndisc_send_redirect)
572 */
fbfe95a4 573 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 574 ndisc_send_redirect(skb, target);
1d861aa4
DM
575 if (peer)
576 inet_putpeer(peer);
5bb1ab09
DS
577 } else {
578 int addrtype = ipv6_addr_type(&hdr->saddr);
579
1da177e4 580 /* This check is security critical. */
f81b2e7d
YH
581 if (addrtype == IPV6_ADDR_ANY ||
582 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
583 goto error;
584 if (addrtype & IPV6_ADDR_LINKLOCAL) {
585 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 586 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
587 goto error;
588 }
1da177e4
LT
589 }
590
427faee1 591 mtu = ip6_dst_mtu_maybe_forward(dst, true);
14f3ad6f
UW
592 if (mtu < IPV6_MIN_MTU)
593 mtu = IPV6_MIN_MTU;
594
fe6cc55f 595 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
596 /* Again, force OUTPUT device used as source address */
597 skb->dev = dst->dev;
14f3ad6f 598 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
bdb7cc64 599 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
1d015503
ED
600 __IP6_INC_STATS(net, ip6_dst_idev(dst),
601 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
602 kfree_skb(skb);
603 return -EMSGSIZE;
604 }
605
606 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
607 __IP6_INC_STATS(net, ip6_dst_idev(dst),
608 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
609 goto drop;
610 }
611
0660e03f 612 hdr = ipv6_hdr(skb);
1da177e4
LT
613
614 /* Mangling hops number delayed to point after skb COW */
1ab1457c 615
1da177e4
LT
616 hdr->hop_limit--;
617
29a26a56
EB
618 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
619 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 620 ip6_forward_finish);
1da177e4
LT
621
622error:
bdb7cc64 623 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
624drop:
625 kfree_skb(skb);
626 return -EINVAL;
627}
628
629static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
630{
631 to->pkt_type = from->pkt_type;
632 to->priority = from->priority;
633 to->protocol = from->protocol;
adf30907
ED
634 skb_dst_drop(to);
635 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 636 to->dev = from->dev;
82e91ffe 637 to->mark = from->mark;
1da177e4 638
3dd1c9a1
PA
639 skb_copy_hash(to, from);
640
1da177e4
LT
641#ifdef CONFIG_NET_SCHED
642 to->tc_index = from->tc_index;
643#endif
e7ac05f3 644 nf_copy(to, from);
df5042f4 645 skb_ext_copy(to, from);
984bc16c 646 skb_copy_secmark(to, from);
1da177e4
LT
647}
648
0feca619
PNA
649int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
650 u8 nexthdr, __be32 frag_id,
651 struct ip6_fraglist_iter *iter)
652{
653 unsigned int first_len;
654 struct frag_hdr *fh;
655
656 /* BUILD HEADER */
657 *prevhdr = NEXTHDR_FRAGMENT;
658 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
659 if (!iter->tmp_hdr)
660 return -ENOMEM;
661
b7034146 662 iter->frag = skb_shinfo(skb)->frag_list;
0feca619
PNA
663 skb_frag_list_init(skb);
664
665 iter->offset = 0;
666 iter->hlen = hlen;
667 iter->frag_id = frag_id;
668 iter->nexthdr = nexthdr;
669
670 __skb_pull(skb, hlen);
671 fh = __skb_push(skb, sizeof(struct frag_hdr));
672 __skb_push(skb, hlen);
673 skb_reset_network_header(skb);
674 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
675
676 fh->nexthdr = nexthdr;
677 fh->reserved = 0;
678 fh->frag_off = htons(IP6_MF);
679 fh->identification = frag_id;
680
681 first_len = skb_pagelen(skb);
682 skb->data_len = first_len - skb_headlen(skb);
683 skb->len = first_len;
684 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
685
686 return 0;
687}
688EXPORT_SYMBOL(ip6_fraglist_init);
689
690void ip6_fraglist_prepare(struct sk_buff *skb,
691 struct ip6_fraglist_iter *iter)
692{
693 struct sk_buff *frag = iter->frag;
694 unsigned int hlen = iter->hlen;
695 struct frag_hdr *fh;
696
697 frag->ip_summed = CHECKSUM_NONE;
698 skb_reset_transport_header(frag);
699 fh = __skb_push(frag, sizeof(struct frag_hdr));
700 __skb_push(frag, hlen);
701 skb_reset_network_header(frag);
702 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
703 iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
704 fh->nexthdr = iter->nexthdr;
705 fh->reserved = 0;
706 fh->frag_off = htons(iter->offset);
707 if (frag->next)
708 fh->frag_off |= htons(IP6_MF);
709 fh->identification = iter->frag_id;
710 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
711 ip6_copy_metadata(frag, skb);
712}
713EXPORT_SYMBOL(ip6_fraglist_prepare);
714
8a6a1f17
PNA
715void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
716 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
717 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
718{
719 state->prevhdr = prevhdr;
720 state->nexthdr = nexthdr;
721 state->frag_id = frag_id;
722
723 state->hlen = hlen;
724 state->mtu = mtu;
725
726 state->left = skb->len - hlen; /* Space per frame */
727 state->ptr = hlen; /* Where to start from */
728
729 state->hroom = hdr_room;
730 state->troom = needed_tailroom;
731
732 state->offset = 0;
733}
734EXPORT_SYMBOL(ip6_frag_init);
735
736struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
737{
738 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
739 struct sk_buff *frag;
740 struct frag_hdr *fh;
741 unsigned int len;
742
743 len = state->left;
744 /* IF: it doesn't fit, use 'mtu' - the data space left */
745 if (len > state->mtu)
746 len = state->mtu;
747 /* IF: we are not sending up to and including the packet end
748 then align the next start on an eight byte boundary */
749 if (len < state->left)
750 len &= ~7;
751
752 /* Allocate buffer */
753 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
754 state->hroom + state->troom, GFP_ATOMIC);
755 if (!frag)
756 return ERR_PTR(-ENOMEM);
757
758 /*
759 * Set up data on packet
760 */
761
762 ip6_copy_metadata(frag, skb);
763 skb_reserve(frag, state->hroom);
764 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
765 skb_reset_network_header(frag);
766 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
767 frag->transport_header = (frag->network_header + state->hlen +
768 sizeof(struct frag_hdr));
769
770 /*
771 * Charge the memory for the fragment to any owner
772 * it might possess
773 */
774 if (skb->sk)
775 skb_set_owner_w(frag, skb->sk);
776
777 /*
778 * Copy the packet header into the new buffer.
779 */
780 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
781
782 fragnexthdr_offset = skb_network_header(frag);
783 fragnexthdr_offset += prevhdr - skb_network_header(skb);
784 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
785
786 /*
787 * Build fragment header.
788 */
789 fh->nexthdr = state->nexthdr;
790 fh->reserved = 0;
791 fh->identification = state->frag_id;
792
793 /*
794 * Copy a block of the IP datagram.
795 */
796 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
797 len));
798 state->left -= len;
799
800 fh->frag_off = htons(state->offset);
801 if (state->left > 0)
802 fh->frag_off |= htons(IP6_MF);
803 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
804
805 state->ptr += len;
806 state->offset += len;
807
808 return frag;
809}
810EXPORT_SYMBOL(ip6_frag_next);
811
7d8c6e39
EB
812int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
813 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 814{
1da177e4 815 struct sk_buff *frag;
67ba4152 816 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 817 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
818 inet6_sk(skb->sk) : NULL;
8a6a1f17
PNA
819 struct ip6_frag_state state;
820 unsigned int mtu, hlen, nexthdr_offset;
9669fffc 821 ktime_t tstamp = skb->tstamp;
8a6a1f17 822 int hroom, err = 0;
286c2349 823 __be32 frag_id;
1da177e4
LT
824 u8 *prevhdr, nexthdr = 0;
825
7dd7eb95
DM
826 err = ip6_find_1stfragopt(skb, &prevhdr);
827 if (err < 0)
2423496a 828 goto fail;
7dd7eb95 829 hlen = err;
1da177e4 830 nexthdr = *prevhdr;
ef0efcd3 831 nexthdr_offset = prevhdr - skb_network_header(skb);
1da177e4 832
628a5c56 833 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
834
835 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 836 * or if the skb it not generated by a local socket.
b881ef76 837 */
485fca66
FW
838 if (unlikely(!skb->ignore_df && skb->len > mtu))
839 goto fail_toobig;
a34a101e 840
485fca66
FW
841 if (IP6CB(skb)->frag_max_size) {
842 if (IP6CB(skb)->frag_max_size > mtu)
843 goto fail_toobig;
844
845 /* don't send fragments larger than what we received */
846 mtu = IP6CB(skb)->frag_max_size;
847 if (mtu < IPV6_MIN_MTU)
848 mtu = IPV6_MIN_MTU;
b881ef76
JH
849 }
850
d91675f9
YH
851 if (np && np->frag_size < mtu) {
852 if (np->frag_size)
853 mtu = np->frag_size;
854 }
89bc7848 855 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 856 goto fail_toobig;
1e0d69a9 857 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 858
fd0273d7
MKL
859 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
860 &ipv6_hdr(skb)->saddr);
286c2349 861
405c92f7
HFS
862 if (skb->ip_summed == CHECKSUM_PARTIAL &&
863 (err = skb_checksum_help(skb)))
864 goto fail;
865
ef0efcd3 866 prevhdr = skb_network_header(skb) + nexthdr_offset;
1d325d21 867 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 868 if (skb_has_frag_list(skb)) {
c72d8cda 869 unsigned int first_len = skb_pagelen(skb);
0feca619 870 struct ip6_fraglist_iter iter;
3d13008e 871 struct sk_buff *frag2;
1da177e4
LT
872
873 if (first_len - hlen > mtu ||
874 ((first_len - hlen) & 7) ||
1d325d21
FW
875 skb_cloned(skb) ||
876 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
877 goto slow_path;
878
4d9092bb 879 skb_walk_frags(skb, frag) {
1da177e4
LT
880 /* Correct geometry. */
881 if (frag->len > mtu ||
882 ((frag->len & 7) && frag->next) ||
1d325d21 883 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 884 goto slow_path_clean;
1da177e4 885
1da177e4
LT
886 /* Partially cloned skb? */
887 if (skb_shared(frag))
3d13008e 888 goto slow_path_clean;
2fdba6b0
HX
889
890 BUG_ON(frag->sk);
891 if (skb->sk) {
2fdba6b0
HX
892 frag->sk = skb->sk;
893 frag->destructor = sock_wfree;
2fdba6b0 894 }
3d13008e 895 skb->truesize -= frag->truesize;
1da177e4
LT
896 }
897
0feca619
PNA
898 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
899 &iter);
900 if (err < 0)
1d325d21 901 goto fail;
a11d206d 902
1da177e4
LT
903 for (;;) {
904 /* Prepare header of the next frame,
905 * before previous one went down. */
0feca619
PNA
906 if (iter.frag)
907 ip6_fraglist_prepare(skb, &iter);
1ab1457c 908
9669fffc 909 skb->tstamp = tstamp;
7d8c6e39 910 err = output(net, sk, skb);
67ba4152 911 if (!err)
d8d1f30b 912 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 913 IPSTATS_MIB_FRAGCREATES);
dafee490 914
0feca619 915 if (err || !iter.frag)
1da177e4
LT
916 break;
917
0feca619 918 skb = ip6_fraglist_next(&iter);
1da177e4
LT
919 }
920
0feca619 921 kfree(iter.tmp_hdr);
1da177e4
LT
922
923 if (err == 0) {
d8d1f30b 924 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 925 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
926 return 0;
927 }
928
b7034146 929 kfree_skb_list(iter.frag);
1da177e4 930
d8d1f30b 931 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 932 IPSTATS_MIB_FRAGFAILS);
1da177e4 933 return err;
3d13008e
ED
934
935slow_path_clean:
936 skb_walk_frags(skb, frag2) {
937 if (frag2 == frag)
938 break;
939 frag2->sk = NULL;
940 frag2->destructor = NULL;
941 skb->truesize += frag2->truesize;
942 }
1da177e4
LT
943 }
944
945slow_path:
1da177e4
LT
946 /*
947 * Fragment the datagram.
948 */
949
8a6a1f17
PNA
950 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
951 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
952 &state);
1da177e4
LT
953
954 /*
955 * Keep copying data until we run out.
956 */
1da177e4 957
8a6a1f17
PNA
958 while (state.left > 0) {
959 frag = ip6_frag_next(skb, &state);
960 if (IS_ERR(frag)) {
961 err = PTR_ERR(frag);
1da177e4
LT
962 goto fail;
963 }
964
1da177e4
LT
965 /*
966 * Put this fragment into the sending queue.
967 */
9669fffc 968 frag->tstamp = tstamp;
7d8c6e39 969 err = output(net, sk, frag);
1da177e4
LT
970 if (err)
971 goto fail;
dafee490 972
adf30907 973 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 974 IPSTATS_MIB_FRAGCREATES);
1da177e4 975 }
adf30907 976 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 977 IPSTATS_MIB_FRAGOKS);
808db80a 978 consume_skb(skb);
1da177e4
LT
979 return err;
980
485fca66
FW
981fail_toobig:
982 if (skb->sk && dst_allfrag(skb_dst(skb)))
983 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
984
485fca66
FW
985 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
986 err = -EMSGSIZE;
987
1da177e4 988fail:
adf30907 989 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 990 IPSTATS_MIB_FRAGFAILS);
1ab1457c 991 kfree_skb(skb);
1da177e4
LT
992 return err;
993}
994
b71d1d42
ED
995static inline int ip6_rt_check(const struct rt6key *rt_key,
996 const struct in6_addr *fl_addr,
997 const struct in6_addr *addr_cache)
cf6b1982 998{
a02cec21 999 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 1000 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
1001}
1002
497c615a
HX
/* Validate a socket's cached dst against the flow @fl6.
 *
 * Returns @dst unchanged when it is still usable for this flow, or
 * releases it and returns NULL when the caller must perform a fresh
 * route lookup.  A NULL or non-IPv6 dst is always rejected (the
 * non-IPv6 case is released before returning NULL).
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached dst from another family (e.g. v4-mapped traffic)
	 * cannot be reused for an IPv6 flow.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1049
/* Core of the IPv6 route lookup used by ip6_dst_lookup{,_flow}().
 *
 * Fills *@dst with the route for @fl6, selecting a source address first
 * when the flow has none.  On success returns 0 with *@dst held; on
 * failure returns a negative errno with *@dst released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		/* rt->from is RCU-protected; hold the read lock across
		 * the dereference and the saddr selection that uses it.
		 */
		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Zero daddr so the lookup resolves the default
			 * route rather than the (optimistic) host route.
			 */
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source with a non-v4-mapped, non-any destination
	 * is not a routable combination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
34a0b3cd 1165
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	*@dst is always (re)initialized to NULL before the lookup, so a
 *	stale caller-provided value is never dereferenced.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
3cf3dc6c
ACM
1183EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1184
497c615a 1185/**
68d0c6d3 1186 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
b51cd7c8 1187 * @net: Network namespace to perform lookup in
68d0c6d3 1188 * @sk: socket which provides route info
4c9483b2 1189 * @fl6: flow to lookup
68d0c6d3 1190 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1191 *
1192 * This function performs a route lookup on the given flow.
1193 *
1194 * It returns a valid dst pointer on success, or a pointer encoded
1195 * error code.
1196 */
c4e85f73 1197struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1198 const struct in6_addr *final_dst)
68d0c6d3
DM
1199{
1200 struct dst_entry *dst = NULL;
1201 int err;
1202
c4e85f73 1203 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
68d0c6d3
DM
1204 if (err)
1205 return ERR_PTR(err);
1206 if (final_dst)
4e3fd7a0 1207 fl6->daddr = *final_dst;
2774c131 1208
c4e85f73 1209 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1210}
1211EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1212
1213/**
1214 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1215 * @sk: socket which provides the dst cache and route info
4c9483b2 1216 * @fl6: flow to lookup
68d0c6d3 1217 * @final_dst: final destination address for ipsec lookup
96818159 1218 * @connected: whether @sk is connected or not
497c615a
HX
1219 *
1220 * This function performs a route lookup on the given flow with the
1221 * possibility of using the cached route in the socket if it is valid.
1222 * It will take the socket dst lock when operating on the dst cache.
1223 * As a result, this function can only be used in process context.
1224 *
96818159
AK
1225 * In addition, for a connected socket, cache the dst in the socket
1226 * if the current cache is not valid.
1227 *
68d0c6d3
DM
1228 * It returns a valid dst pointer on success, or a pointer encoded
1229 * error code.
497c615a 1230 */
4c9483b2 1231struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1232 const struct in6_addr *final_dst,
1233 bool connected)
497c615a 1234{
68d0c6d3 1235 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1236
4c9483b2 1237 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1238 if (dst)
1239 return dst;
1240
c4e85f73 1241 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
96818159
AK
1242 if (connected && !IS_ERR(dst))
1243 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1244
00bc0ef5 1245 return dst;
497c615a 1246}
68d0c6d3 1247EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1248
571912c6
MV
/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 * This function performs a route lookup on a tunnel
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: reuse the per-tunnel cached dst when allowed.
	 * The cast drops const from info->dst_cache; the cache itself
	 * is mutable state embedded in the tunnel info.
	 */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow key from the tunnel metadata. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		/* Route points back at the tunnel itself: would loop. */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	/* Report the source address chosen by the lookup. */
	*saddr = fl6.saddr;
	return dst;
}
1316EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1317
0178b695
HX
1318static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1319 gfp_t gfp)
1320{
1321 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1322}
1323
1324static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1325 gfp_t gfp)
1326{
1327 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1328}
1329
75a493e6 1330static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1331 int *maxfraglen,
1332 unsigned int fragheaderlen,
1333 struct sk_buff *skb,
75a493e6 1334 struct rt6_info *rt,
e367c2d0 1335 unsigned int orig_mtu)
0c183379
G
1336{
1337 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1338 if (!skb) {
0c183379 1339 /* first fragment, reserve header_len */
e367c2d0 1340 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1341
1342 } else {
1343 /*
1344 * this fragment is not first, the headers
1345 * space is regarded as data space.
1346 */
e367c2d0 1347 *mtu = orig_mtu;
0c183379
G
1348 }
1349 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1350 + fragheaderlen - sizeof(struct frag_hdr);
1351 }
1352}
1353
/* Initialize the cork state for an append-data sequence on @sk.
 *
 * Deep-copies the tx options from @ipc6 into @v6_cork (each sub-option
 * duplicated with ip6_opt_dup/ip6_rthdr_dup), takes a reference on
 * @rt's dst and records the flow, hop limit, traffic class, fragsize
 * (mtu), gso size, mark and transmit time in the cork.
 *
 * Returns 0 on success, -EINVAL if options are already corked, or
 * -ENOBUFS on allocation failure.  NOTE(review): on a partial option
 * duplication failure the already-duplicated pieces appear to be left
 * for ip6_cork_release() to free — confirm against callers.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* Options must not already be corked. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a reference on the route until release. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Select the path MTU; PMTUDISC_PROBE uses the raw device MTU. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller socket-configured frag_size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1428
0bbe84a6
VY
/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes pulled
 * via @getfrag onto @queue, building MTU-sized skbs with room reserved
 * for the IPv6 header, extension headers and a fragment header.
 *
 * Handles corked sends (MSG_MORE), UDP GSO (cork->gso_size), hardware
 * checksum offload (CHECKSUM_PARTIAL) and MSG_ZEROCOPY.  Accumulated
 * truesize is charged to sk_wmem_alloc once at the end via
 * wmem_alloc_delta.  Returns 0 on success or a negative errno; on error
 * the cork length is rolled back and OUTDISCARDS is bumped.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* Extension-header space is only accounted on the first skb of
	 * a corked sequence.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* Reject MTUs too small to carry even a fragment header. */
	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* With IPV6_DONTFRAG on UDP/RAW, report the path MTU to the
	 * application instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device cannot do SG+csum: fall back to copying,
			 * but keep the uarg for completion notification.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				/* Paged path: linear part capped at
				 * MAX_HEADER, the rest goes in page frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Manual wmem check: these skbs are charged
				 * via wmem_alloc_delta, not at alloc time.
				 */
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, fixing up its checksum.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No SG: copy into the skb's linear tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* SG without zerocopy: copy into page frags. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: link the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1778
/* Append data to the socket's write queue (the corked-send entry point).
 *
 * On the first call of a corked sequence (empty write queue) the cork
 * is set up from @ipc6/@rt/@fl6 and the option length is added to both
 * @length and @transhdrlen; on subsequent calls the flow and options
 * come from the existing cork and @transhdrlen is forced to 0.
 * MSG_PROBE performs no work.  Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Continuing a corked send: reuse the corked flow. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
a495f836 1814EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1815
366e41d9
VY
1816static void ip6_cork_release(struct inet_cork_full *cork,
1817 struct inet6_cork *v6_cork)
bf138862 1818{
366e41d9
VY
1819 if (v6_cork->opt) {
1820 kfree(v6_cork->opt->dst0opt);
1821 kfree(v6_cork->opt->dst1opt);
1822 kfree(v6_cork->opt->hopopt);
1823 kfree(v6_cork->opt->srcrt);
1824 kfree(v6_cork->opt);
1825 v6_cork->opt = NULL;
0178b695
HX
1826 }
1827
366e41d9
VY
1828 if (cork->base.dst) {
1829 dst_release(cork->base.dst);
1830 cork->base.dst = NULL;
1831 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1832 }
366e41d9 1833 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1834}
1835
6422398c
VY
/* Collapse the queued append-data skbs into one packet ready to send.
 *
 * The first skb on @queue becomes the head; the rest are chained onto
 * its frag_list.  Extension headers and the IPv6 header are then pushed
 * in front, per-packet fields (priority, mark, tstamp, dst) are filled
 * from the cork/socket, OUT stats are bumped, and the cork is released.
 * Returns the assembled skb, or NULL when the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* Ownership moves to the head skb; drop the per-skb
		 * socket accounting on the chained ones.
		 */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Keep the real destination: a routing header may rewrite
	 * fl6->daddr while the options are pushed.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1911
1912int ip6_send_skb(struct sk_buff *skb)
1913{
1914 struct net *net = sock_net(skb->sk);
1915 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1916 int err;
1917
33224b16 1918 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1919 if (err) {
1920 if (err > 0)
6ce9e7b5 1921 err = net_xmit_errno(err);
1da177e4 1922 if (err)
6422398c
VY
1923 IP6_INC_STATS(net, rt->rt6i_idev,
1924 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1925 }
1926
1da177e4 1927 return err;
6422398c
VY
1928}
1929
/* Finalize the socket's pending corked data into one skb and send it.
 * A NULL result from ip6_finish_skb() (nothing pending) is success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
a495f836 1940EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1941
0bbe84a6 1942static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1943 struct sk_buff_head *queue,
1944 struct inet_cork_full *cork,
1945 struct inet6_cork *v6_cork)
1da177e4 1946{
1da177e4
LT
1947 struct sk_buff *skb;
1948
0bbe84a6 1949 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1950 if (skb_dst(skb))
1951 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1952 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1953 kfree_skb(skb);
1954 }
1955
6422398c 1956 ip6_cork_release(cork, v6_cork);
1da177e4 1957}
0bbe84a6
VY
1958
/* Public wrapper: flush the socket's write queue and its cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
a495f836 1964EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1965
/* Build a complete packet in one shot without corking on the socket.
 *
 * Uses a local queue and the caller-provided @cork: sets up the cork,
 * appends all data via __ip6_append_data() with current->task_frag,
 * then assembles the final skb with __ip6_make_skb().  On any failure
 * the partial state is flushed/released and an ERR_PTR is returned;
 * MSG_PROBE returns NULL.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	/* Start from a clean cork; @cork is caller storage. */
	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		/* Release whatever ip6_setup_cork() managed to take. */
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}