ipv6: Check available headroom in ip6_xmit() even without options
net/ipv6/ip6_output.c (mirror_ubuntu-bionic-kernel.git)
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

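/*
 * Note on ip6_xmit() headroom handling (see the commit subject above,
 * "ipv6: Check available headroom in ip6_xmit() even without options"):
 * head_room always covers sizeof(struct ipv6hdr) plus
 * LL_RESERVED_SPACE(dst->dev), and opt->opt_nflen/opt->opt_flen are only
 * added on top of that when options are present.  The
 * skb_realloc_headroom() path is therefore taken even for option-less
 * packets whose headroom is smaller than what the output device needs,
 * which presumably keeps the later skb_push() of the IPv6 header inside
 * the buffer.
 */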
1da177e4
LT
292static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
293{
294 struct ip6_ra_chain *ra;
295 struct sock *last = NULL;
296
297 read_lock(&ip6_ra_lock);
298 for (ra = ip6_ra_chain; ra; ra = ra->next) {
299 struct sock *sk = ra->sk;
0bd1b59b
AM
300 if (sk && ra->sel == sel &&
301 (!sk->sk_bound_dev_if ||
302 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
303 if (last) {
304 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
305 if (skb2)
306 rawv6_rcv(last, skb2);
307 }
308 last = sk;
309 }
310 }
311
312 if (last) {
313 rawv6_rcv(last, skb);
314 read_unlock(&ip6_ra_lock);
315 return 1;
316 }
317 read_unlock(&ip6_ra_lock);
318 return 0;
319}
320
e21e0b5f
VN
321static int ip6_forward_proxy_check(struct sk_buff *skb)
322{
0660e03f 323 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 324 u8 nexthdr = hdr->nexthdr;
75f2811c 325 __be16 frag_off;
e21e0b5f
VN
326 int offset;
327
328 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 329 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
330 if (offset < 0)
331 return 0;
332 } else
333 offset = sizeof(struct ipv6hdr);
334
335 if (nexthdr == IPPROTO_ICMPV6) {
336 struct icmp6hdr *icmp6;
337
d56f90a7
ACM
338 if (!pskb_may_pull(skb, (skb_network_header(skb) +
339 offset + 1 - skb->data)))
e21e0b5f
VN
340 return 0;
341
d56f90a7 342 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
343
344 switch (icmp6->icmp6_type) {
345 case NDISC_ROUTER_SOLICITATION:
346 case NDISC_ROUTER_ADVERTISEMENT:
347 case NDISC_NEIGHBOUR_SOLICITATION:
348 case NDISC_NEIGHBOUR_ADVERTISEMENT:
349 case NDISC_REDIRECT:
350 /* For reaction involving unicast neighbor discovery
351 * message destined to the proxied address, pass it to
352 * input function.
353 */
354 return 1;
355 default:
356 break;
357 }
358 }
359
74553b09
VN
360 /*
361 * The proxying router can't forward traffic sent to a link-local
362 * address, so signal the sender and discard the packet. This
363 * behavior is clarified by the MIPv6 specification.
364 */
365 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
366 dst_link_failure(skb);
367 return -1;
368 }
369
e21e0b5f
VN
370 return 0;
371}
372
0c4b51f0
EB
373static inline int ip6_forward_finish(struct net *net, struct sock *sk,
374 struct sk_buff *skb)
1da177e4 375{
b9d90d19
JB
376 struct dst_entry *dst = skb_dst(skb);
377
378 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
379 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
380
13206b6b 381 return dst_output(net, sk, skb);
1da177e4
LT
382}
383
0954cf9c
HFS
384static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
385{
386 unsigned int mtu;
387 struct inet6_dev *idev;
388
389 if (dst_metric_locked(dst, RTAX_MTU)) {
390 mtu = dst_metric_raw(dst, RTAX_MTU);
391 if (mtu)
392 return mtu;
393 }
394
395 mtu = IPV6_MIN_MTU;
396 rcu_read_lock();
397 idev = __in6_dev_get(dst->dev);
398 if (idev)
399 mtu = idev->cnf.mtu6;
400 rcu_read_unlock();
401
402 return mtu;
403}
404
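/*
 * ip6_dst_mtu_forward() above prefers a locked RTAX_MTU metric on the
 * route; failing that it uses the egress device's cnf.mtu6, and falls
 * back to IPV6_MIN_MTU (1280 bytes) when no inet6 device data is
 * attached.
 */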
fe6cc55f
FW
405static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
406{
418a3156 407 if (skb->len <= mtu)
fe6cc55f
FW
408 return false;
409
60ff7467 410 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
411 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
412 return true;
413
60ff7467 414 if (skb->ignore_df)
418a3156
FW
415 return false;
416
ae7ef81e 417 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
fe6cc55f
FW
418 return false;
419
420 return true;
421}
422
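/*
 * ip6_pkt_too_big() above decides whether forwarding must fail with
 * ICMPV6_PKT_TOOBIG: packets that fit the MTU never do; packets
 * reassembled by conntrack defrag are judged by their recorded
 * frag_max_size; packets with ignore_df set, and GSO packets whose
 * segments pass skb_gso_validate_mtu(), are let through.
 */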
1da177e4
LT
423int ip6_forward(struct sk_buff *skb)
424{
adf30907 425 struct dst_entry *dst = skb_dst(skb);
0660e03f 426 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 427 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 428 struct net *net = dev_net(dst->dev);
14f3ad6f 429 u32 mtu;
1ab1457c 430
53b7997f 431 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
432 goto error;
433
090f1166
LR
434 if (skb->pkt_type != PACKET_HOST)
435 goto drop;
436
9ef2e965
HFS
437 if (unlikely(skb->sk))
438 goto drop;
439
4497b076
BH
440 if (skb_warn_if_lro(skb))
441 goto drop;
442
1da177e4 443 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
1d015503
ED
444 __IP6_INC_STATS(net, ip6_dst_idev(dst),
445 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
446 goto drop;
447 }
448
35fc92a9 449 skb_forward_csum(skb);
1da177e4
LT
450
	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
ab4eb353
YH
464 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
465 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
466 return 0;
467 }
468
469 /*
470 * check and decrement ttl
471 */
472 if (hdr->hop_limit <= 1) {
473 /* Force OUTPUT device used as source address */
474 skb->dev = dst->dev;
3ffe533c 475 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
1d015503
ED
476 __IP6_INC_STATS(net, ip6_dst_idev(dst),
477 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
478
479 kfree_skb(skb);
480 return -ETIMEDOUT;
481 }
482
fbea49e1 483 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 484 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 485 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
486 int proxied = ip6_forward_proxy_check(skb);
487 if (proxied > 0)
e21e0b5f 488 return ip6_input(skb);
74553b09 489 else if (proxied < 0) {
1d015503
ED
490 __IP6_INC_STATS(net, ip6_dst_idev(dst),
491 IPSTATS_MIB_INDISCARDS);
74553b09
VN
492 goto drop;
493 }
e21e0b5f
VN
494 }
495
1da177e4 496 if (!xfrm6_route_forward(skb)) {
1d015503
ED
497 __IP6_INC_STATS(net, ip6_dst_idev(dst),
498 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
499 goto drop;
500 }
adf30907 501 dst = skb_dst(skb);
1da177e4
LT
502
503 /* IPv6 specs say nothing about it, but it is clear that we cannot
504 send redirects to source routed frames.
1e5dc146 505 We don't send redirects to frames decapsulated from IPsec.
1da177e4 506 */
764e34bc
SS
507 if (IP6CB(skb)->iif == dst->dev->ifindex &&
508 opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 509 struct in6_addr *target = NULL;
fbfe95a4 510 struct inet_peer *peer;
1da177e4 511 struct rt6_info *rt;
1da177e4
LT
512
513 /*
514 * incoming and outgoing devices are the same
515 * send a redirect.
516 */
517
518 rt = (struct rt6_info *) dst;
c45a3dfb
DM
519 if (rt->rt6i_flags & RTF_GATEWAY)
520 target = &rt->rt6i_gateway;
1da177e4
LT
521 else
522 target = &hdr->daddr;
523
fd0273d7 524 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 525
1da177e4
LT
526 /* Limit redirects both by destination (here)
527 and by source (inside ndisc_send_redirect)
528 */
fbfe95a4 529 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 530 ndisc_send_redirect(skb, target);
1d861aa4
DM
531 if (peer)
532 inet_putpeer(peer);
5bb1ab09
DS
533 } else {
534 int addrtype = ipv6_addr_type(&hdr->saddr);
535
1da177e4 536 /* This check is security critical. */
f81b2e7d
YH
537 if (addrtype == IPV6_ADDR_ANY ||
538 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
539 goto error;
540 if (addrtype & IPV6_ADDR_LINKLOCAL) {
541 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 542 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
543 goto error;
544 }
1da177e4
LT
545 }
546
0954cf9c 547 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
548 if (mtu < IPV6_MIN_MTU)
549 mtu = IPV6_MIN_MTU;
550
fe6cc55f 551 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
552 /* Again, force OUTPUT device used as source address */
553 skb->dev = dst->dev;
14f3ad6f 554 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1d015503
ED
555 __IP6_INC_STATS(net, ip6_dst_idev(dst),
556 IPSTATS_MIB_INTOOBIGERRORS);
557 __IP6_INC_STATS(net, ip6_dst_idev(dst),
558 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
559 kfree_skb(skb);
560 return -EMSGSIZE;
561 }
562
563 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
564 __IP6_INC_STATS(net, ip6_dst_idev(dst),
565 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
566 goto drop;
567 }
568
0660e03f 569 hdr = ipv6_hdr(skb);
1da177e4
LT
570
571 /* Mangling hops number delayed to point after skb COW */
1ab1457c 572
1da177e4
LT
573 hdr->hop_limit--;
574
29a26a56
EB
575 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
576 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 577 ip6_forward_finish);
1da177e4
LT
578
579error:
1d015503 580 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
581drop:
582 kfree_skb(skb);
583 return -EINVAL;
584}
585
586static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
587{
588 to->pkt_type = from->pkt_type;
589 to->priority = from->priority;
590 to->protocol = from->protocol;
adf30907
ED
591 skb_dst_drop(to);
592 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 593 to->dev = from->dev;
82e91ffe 594 to->mark = from->mark;
1da177e4 595
558f6115
PA
596 skb_copy_hash(to, from);
597
1da177e4
LT
598#ifdef CONFIG_NET_SCHED
599 to->tc_index = from->tc_index;
600#endif
e7ac05f3 601 nf_copy(to, from);
984bc16c 602 skb_copy_secmark(to, from);
1da177e4
LT
603}
604
7d8c6e39
EB
605int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
606 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 607{
1da177e4 608 struct sk_buff *frag;
67ba4152 609 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 610 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
611 inet6_sk(skb->sk) : NULL;
1da177e4
LT
612 struct ipv6hdr *tmp_hdr;
613 struct frag_hdr *fh;
614 unsigned int mtu, hlen, left, len;
a7ae1992 615 int hroom, troom;
286c2349 616 __be32 frag_id;
67ba4152 617 int ptr, offset = 0, err = 0;
1da177e4
LT
618 u8 *prevhdr, nexthdr = 0;
619
7dd7eb95
DM
620 err = ip6_find_1stfragopt(skb, &prevhdr);
621 if (err < 0)
2423496a 622 goto fail;
7dd7eb95 623 hlen = err;
1da177e4
LT
624 nexthdr = *prevhdr;
625
628a5c56 626 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
627
	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
485fca66
FW
631 if (unlikely(!skb->ignore_df && skb->len > mtu))
632 goto fail_toobig;
a34a101e 633
485fca66
FW
634 if (IP6CB(skb)->frag_max_size) {
635 if (IP6CB(skb)->frag_max_size > mtu)
636 goto fail_toobig;
637
638 /* don't send fragments larger than what we received */
639 mtu = IP6CB(skb)->frag_max_size;
640 if (mtu < IPV6_MIN_MTU)
641 mtu = IPV6_MIN_MTU;
b881ef76
JH
642 }
643
d91675f9
YH
644 if (np && np->frag_size < mtu) {
645 if (np->frag_size)
646 mtu = np->frag_size;
647 }
89bc7848 648 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 649 goto fail_toobig;
1e0d69a9 650 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 651
fd0273d7
MKL
652 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
653 &ipv6_hdr(skb)->saddr);
286c2349 654
405c92f7
HFS
655 if (skb->ip_summed == CHECKSUM_PARTIAL &&
656 (err = skb_checksum_help(skb)))
657 goto fail;
658
1d325d21 659 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 660 if (skb_has_frag_list(skb)) {
c72d8cda 661 unsigned int first_len = skb_pagelen(skb);
3d13008e 662 struct sk_buff *frag2;
1da177e4
LT
663
664 if (first_len - hlen > mtu ||
665 ((first_len - hlen) & 7) ||
1d325d21
FW
666 skb_cloned(skb) ||
667 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
668 goto slow_path;
669
4d9092bb 670 skb_walk_frags(skb, frag) {
1da177e4
LT
671 /* Correct geometry. */
672 if (frag->len > mtu ||
673 ((frag->len & 7) && frag->next) ||
1d325d21 674 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 675 goto slow_path_clean;
1da177e4 676
1da177e4
LT
677 /* Partially cloned skb? */
678 if (skb_shared(frag))
3d13008e 679 goto slow_path_clean;
2fdba6b0
HX
680
681 BUG_ON(frag->sk);
682 if (skb->sk) {
2fdba6b0
HX
683 frag->sk = skb->sk;
684 frag->destructor = sock_wfree;
2fdba6b0 685 }
3d13008e 686 skb->truesize -= frag->truesize;
1da177e4
LT
687 }
688
689 err = 0;
690 offset = 0;
1da177e4
LT
691 /* BUILD HEADER */
692
9a217a1c 693 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 694 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 695 if (!tmp_hdr) {
1d325d21
FW
696 err = -ENOMEM;
697 goto fail;
1da177e4 698 }
1d325d21
FW
699 frag = skb_shinfo(skb)->frag_list;
700 skb_frag_list_init(skb);
1da177e4 701
1da177e4 702 __skb_pull(skb, hlen);
d58ff351 703 fh = __skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
704 __skb_push(skb, hlen);
705 skb_reset_network_header(skb);
d56f90a7 706 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 707
1da177e4
LT
708 fh->nexthdr = nexthdr;
709 fh->reserved = 0;
710 fh->frag_off = htons(IP6_MF);
286c2349 711 fh->identification = frag_id;
1da177e4
LT
712
713 first_len = skb_pagelen(skb);
714 skb->data_len = first_len - skb_headlen(skb);
715 skb->len = first_len;
0660e03f
ACM
716 ipv6_hdr(skb)->payload_len = htons(first_len -
717 sizeof(struct ipv6hdr));
a11d206d 718
1da177e4
LT
719 for (;;) {
720 /* Prepare header of the next frame,
721 * before previous one went down. */
722 if (frag) {
723 frag->ip_summed = CHECKSUM_NONE;
badff6d0 724 skb_reset_transport_header(frag);
d58ff351 725 fh = __skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
726 __skb_push(frag, hlen);
727 skb_reset_network_header(frag);
d56f90a7
ACM
728 memcpy(skb_network_header(frag), tmp_hdr,
729 hlen);
1da177e4
LT
730 offset += skb->len - hlen - sizeof(struct frag_hdr);
731 fh->nexthdr = nexthdr;
732 fh->reserved = 0;
733 fh->frag_off = htons(offset);
53b24b8f 734 if (frag->next)
1da177e4
LT
735 fh->frag_off |= htons(IP6_MF);
736 fh->identification = frag_id;
0660e03f
ACM
737 ipv6_hdr(frag)->payload_len =
738 htons(frag->len -
739 sizeof(struct ipv6hdr));
1da177e4
LT
740 ip6_copy_metadata(frag, skb);
741 }
1ab1457c 742
7d8c6e39 743 err = output(net, sk, skb);
67ba4152 744 if (!err)
d8d1f30b 745 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 746 IPSTATS_MIB_FRAGCREATES);
dafee490 747
1da177e4
LT
748 if (err || !frag)
749 break;
750
751 skb = frag;
752 frag = skb->next;
753 skb->next = NULL;
754 }
755
a51482bd 756 kfree(tmp_hdr);
1da177e4
LT
757
758 if (err == 0) {
d8d1f30b 759 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 760 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
761 return 0;
762 }
763
46cfd725 764 kfree_skb_list(frag);
1da177e4 765
d8d1f30b 766 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 767 IPSTATS_MIB_FRAGFAILS);
1da177e4 768 return err;
3d13008e
ED
769
770slow_path_clean:
771 skb_walk_frags(skb, frag2) {
772 if (frag2 == frag)
773 break;
774 frag2->sk = NULL;
775 frag2->destructor = NULL;
776 skb->truesize += frag2->truesize;
777 }
1da177e4
LT
778 }
779
780slow_path:
781 left = skb->len - hlen; /* Space per frame */
782 ptr = hlen; /* Where to start from */
783
784 /*
785 * Fragment the datagram.
786 */
787
a7ae1992 788 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
789
790 /*
791 * Keep copying data until we run out.
792 */
67ba4152 793 while (left > 0) {
79e49503
FW
794 u8 *fragnexthdr_offset;
795
1da177e4
LT
796 len = left;
797 /* IF: it doesn't fit, use 'mtu' - the data space left */
798 if (len > mtu)
799 len = mtu;
25985edc 800 /* IF: we are not sending up to and including the packet end
1da177e4
LT
801 then align the next start on an eight byte boundary */
802 if (len < left) {
803 len &= ~7;
804 }
1da177e4 805
cbffccc9
JP
806 /* Allocate buffer */
807 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
808 hroom + troom, GFP_ATOMIC);
809 if (!frag) {
1da177e4
LT
810 err = -ENOMEM;
811 goto fail;
812 }
813
814 /*
815 * Set up data on packet
816 */
817
818 ip6_copy_metadata(frag, skb);
a7ae1992 819 skb_reserve(frag, hroom);
1da177e4 820 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 821 skb_reset_network_header(frag);
badff6d0 822 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
823 frag->transport_header = (frag->network_header + hlen +
824 sizeof(struct frag_hdr));
1da177e4
LT
825
826 /*
827 * Charge the memory for the fragment to any owner
828 * it might possess
829 */
830 if (skb->sk)
831 skb_set_owner_w(frag, skb->sk);
832
833 /*
834 * Copy the packet header into the new buffer.
835 */
d626f62b 836 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4 837
79e49503
FW
838 fragnexthdr_offset = skb_network_header(frag);
839 fragnexthdr_offset += prevhdr - skb_network_header(skb);
840 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
841
1da177e4
LT
842 /*
843 * Build fragment header.
844 */
845 fh->nexthdr = nexthdr;
846 fh->reserved = 0;
286c2349 847 fh->identification = frag_id;
1da177e4
LT
848
849 /*
850 * Copy a block of the IP datagram.
851 */
e3f0b86b
HS
852 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
853 len));
1da177e4
LT
854 left -= len;
855
856 fh->frag_off = htons(offset);
857 if (left > 0)
858 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
859 ipv6_hdr(frag)->payload_len = htons(frag->len -
860 sizeof(struct ipv6hdr));
1da177e4
LT
861
862 ptr += len;
863 offset += len;
864
865 /*
866 * Put this fragment into the sending queue.
867 */
7d8c6e39 868 err = output(net, sk, frag);
1da177e4
LT
869 if (err)
870 goto fail;
dafee490 871
adf30907 872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 873 IPSTATS_MIB_FRAGCREATES);
1da177e4 874 }
adf30907 875 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 876 IPSTATS_MIB_FRAGOKS);
808db80a 877 consume_skb(skb);
1da177e4
LT
878 return err;
879
485fca66
FW
880fail_toobig:
881 if (skb->sk && dst_allfrag(skb_dst(skb)))
882 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
883
485fca66
FW
884 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
885 err = -EMSGSIZE;
886
1da177e4 887fail:
adf30907 888 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 889 IPSTATS_MIB_FRAGFAILS);
1ab1457c 890 kfree_skb(skb);
1da177e4
LT
891 return err;
892}
893
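/*
 * ip6_fragment() above has two paths: if the skb already carries a
 * frag_list whose head and fragments are properly sized, aligned and
 * unshared, fragment headers are prepended in place (fast path);
 * otherwise the slow path allocates a fresh skb per fragment and copies
 * the payload out with skb_copy_bits().
 */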
b71d1d42
ED
894static inline int ip6_rt_check(const struct rt6key *rt_key,
895 const struct in6_addr *fl_addr,
896 const struct in6_addr *addr_cache)
cf6b1982 897{
a02cec21 898 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 899 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
900}
901
497c615a
HX
902static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
903 struct dst_entry *dst,
b71d1d42 904 const struct flowi6 *fl6)
1da177e4 905{
497c615a 906 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 907 struct rt6_info *rt;
1da177e4 908
497c615a
HX
909 if (!dst)
910 goto out;
911
a963a37d
ED
912 if (dst->ops->family != AF_INET6) {
913 dst_release(dst);
914 return NULL;
915 }
916
917 rt = (struct rt6_info *)dst;
497c615a
HX
	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
4c9483b2 935 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 936#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 937 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 938#endif
ca254490
DA
939 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
940 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
941 dst_release(dst);
942 dst = NULL;
1da177e4
LT
943 }
944
497c615a
HX
945out:
946 return dst;
947}
948
3aef934f 949static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 950 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 951{
69cce1d1
DM
952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
953 struct neighbour *n;
97cac082 954 struct rt6_info *rt;
69cce1d1
DM
955#endif
956 int err;
6f21c96a 957 int flags = 0;
497c615a 958
e16e888b
MS
959 /* The correct way to handle this would be to do
960 * ip6_route_get_saddr, and then ip6_route_output; however,
961 * the route-specific preferred source forces the
962 * ip6_route_output call _before_ ip6_route_get_saddr.
963 *
964 * In source specific routing (no src=any default route),
965 * ip6_route_output will fail given src=any saddr, though, so
966 * that's why we try it again later.
967 */
968 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
969 struct rt6_info *rt;
970 bool had_dst = *dst != NULL;
1da177e4 971
e16e888b
MS
972 if (!had_dst)
973 *dst = ip6_route_output(net, sk, fl6);
974 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
975 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
976 sk ? inet6_sk(sk)->srcprefs : 0,
977 &fl6->saddr);
44456d37 978 if (err)
1da177e4 979 goto out_err_release;
e16e888b
MS
980
981 /* If we had an erroneous initial result, pretend it
982 * never existed and let the SA-enabled version take
983 * over.
984 */
985 if (!had_dst && (*dst)->error) {
986 dst_release(*dst);
987 *dst = NULL;
988 }
6f21c96a
PA
989
990 if (fl6->flowi6_oif)
991 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
992 }
993
e16e888b 994 if (!*dst)
6f21c96a 995 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
996
997 err = (*dst)->error;
998 if (err)
999 goto out_err_release;
1000
95c385b4 1001#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
1002 /*
1003 * Here if the dst entry we've looked up
1004 * has a neighbour entry that is in the INCOMPLETE
1005 * state and the src address from the flow is
1006 * marked as OPTIMISTIC, we release the found
1007 * dst entry and replace it instead with the
1008 * dst entry of the nexthop router
1009 */
c56bf6fe 1010 rt = (struct rt6_info *) *dst;
707be1ff 1011 rcu_read_lock_bh();
2647a9b0
MKL
1012 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1013 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
1014 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1015 rcu_read_unlock_bh();
1016
1017 if (err) {
e550dfb0 1018 struct inet6_ifaddr *ifp;
4c9483b2 1019 struct flowi6 fl_gw6;
e550dfb0
NH
1020 int redirect;
1021
4c9483b2 1022 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
1023 (*dst)->dev, 1);
1024
1025 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1026 if (ifp)
1027 in6_ifa_put(ifp);
1028
1029 if (redirect) {
1030 /*
1031 * We need to get the dst entry for the
1032 * default router instead
1033 */
1034 dst_release(*dst);
4c9483b2
DM
1035 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1036 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1037 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
1038 err = (*dst)->error;
1039 if (err)
e550dfb0 1040 goto out_err_release;
95c385b4 1041 }
e550dfb0 1042 }
95c385b4 1043#endif
ec5e3b0a 1044 if (ipv6_addr_v4mapped(&fl6->saddr) &&
00ea1cee
WB
1045 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1046 err = -EAFNOSUPPORT;
1047 goto out_err_release;
1048 }
95c385b4 1049
1da177e4
LT
1050 return 0;
1051
1052out_err_release:
1053 dst_release(*dst);
1054 *dst = NULL;
8a966fc0 1055
0d240e78
DA
1056 if (err == -ENETUNREACH)
1057 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1058 return err;
1059}
34a0b3cd 1060
497c615a
HX
1061/**
1062 * ip6_dst_lookup - perform route lookup on flow
1063 * @sk: socket which provides route info
1064 * @dst: pointer to dst_entry * for result
4c9483b2 1065 * @fl6: flow to lookup
497c615a
HX
1066 *
1067 * This function performs a route lookup on the given flow.
1068 *
1069 * It returns zero on success, or a standard errno code on error.
1070 */
343d60aa
RP
1071int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1072 struct flowi6 *fl6)
497c615a
HX
1073{
1074 *dst = NULL;
343d60aa 1075 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1076}
3cf3dc6c
ACM
1077EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1078
497c615a 1079/**
68d0c6d3
DM
1080 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1081 * @sk: socket which provides route info
4c9483b2 1082 * @fl6: flow to lookup
68d0c6d3 1083 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1084 *
1085 * This function performs a route lookup on the given flow.
1086 *
1087 * It returns a valid dst pointer on success, or a pointer encoded
1088 * error code.
1089 */
3aef934f 1090struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1091 const struct in6_addr *final_dst)
68d0c6d3
DM
1092{
1093 struct dst_entry *dst = NULL;
1094 int err;
1095
343d60aa 1096 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1097 if (err)
1098 return ERR_PTR(err);
1099 if (final_dst)
4e3fd7a0 1100 fl6->daddr = *final_dst;
2774c131 1101
f92ee619 1102 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1103}
1104EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1105
1106/**
1107 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1108 * @sk: socket which provides the dst cache and route info
4c9483b2 1109 * @fl6: flow to lookup
68d0c6d3 1110 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1111 *
1112 * This function performs a route lookup on the given flow with the
1113 * possibility of using the cached route in the socket if it is valid.
1114 * It will take the socket dst lock when operating on the dst cache.
1115 * As a result, this function can only be used in process context.
1116 *
68d0c6d3
DM
1117 * It returns a valid dst pointer on success, or a pointer encoded
1118 * error code.
497c615a 1119 */
4c9483b2 1120struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1121 const struct in6_addr *final_dst)
497c615a 1122{
68d0c6d3 1123 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1124
4c9483b2 1125 dst = ip6_sk_dst_check(sk, dst, fl6);
00bc0ef5
JS
1126 if (!dst)
1127 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
68d0c6d3 1128
00bc0ef5 1129 return dst;
497c615a 1130}
68d0c6d3 1131EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1132
0178b695
HX
1133static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1134 gfp_t gfp)
1135{
1136 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1137}
1138
1139static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1140 gfp_t gfp)
1141{
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1143}
1144
75a493e6 1145static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1146 int *maxfraglen,
1147 unsigned int fragheaderlen,
1148 struct sk_buff *skb,
75a493e6 1149 struct rt6_info *rt,
e367c2d0 1150 unsigned int orig_mtu)
0c183379
G
1151{
1152 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1153 if (!skb) {
0c183379 1154 /* first fragment, reserve header_len */
e367c2d0 1155 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1156
1157 } else {
1158 /*
1159 * this fragment is not first, the headers
1160 * space is regarded as data space.
1161 */
e367c2d0 1162 *mtu = orig_mtu;
0c183379
G
1163 }
1164 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1165 + fragheaderlen - sizeof(struct frag_hdr);
1166 }
1167}
1168
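/*
 * ip6_append_data_mtu() above only adjusts the MTU for routes without
 * DST_XFRM_TUNNEL: the first fragment reserves rt->dst.header_len out of
 * the original MTU, later fragments treat that header space as data, and
 * maxfraglen is recomputed from the resulting MTU.
 */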
366e41d9 1169static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
26879da5 1170 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
366e41d9
VY
1171 struct rt6_info *rt, struct flowi6 *fl6)
1172{
1173 struct ipv6_pinfo *np = inet6_sk(sk);
1174 unsigned int mtu;
26879da5 1175 struct ipv6_txoptions *opt = ipc6->opt;
366e41d9
VY
1176
1177 /*
1178 * setup for corking
1179 */
1180 if (opt) {
1181 if (WARN_ON(v6_cork->opt))
1182 return -EINVAL;
1183
864e2a1f 1184 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
63159f29 1185 if (unlikely(!v6_cork->opt))
366e41d9
VY
1186 return -ENOBUFS;
1187
864e2a1f 1188 v6_cork->opt->tot_len = sizeof(*opt);
366e41d9
VY
1189 v6_cork->opt->opt_flen = opt->opt_flen;
1190 v6_cork->opt->opt_nflen = opt->opt_nflen;
1191
1192 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1193 sk->sk_allocation);
1194 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1195 return -ENOBUFS;
1196
1197 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1198 sk->sk_allocation);
1199 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1200 return -ENOBUFS;
1201
1202 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1203 sk->sk_allocation);
1204 if (opt->hopopt && !v6_cork->opt->hopopt)
1205 return -ENOBUFS;
1206
1207 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1208 sk->sk_allocation);
1209 if (opt->srcrt && !v6_cork->opt->srcrt)
1210 return -ENOBUFS;
1211
1212 /* need source address above miyazawa*/
1213 }
1214 dst_hold(&rt->dst);
1215 cork->base.dst = &rt->dst;
1216 cork->fl.u.ip6 = *fl6;
26879da5
WW
1217 v6_cork->hop_limit = ipc6->hlimit;
1218 v6_cork->tclass = ipc6->tclass;
366e41d9
VY
1219 if (rt->dst.flags & DST_XFRM_TUNNEL)
1220 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
749439bf 1221 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
366e41d9
VY
1222 else
1223 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
749439bf 1224 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
366e41d9
VY
1225 if (np->frag_size < mtu) {
1226 if (np->frag_size)
1227 mtu = np->frag_size;
1228 }
749439bf
MM
1229 if (mtu < IPV6_MIN_MTU)
1230 return -EINVAL;
366e41d9
VY
1231 cork->base.fragsize = mtu;
1232 if (dst_allfrag(rt->dst.path))
1233 cork->base.flags |= IPCORK_ALLFRAG;
1234 cork->base.length = 0;
1235
1236 return 0;
1237}
1238
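/*
 * ip6_setup_cork() above snapshots everything later appends depend on:
 * private copies of the extension headers (dst0opt, dst1opt, hopopt,
 * srcrt), a held reference on the route, the hop limit, traffic class
 * and fragment size, plus IPCORK_ALLFRAG when the path requires
 * fragmenting every packet.
 */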
0bbe84a6
VY
1239static int __ip6_append_data(struct sock *sk,
1240 struct flowi6 *fl6,
1241 struct sk_buff_head *queue,
1242 struct inet_cork *cork,
1243 struct inet6_cork *v6_cork,
1244 struct page_frag *pfrag,
1245 int getfrag(void *from, char *to, int offset,
1246 int len, int odd, struct sk_buff *skb),
1247 void *from, int length, int transhdrlen,
26879da5 1248 unsigned int flags, struct ipcm6_cookie *ipc6,
c14ac945 1249 const struct sockcm_cookie *sockc)
1da177e4 1250{
0c183379 1251 struct sk_buff *skb, *skb_prev = NULL;
56244bf8 1252 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
0bbe84a6
VY
1253 int exthdrlen = 0;
1254 int dst_exthdrlen = 0;
1da177e4 1255 int hh_len;
1da177e4
LT
1256 int copy;
1257 int err;
1258 int offset = 0;
a693e698 1259 __u8 tx_flags = 0;
09c2d251 1260 u32 tskey = 0;
0bbe84a6
VY
1261 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1262 struct ipv6_txoptions *opt = v6_cork->opt;
32dce968 1263 int csummode = CHECKSUM_NONE;
682b1a9d 1264 unsigned int maxnonfragsize, headersize;
1da177e4 1265
0bbe84a6
VY
1266 skb = skb_peek_tail(queue);
1267 if (!skb) {
1268 exthdrlen = opt ? opt->opt_flen : 0;
7efdba5b 1269 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1da177e4 1270 }
0bbe84a6 1271
366e41d9 1272 mtu = cork->fragsize;
e367c2d0 1273 orig_mtu = mtu;
1da177e4 1274
d8d1f30b 1275 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1276
a1b05140 1277 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1278 (opt ? opt->opt_nflen : 0);
4df98e76
HFS
1279 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1280 sizeof(struct frag_hdr);
1da177e4 1281
682b1a9d
HFS
1282 headersize = sizeof(struct ipv6hdr) +
1283 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1284 (dst_allfrag(&rt->dst) ?
1285 sizeof(struct frag_hdr) : 0) +
1286 rt->rt6i_nfheader_len;
1287
56244bf8
PA
1288 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1289 * the first fragment
1290 */
1291 if (headersize + transhdrlen > mtu)
1292 goto emsgsize;
1293
26879da5 1294 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
682b1a9d
HFS
1295 (sk->sk_protocol == IPPROTO_UDP ||
1296 sk->sk_protocol == IPPROTO_RAW)) {
1297 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1298 sizeof(struct ipv6hdr));
1299 goto emsgsize;
1300 }
4df98e76 1301
682b1a9d
HFS
1302 if (ip6_sk_ignore_df(sk))
1303 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1304 else
1305 maxnonfragsize = mtu;
4df98e76 1306
682b1a9d 1307 if (cork->length + length > maxnonfragsize - headersize) {
4df98e76 1308emsgsize:
56244bf8
PA
1309 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1310 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
682b1a9d 1311 return -EMSGSIZE;
1da177e4
LT
1312 }
1313
682b1a9d
HFS
1314 /* CHECKSUM_PARTIAL only with no extension headers and when
1315 * we are not going to fragment
1316 */
1317 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1318 headersize == sizeof(struct ipv6hdr) &&
2b89ed65 1319 length <= mtu - headersize &&
682b1a9d 1320 !(flags & MSG_MORE) &&
c8cd0989 1321 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
682b1a9d
HFS
1322 csummode = CHECKSUM_PARTIAL;
1323
09c2d251 1324 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
c14ac945 1325 sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
09c2d251
WB
1326 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1327 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1328 tskey = sk->sk_tskey++;
1329 }
a693e698 1330
1da177e4
LT
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 *        --yoshfuji
	 */
1346
2811ebac 1347 cork->length += length;
2811ebac 1348 if (!skb)
1da177e4
LT
1349 goto alloc_new_skb;
1350
1351 while (length > 0) {
1352 /* Check if the remaining data fits into current packet. */
bdc712b4 1353 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1354 if (copy < length)
1355 copy = maxfraglen - skb->len;
1356
1357 if (copy <= 0) {
1358 char *data;
1359 unsigned int datalen;
1360 unsigned int fraglen;
1361 unsigned int fraggap;
1362 unsigned int alloclen;
1da177e4 1363alloc_new_skb:
1da177e4 1364 /* There's no room in the current skb */
0c183379
G
1365 if (skb)
1366 fraggap = skb->len - maxfraglen;
1da177e4
LT
1367 else
1368 fraggap = 0;
0c183379 1369 /* update mtu and maxfraglen if necessary */
63159f29 1370 if (!skb || !skb_prev)
0c183379 1371 ip6_append_data_mtu(&mtu, &maxfraglen,
75a493e6 1372 fragheaderlen, skb, rt,
e367c2d0 1373 orig_mtu);
0c183379
G
1374
1375 skb_prev = skb;
1da177e4
LT
1376
1377 /*
1378 * If remaining data exceeds the mtu,
1379 * we know we need more fragment(s).
1380 */
1381 datalen = length + fraggap;
1da177e4 1382
0c183379
G
1383 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1384 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1da177e4 1385 if ((flags & MSG_MORE) &&
d8d1f30b 1386 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1387 alloclen = mtu;
1388 else
1389 alloclen = datalen + fragheaderlen;
1390
299b0767
SK
1391 alloclen += dst_exthdrlen;
1392
0c183379
G
1393 if (datalen != length + fraggap) {
1394 /*
1395 * this is not the last fragment, the trailer
1396 * space is regarded as data space.
1397 */
1398 datalen += rt->dst.trailer_len;
1399 }
1400
1401 alloclen += rt->dst.trailer_len;
1402 fraglen = datalen + fragheaderlen;
1da177e4
LT
1403
1404 /*
1405 * We just reserve space for fragment header.
1ab1457c 1406 * Note: this may be overallocation if the message
1da177e4
LT
1407 * (without MSG_MORE) fits into the MTU.
1408 */
1409 alloclen += sizeof(struct frag_hdr);
1410
232cd35d
ED
1411 copy = datalen - transhdrlen - fraggap;
1412 if (copy < 0) {
1413 err = -EINVAL;
1414 goto error;
1415 }
1da177e4
LT
1416 if (transhdrlen) {
1417 skb = sock_alloc_send_skb(sk,
1418 alloclen + hh_len,
1419 (flags & MSG_DONTWAIT), &err);
1420 } else {
1421 skb = NULL;
14afee4b 1422 if (refcount_read(&sk->sk_wmem_alloc) <=
1da177e4
LT
1423 2 * sk->sk_sndbuf)
1424 skb = sock_wmalloc(sk,
1425 alloclen + hh_len, 1,
1426 sk->sk_allocation);
63159f29 1427 if (unlikely(!skb))
1da177e4
LT
1428 err = -ENOBUFS;
1429 }
63159f29 1430 if (!skb)
1da177e4
LT
1431 goto error;
1432 /*
1433 * Fill in the control structures
1434 */
9c9c9ad5 1435 skb->protocol = htons(ETH_P_IPV6);
32dce968 1436 skb->ip_summed = csummode;
1da177e4 1437 skb->csum = 0;
1f85851e
G
1438 /* reserve for fragmentation and ipsec header */
1439 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1440 dst_exthdrlen);
1da177e4 1441
11878b40
WB
1442 /* Only the initial fragment is time stamped */
1443 skb_shinfo(skb)->tx_flags = tx_flags;
1444 tx_flags = 0;
09c2d251
WB
1445 skb_shinfo(skb)->tskey = tskey;
1446 tskey = 0;
a693e698 1447
1da177e4
LT
1448 /*
1449 * Find where to start putting bytes
1450 */
1f85851e
G
1451 data = skb_put(skb, fraglen);
1452 skb_set_network_header(skb, exthdrlen);
1453 data += fragheaderlen;
b0e380b1
ACM
1454 skb->transport_header = (skb->network_header +
1455 fragheaderlen);
1da177e4
LT
1456 if (fraggap) {
1457 skb->csum = skb_copy_and_csum_bits(
1458 skb_prev, maxfraglen,
1459 data + transhdrlen, fraggap, 0);
1460 skb_prev->csum = csum_sub(skb_prev->csum,
1461 skb->csum);
1462 data += fraggap;
e9fa4f7b 1463 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4 1464 }
232cd35d
ED
1465 if (copy > 0 &&
1466 getfrag(from, data + transhdrlen, offset,
1467 copy, fraggap, skb) < 0) {
1da177e4
LT
1468 err = -EFAULT;
1469 kfree_skb(skb);
1470 goto error;
1471 }
1472
1473 offset += copy;
1474 length -= datalen - fraggap;
1475 transhdrlen = 0;
1476 exthdrlen = 0;
299b0767 1477 dst_exthdrlen = 0;
1da177e4 1478
0dec879f
JA
1479 if ((flags & MSG_CONFIRM) && !skb_prev)
1480 skb_set_dst_pending_confirm(skb, 1);
1481
1da177e4
LT
1482 /*
1483 * Put the packet on the pending queue
1484 */
0bbe84a6 1485 __skb_queue_tail(queue, skb);
1da177e4
LT
1486 continue;
1487 }
1488
1489 if (copy > length)
1490 copy = length;
1491
3374d02f
WB
1492 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1493 skb_tailroom(skb) >= copy) {
1da177e4
LT
1494 unsigned int off;
1495
1496 off = skb->len;
1497 if (getfrag(from, skb_put(skb, copy),
1498 offset, copy, off, skb) < 0) {
1499 __skb_trim(skb, off);
1500 err = -EFAULT;
1501 goto error;
1502 }
1503 } else {
1504 int i = skb_shinfo(skb)->nr_frags;
1da177e4 1505
5640f768
ED
1506 err = -ENOMEM;
1507 if (!sk_page_frag_refill(sk, pfrag))
1da177e4 1508 goto error;
5640f768
ED
1509
1510 if (!skb_can_coalesce(skb, i, pfrag->page,
1511 pfrag->offset)) {
1512 err = -EMSGSIZE;
1513 if (i == MAX_SKB_FRAGS)
1514 goto error;
1515
1516 __skb_fill_page_desc(skb, i, pfrag->page,
1517 pfrag->offset, 0);
1518 skb_shinfo(skb)->nr_frags = ++i;
1519 get_page(pfrag->page);
1da177e4 1520 }
5640f768 1521 copy = min_t(int, copy, pfrag->size - pfrag->offset);
9e903e08 1522 if (getfrag(from,
5640f768
ED
1523 page_address(pfrag->page) + pfrag->offset,
1524 offset, copy, skb->len, skb) < 0)
1525 goto error_efault;
1526
1527 pfrag->offset += copy;
1528 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1da177e4
LT
1529 skb->len += copy;
1530 skb->data_len += copy;
f945fa7a 1531 skb->truesize += copy;
14afee4b 1532 refcount_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1533 }
1534 offset += copy;
1535 length -= copy;
1536 }
5640f768 1537
1da177e4 1538 return 0;
5640f768
ED
1539
1540error_efault:
1541 err = -EFAULT;
1da177e4 1542error:
bdc712b4 1543 cork->length -= length;
3bd653c8 1544 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1545 return err;
1546}
0bbe84a6
VY
1547
1548int ip6_append_data(struct sock *sk,
1549 int getfrag(void *from, char *to, int offset, int len,
1550 int odd, struct sk_buff *skb),
26879da5
WW
1551 void *from, int length, int transhdrlen,
1552 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1553 struct rt6_info *rt, unsigned int flags,
c14ac945 1554 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1555{
1556 struct inet_sock *inet = inet_sk(sk);
1557 struct ipv6_pinfo *np = inet6_sk(sk);
1558 int exthdrlen;
1559 int err;
1560
1561 if (flags&MSG_PROBE)
1562 return 0;
1563 if (skb_queue_empty(&sk->sk_write_queue)) {
1564 /*
1565 * setup for corking
1566 */
26879da5
WW
1567 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1568 ipc6, rt, fl6);
0bbe84a6
VY
1569 if (err)
1570 return err;
1571
26879da5 1572 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1573 length += exthdrlen;
1574 transhdrlen += exthdrlen;
1575 } else {
1576 fl6 = &inet->cork.fl.u.ip6;
1577 transhdrlen = 0;
1578 }
1579
1580 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1581 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1582 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1583}
a495f836 1584EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1585
366e41d9
VY
1586static void ip6_cork_release(struct inet_cork_full *cork,
1587 struct inet6_cork *v6_cork)
bf138862 1588{
366e41d9
VY
1589 if (v6_cork->opt) {
1590 kfree(v6_cork->opt->dst0opt);
1591 kfree(v6_cork->opt->dst1opt);
1592 kfree(v6_cork->opt->hopopt);
1593 kfree(v6_cork->opt->srcrt);
1594 kfree(v6_cork->opt);
1595 v6_cork->opt = NULL;
0178b695
HX
1596 }
1597
366e41d9
VY
1598 if (cork->base.dst) {
1599 dst_release(cork->base.dst);
1600 cork->base.dst = NULL;
1601 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1602 }
366e41d9 1603 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1604}
1605
6422398c
VY
1606struct sk_buff *__ip6_make_skb(struct sock *sk,
1607 struct sk_buff_head *queue,
1608 struct inet_cork_full *cork,
1609 struct inet6_cork *v6_cork)
1da177e4
LT
1610{
1611 struct sk_buff *skb, *tmp_skb;
1612 struct sk_buff **tail_skb;
1613 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1da177e4 1614 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1615 struct net *net = sock_net(sk);
1da177e4 1616 struct ipv6hdr *hdr;
6422398c
VY
1617 struct ipv6_txoptions *opt = v6_cork->opt;
1618 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1619 struct flowi6 *fl6 = &cork->fl.u.ip6;
4c9483b2 1620 unsigned char proto = fl6->flowi6_proto;
1da177e4 1621
6422398c 1622 skb = __skb_dequeue(queue);
63159f29 1623 if (!skb)
1da177e4
LT
1624 goto out;
1625 tail_skb = &(skb_shinfo(skb)->frag_list);
1626
1627 /* move skb->data to ip header from ext header */
d56f90a7 1628 if (skb->data < skb_network_header(skb))
bbe735e4 1629 __skb_pull(skb, skb_network_offset(skb));
6422398c 1630 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc77 1631 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1632 *tail_skb = tmp_skb;
1633 tail_skb = &(tmp_skb->next);
1634 skb->len += tmp_skb->len;
1635 skb->data_len += tmp_skb->len;
1da177e4 1636 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1637 tmp_skb->destructor = NULL;
1638 tmp_skb->sk = NULL;
1da177e4
LT
1639 }
1640
28a89453 1641 /* Allow local fragmentation. */
60ff7467 1642 skb->ignore_df = ip6_sk_ignore_df(sk);
28a89453 1643
4e3fd7a0 1644 *final_dst = fl6->daddr;
cfe1fc77 1645 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1646 if (opt && opt->opt_flen)
1647 ipv6_push_frag_opts(skb, opt, &proto);
1648 if (opt && opt->opt_nflen)
613fa3ca 1649 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1da177e4 1650
e2d1bca7
ACM
1651 skb_push(skb, sizeof(struct ipv6hdr));
1652 skb_reset_network_header(skb);
0660e03f 1653 hdr = ipv6_hdr(skb);
1ab1457c 1654
6422398c 1655 ip6_flow_hdr(hdr, v6_cork->tclass,
cb1ce2ef 1656 ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 1657 ip6_autoflowlabel(net, np), fl6));
6422398c 1658 hdr->hop_limit = v6_cork->hop_limit;
1da177e4 1659 hdr->nexthdr = proto;
4e3fd7a0
AD
1660 hdr->saddr = fl6->saddr;
1661 hdr->daddr = *final_dst;
1da177e4 1662
a2c2064f 1663 skb->priority = sk->sk_priority;
4a19ec58 1664 skb->mark = sk->sk_mark;
a2c2064f 1665
d8d1f30b 1666 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1667 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1668 if (proto == IPPROTO_ICMPV6) {
adf30907 1669 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1670
43a43b60
HFS
1671 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1672 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1673 }
1674
6422398c
VY
1675 ip6_cork_release(cork, v6_cork);
1676out:
1677 return skb;
1678}
1679
1680int ip6_send_skb(struct sk_buff *skb)
1681{
1682 struct net *net = sock_net(skb->sk);
1683 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1684 int err;
1685
33224b16 1686 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1687 if (err) {
1688 if (err > 0)
6ce9e7b5 1689 err = net_xmit_errno(err);
1da177e4 1690 if (err)
6422398c
VY
1691 IP6_INC_STATS(net, rt->rt6i_idev,
1692 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1693 }
1694
1da177e4 1695 return err;
6422398c
VY
1696}
1697
1698int ip6_push_pending_frames(struct sock *sk)
1699{
1700 struct sk_buff *skb;
1701
1702 skb = ip6_finish_skb(sk);
1703 if (!skb)
1704 return 0;
1705
1706 return ip6_send_skb(skb);
1da177e4 1707}
a495f836 1708EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1709
0bbe84a6 1710static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1711 struct sk_buff_head *queue,
1712 struct inet_cork_full *cork,
1713 struct inet6_cork *v6_cork)
1da177e4 1714{
1da177e4
LT
1715 struct sk_buff *skb;
1716
0bbe84a6 1717 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1718 if (skb_dst(skb))
1719 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1720 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1721 kfree_skb(skb);
1722 }
1723
6422398c 1724 ip6_cork_release(cork, v6_cork);
1da177e4 1725}
0bbe84a6
VY
1726
1727void ip6_flush_pending_frames(struct sock *sk)
1728{
6422398c
VY
1729 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1730 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1731}
a495f836 1732EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1733
1734struct sk_buff *ip6_make_skb(struct sock *sk,
1735 int getfrag(void *from, char *to, int offset,
1736 int len, int odd, struct sk_buff *skb),
1737 void *from, int length, int transhdrlen,
26879da5 1738 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1739 struct rt6_info *rt, unsigned int flags,
26879da5 1740 const struct sockcm_cookie *sockc)
6422398c
VY
1741{
1742 struct inet_cork_full cork;
1743 struct inet6_cork v6_cork;
1744 struct sk_buff_head queue;
26879da5 1745 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1746 int err;
1747
1748 if (flags & MSG_PROBE)
1749 return NULL;
1750
1751 __skb_queue_head_init(&queue);
1752
1753 cork.base.flags = 0;
1754 cork.base.addr = 0;
1755 cork.base.opt = NULL;
95ef498d 1756 cork.base.dst = NULL;
6422398c 1757 v6_cork.opt = NULL;
26879da5 1758 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
862c03ee
ED
1759 if (err) {
1760 ip6_cork_release(&cork, &v6_cork);
6422398c 1761 return ERR_PTR(err);
862c03ee 1762 }
26879da5
WW
1763 if (ipc6->dontfrag < 0)
1764 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c
VY
1765
1766 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1767 &current->task_frag, getfrag, from,
1768 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1769 flags, ipc6, sockc);
6422398c
VY
1770 if (err) {
1771 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1772 return ERR_PTR(err);
1773 }
1774
1775 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1776}