]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
net: pktgen: fix null ptr deref in skb allocation
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
ca254490 58#include <net/l3mdev.h>
1da177e4 59
7d8c6e39 60static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 61{
adf30907 62 struct dst_entry *dst = skb_dst(skb);
1da177e4 63 struct net_device *dev = dst->dev;
f6b72b62 64 struct neighbour *neigh;
6fd6ce20
YH
65 struct in6_addr *nexthop;
66 int ret;
1da177e4
LT
67
68 skb->protocol = htons(ETH_P_IPV6);
69 skb->dev = dev;
70
0660e03f 71 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 72 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 73
7026b1dd 74 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 75 ((mroute6_socket(net, skb) &&
bd91b8bf 76 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
77 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
79 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80
81 /* Do not check for IFF_ALLMULTI; multicast routing
82 is not supported in any case.
83 */
84 if (newskb)
b2e0b385 85 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 86 net, sk, newskb, NULL, newskb->dev,
95603e22 87 dev_loopback_xmit);
1da177e4 88
0660e03f 89 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 90 IP6_INC_STATS(net, idev,
3bd653c8 91 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
92 kfree_skb(skb);
93 return 0;
94 }
95 }
96
78126c41 97 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
2647a9b0 108 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
78126c41 119 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
120 kfree_skb(skb);
121 return -EINVAL;
1da177e4
LT
122}
123
0c4b51f0 124static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
125{
126 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
127 dst_allfrag(skb_dst(skb)) ||
128 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 129 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 130 else
7d8c6e39 131 return ip6_finish_output2(net, sk, skb);
9e508490
JE
132}
133
ede2059d 134int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 135{
9e508490 136 struct net_device *dev = skb_dst(skb)->dev;
adf30907 137 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 138
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 140 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
141 kfree_skb(skb);
142 return 0;
143 }
144
29a26a56
EB
145 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
146 net, sk, skb, NULL, dev,
9c6eb28a
JE
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
1c1e9d2b
ED
152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
153 * Note : socket lock is not held for SYNACK packets, but might be modified
154 * by calls to skb_set_owner_w() and ipv6_local_error(),
155 * which are using proper atomic operations or spinlocks.
1da177e4 156 */
1c1e9d2b 157int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 158 struct ipv6_txoptions *opt, int tclass)
1da177e4 159{
3bd653c8 160 struct net *net = sock_net(sk);
1c1e9d2b 161 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 162 struct in6_addr *first_hop = &fl6->daddr;
adf30907 163 struct dst_entry *dst = skb_dst(skb);
1da177e4 164 struct ipv6hdr *hdr;
4c9483b2 165 u8 proto = fl6->flowi6_proto;
1da177e4 166 int seg_len = skb->len;
e651f03a 167 int hlimit = -1;
1da177e4
LT
168 u32 mtu;
169
170 if (opt) {
c2636b4d 171 unsigned int head_room;
1da177e4
LT
172
173 /* First: exthdrs may take lots of space (~8K for now)
174 MAX_HEADER is not enough.
175 */
176 head_room = opt->opt_nflen + opt->opt_flen;
177 seg_len += head_room;
178 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
179
180 if (skb_headroom(skb) < head_room) {
181 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 182 if (!skb2) {
adf30907 183 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
184 IPSTATS_MIB_OUTDISCARDS);
185 kfree_skb(skb);
1da177e4
LT
186 return -ENOBUFS;
187 }
808db80a 188 consume_skb(skb);
a11d206d 189 skb = skb2;
1c1e9d2b
ED
190 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
191 * it is safe to call in our context (socket lock not held)
192 */
193 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
194 }
195 if (opt->opt_flen)
196 ipv6_push_frag_opts(skb, opt, &proto);
197 if (opt->opt_nflen)
198 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
199 }
200
e2d1bca7
ACM
201 skb_push(skb, sizeof(struct ipv6hdr));
202 skb_reset_network_header(skb);
0660e03f 203 hdr = ipv6_hdr(skb);
1da177e4
LT
204
205 /*
206 * Fill in the IPv6 header
207 */
b903d324 208 if (np)
1da177e4
LT
209 hlimit = np->hop_limit;
210 if (hlimit < 0)
6b75d090 211 hlimit = ip6_dst_hoplimit(dst);
1da177e4 212
cb1ce2ef 213 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 214 np->autoflowlabel, fl6));
41a1f8ea 215
1da177e4
LT
216 hdr->payload_len = htons(seg_len);
217 hdr->nexthdr = proto;
218 hdr->hop_limit = hlimit;
219
4e3fd7a0
AD
220 hdr->saddr = fl6->saddr;
221 hdr->daddr = *first_hop;
1da177e4 222
9c9c9ad5 223 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 224 skb->priority = sk->sk_priority;
4a19ec58 225 skb->mark = sk->sk_mark;
a2c2064f 226
1da177e4 227 mtu = dst_mtu(dst);
60ff7467 228 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 229 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 230 IPSTATS_MIB_OUT, skb->len);
1c1e9d2b
ED
231 /* hooks should never assume socket lock is held.
232 * we promote our socket to non const
233 */
29a26a56 234 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 235 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 236 dst_output);
1da177e4
LT
237 }
238
1da177e4 239 skb->dev = dst->dev;
1c1e9d2b
ED
240 /* ipv6_local_error() does not require socket lock,
241 * we promote our socket to non const
242 */
243 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
244
adf30907 245 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
246 kfree_skb(skb);
247 return -EMSGSIZE;
248}
7159039a
YH
249EXPORT_SYMBOL(ip6_xmit);
250
1da177e4
LT
251static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
252{
253 struct ip6_ra_chain *ra;
254 struct sock *last = NULL;
255
256 read_lock(&ip6_ra_lock);
257 for (ra = ip6_ra_chain; ra; ra = ra->next) {
258 struct sock *sk = ra->sk;
0bd1b59b
AM
259 if (sk && ra->sel == sel &&
260 (!sk->sk_bound_dev_if ||
261 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
262 if (last) {
263 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
264 if (skb2)
265 rawv6_rcv(last, skb2);
266 }
267 last = sk;
268 }
269 }
270
271 if (last) {
272 rawv6_rcv(last, skb);
273 read_unlock(&ip6_ra_lock);
274 return 1;
275 }
276 read_unlock(&ip6_ra_lock);
277 return 0;
278}
279
e21e0b5f
VN
280static int ip6_forward_proxy_check(struct sk_buff *skb)
281{
0660e03f 282 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 283 u8 nexthdr = hdr->nexthdr;
75f2811c 284 __be16 frag_off;
e21e0b5f
VN
285 int offset;
286
287 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 288 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
289 if (offset < 0)
290 return 0;
291 } else
292 offset = sizeof(struct ipv6hdr);
293
294 if (nexthdr == IPPROTO_ICMPV6) {
295 struct icmp6hdr *icmp6;
296
d56f90a7
ACM
297 if (!pskb_may_pull(skb, (skb_network_header(skb) +
298 offset + 1 - skb->data)))
e21e0b5f
VN
299 return 0;
300
d56f90a7 301 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
302
303 switch (icmp6->icmp6_type) {
304 case NDISC_ROUTER_SOLICITATION:
305 case NDISC_ROUTER_ADVERTISEMENT:
306 case NDISC_NEIGHBOUR_SOLICITATION:
307 case NDISC_NEIGHBOUR_ADVERTISEMENT:
308 case NDISC_REDIRECT:
309 /* For reaction involving unicast neighbor discovery
310 * message destined to the proxied address, pass it to
311 * input function.
312 */
313 return 1;
314 default:
315 break;
316 }
317 }
318
74553b09
VN
319 /*
320 * The proxying router can't forward traffic sent to a link-local
321 * address, so signal the sender and discard the packet. This
322 * behavior is clarified by the MIPv6 specification.
323 */
324 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
325 dst_link_failure(skb);
326 return -1;
327 }
328
e21e0b5f
VN
329 return 0;
330}
331
/* Tail of the forwarding path, invoked after the NF_INET_FORWARD hook:
 * clear the sender-CPU hint (meaningless on a forwarded skb) and hand
 * the packet to the route's output function.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}
338
0954cf9c
HFS
339static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
340{
341 unsigned int mtu;
342 struct inet6_dev *idev;
343
344 if (dst_metric_locked(dst, RTAX_MTU)) {
345 mtu = dst_metric_raw(dst, RTAX_MTU);
346 if (mtu)
347 return mtu;
348 }
349
350 mtu = IPV6_MIN_MTU;
351 rcu_read_lock();
352 idev = __in6_dev_get(dst->dev);
353 if (idev)
354 mtu = idev->cnf.mtu6;
355 rcu_read_unlock();
356
357 return mtu;
358}
359
fe6cc55f
FW
360static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
361{
418a3156 362 if (skb->len <= mtu)
fe6cc55f
FW
363 return false;
364
60ff7467 365 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
366 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
367 return true;
368
60ff7467 369 if (skb->ignore_df)
418a3156
FW
370 return false;
371
fe6cc55f
FW
372 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
373 return false;
374
375 return true;
376}
377
1da177e4
LT
378int ip6_forward(struct sk_buff *skb)
379{
adf30907 380 struct dst_entry *dst = skb_dst(skb);
0660e03f 381 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 382 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 383 struct net *net = dev_net(dst->dev);
14f3ad6f 384 u32 mtu;
1ab1457c 385
53b7997f 386 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
387 goto error;
388
090f1166
LR
389 if (skb->pkt_type != PACKET_HOST)
390 goto drop;
391
9ef2e965
HFS
392 if (unlikely(skb->sk))
393 goto drop;
394
4497b076
BH
395 if (skb_warn_if_lro(skb))
396 goto drop;
397
1da177e4 398 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
399 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
400 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
401 goto drop;
402 }
403
35fc92a9 404 skb_forward_csum(skb);
1da177e4
LT
405
406 /*
407 * We DO NOT make any processing on
408 * RA packets, pushing them to user level AS IS
409 * without ane WARRANTY that application will be able
410 * to interpret them. The reason is that we
411 * cannot make anything clever here.
412 *
413 * We are not end-node, so that if packet contains
414 * AH/ESP, we cannot make anything.
415 * Defragmentation also would be mistake, RA packets
416 * cannot be fragmented, because there is no warranty
417 * that different fragments will go along one path. --ANK
418 */
ab4eb353
YH
419 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
420 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
421 return 0;
422 }
423
424 /*
425 * check and decrement ttl
426 */
427 if (hdr->hop_limit <= 1) {
428 /* Force OUTPUT device used as source address */
429 skb->dev = dst->dev;
3ffe533c 430 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
431 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
432 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
433
434 kfree_skb(skb);
435 return -ETIMEDOUT;
436 }
437
fbea49e1 438 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 439 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 440 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
441 int proxied = ip6_forward_proxy_check(skb);
442 if (proxied > 0)
e21e0b5f 443 return ip6_input(skb);
74553b09 444 else if (proxied < 0) {
15c77d8b
ED
445 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
446 IPSTATS_MIB_INDISCARDS);
74553b09
VN
447 goto drop;
448 }
e21e0b5f
VN
449 }
450
1da177e4 451 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
452 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
453 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
454 goto drop;
455 }
adf30907 456 dst = skb_dst(skb);
1da177e4
LT
457
458 /* IPv6 specs say nothing about it, but it is clear that we cannot
459 send redirects to source routed frames.
1e5dc146 460 We don't send redirects to frames decapsulated from IPsec.
1da177e4 461 */
c45a3dfb 462 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 463 struct in6_addr *target = NULL;
fbfe95a4 464 struct inet_peer *peer;
1da177e4 465 struct rt6_info *rt;
1da177e4
LT
466
467 /*
468 * incoming and outgoing devices are the same
469 * send a redirect.
470 */
471
472 rt = (struct rt6_info *) dst;
c45a3dfb
DM
473 if (rt->rt6i_flags & RTF_GATEWAY)
474 target = &rt->rt6i_gateway;
1da177e4
LT
475 else
476 target = &hdr->daddr;
477
fd0273d7 478 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 479
1da177e4
LT
480 /* Limit redirects both by destination (here)
481 and by source (inside ndisc_send_redirect)
482 */
fbfe95a4 483 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 484 ndisc_send_redirect(skb, target);
1d861aa4
DM
485 if (peer)
486 inet_putpeer(peer);
5bb1ab09
DS
487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr);
489
1da177e4 490 /* This check is security critical. */
f81b2e7d
YH
491 if (addrtype == IPV6_ADDR_ANY ||
492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
493 goto error;
494 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 496 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
497 goto error;
498 }
1da177e4
LT
499 }
500
0954cf9c 501 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
502 if (mtu < IPV6_MIN_MTU)
503 mtu = IPV6_MIN_MTU;
504
fe6cc55f 505 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
506 /* Again, force OUTPUT device used as source address */
507 skb->dev = dst->dev;
14f3ad6f 508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
509 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
510 IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
512 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
519 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
520 goto drop;
521 }
522
0660e03f 523 hdr = ipv6_hdr(skb);
1da177e4
LT
524
525 /* Mangling hops number delayed to point after skb COW */
1ab1457c 526
1da177e4
LT
527 hdr->hop_limit--;
528
483a47d2 529 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 530 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
531 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
532 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 533 ip6_forward_finish);
1da177e4
LT
534
535error:
483a47d2 536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
537drop:
538 kfree_skb(skb);
539 return -EINVAL;
540}
541
542static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543{
544 to->pkt_type = from->pkt_type;
545 to->priority = from->priority;
546 to->protocol = from->protocol;
adf30907
ED
547 skb_dst_drop(to);
548 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 549 to->dev = from->dev;
82e91ffe 550 to->mark = from->mark;
1da177e4
LT
551
552#ifdef CONFIG_NET_SCHED
553 to->tc_index = from->tc_index;
554#endif
e7ac05f3 555 nf_copy(to, from);
984bc16c 556 skb_copy_secmark(to, from);
1da177e4
LT
557}
558
7d8c6e39
EB
559int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
560 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 561{
1da177e4 562 struct sk_buff *frag;
67ba4152 563 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 564 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
565 inet6_sk(skb->sk) : NULL;
1da177e4
LT
566 struct ipv6hdr *tmp_hdr;
567 struct frag_hdr *fh;
568 unsigned int mtu, hlen, left, len;
a7ae1992 569 int hroom, troom;
286c2349 570 __be32 frag_id;
67ba4152 571 int ptr, offset = 0, err = 0;
1da177e4
LT
572 u8 *prevhdr, nexthdr = 0;
573
1da177e4
LT
574 hlen = ip6_find_1stfragopt(skb, &prevhdr);
575 nexthdr = *prevhdr;
576
628a5c56 577 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
578
579 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 580 * or if the skb it not generated by a local socket.
b881ef76 581 */
485fca66
FW
582 if (unlikely(!skb->ignore_df && skb->len > mtu))
583 goto fail_toobig;
a34a101e 584
485fca66
FW
585 if (IP6CB(skb)->frag_max_size) {
586 if (IP6CB(skb)->frag_max_size > mtu)
587 goto fail_toobig;
588
589 /* don't send fragments larger than what we received */
590 mtu = IP6CB(skb)->frag_max_size;
591 if (mtu < IPV6_MIN_MTU)
592 mtu = IPV6_MIN_MTU;
b881ef76
JH
593 }
594
d91675f9
YH
595 if (np && np->frag_size < mtu) {
596 if (np->frag_size)
597 mtu = np->frag_size;
598 }
89bc7848 599 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 600 goto fail_toobig;
1e0d69a9 601 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 602
fd0273d7
MKL
603 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
604 &ipv6_hdr(skb)->saddr);
286c2349 605
405c92f7
HFS
606 if (skb->ip_summed == CHECKSUM_PARTIAL &&
607 (err = skb_checksum_help(skb)))
608 goto fail;
609
1d325d21 610 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 611 if (skb_has_frag_list(skb)) {
1da177e4 612 int first_len = skb_pagelen(skb);
3d13008e 613 struct sk_buff *frag2;
1da177e4
LT
614
615 if (first_len - hlen > mtu ||
616 ((first_len - hlen) & 7) ||
1d325d21
FW
617 skb_cloned(skb) ||
618 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
619 goto slow_path;
620
4d9092bb 621 skb_walk_frags(skb, frag) {
1da177e4
LT
622 /* Correct geometry. */
623 if (frag->len > mtu ||
624 ((frag->len & 7) && frag->next) ||
1d325d21 625 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 626 goto slow_path_clean;
1da177e4 627
1da177e4
LT
628 /* Partially cloned skb? */
629 if (skb_shared(frag))
3d13008e 630 goto slow_path_clean;
2fdba6b0
HX
631
632 BUG_ON(frag->sk);
633 if (skb->sk) {
2fdba6b0
HX
634 frag->sk = skb->sk;
635 frag->destructor = sock_wfree;
2fdba6b0 636 }
3d13008e 637 skb->truesize -= frag->truesize;
1da177e4
LT
638 }
639
640 err = 0;
641 offset = 0;
1da177e4
LT
642 /* BUILD HEADER */
643
9a217a1c 644 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 645 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 646 if (!tmp_hdr) {
adf30907 647 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 648 IPSTATS_MIB_FRAGFAILS);
1d325d21
FW
649 err = -ENOMEM;
650 goto fail;
1da177e4 651 }
1d325d21
FW
652 frag = skb_shinfo(skb)->frag_list;
653 skb_frag_list_init(skb);
1da177e4 654
1da177e4 655 __skb_pull(skb, hlen);
67ba4152 656 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
657 __skb_push(skb, hlen);
658 skb_reset_network_header(skb);
d56f90a7 659 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 660
1da177e4
LT
661 fh->nexthdr = nexthdr;
662 fh->reserved = 0;
663 fh->frag_off = htons(IP6_MF);
286c2349 664 fh->identification = frag_id;
1da177e4
LT
665
666 first_len = skb_pagelen(skb);
667 skb->data_len = first_len - skb_headlen(skb);
668 skb->len = first_len;
0660e03f
ACM
669 ipv6_hdr(skb)->payload_len = htons(first_len -
670 sizeof(struct ipv6hdr));
a11d206d 671
d8d1f30b 672 dst_hold(&rt->dst);
1da177e4
LT
673
674 for (;;) {
675 /* Prepare header of the next frame,
676 * before previous one went down. */
677 if (frag) {
678 frag->ip_summed = CHECKSUM_NONE;
badff6d0 679 skb_reset_transport_header(frag);
67ba4152 680 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
681 __skb_push(frag, hlen);
682 skb_reset_network_header(frag);
d56f90a7
ACM
683 memcpy(skb_network_header(frag), tmp_hdr,
684 hlen);
1da177e4
LT
685 offset += skb->len - hlen - sizeof(struct frag_hdr);
686 fh->nexthdr = nexthdr;
687 fh->reserved = 0;
688 fh->frag_off = htons(offset);
53b24b8f 689 if (frag->next)
1da177e4
LT
690 fh->frag_off |= htons(IP6_MF);
691 fh->identification = frag_id;
0660e03f
ACM
692 ipv6_hdr(frag)->payload_len =
693 htons(frag->len -
694 sizeof(struct ipv6hdr));
1da177e4
LT
695 ip6_copy_metadata(frag, skb);
696 }
1ab1457c 697
7d8c6e39 698 err = output(net, sk, skb);
67ba4152 699 if (!err)
d8d1f30b 700 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 701 IPSTATS_MIB_FRAGCREATES);
dafee490 702
1da177e4
LT
703 if (err || !frag)
704 break;
705
706 skb = frag;
707 frag = skb->next;
708 skb->next = NULL;
709 }
710
a51482bd 711 kfree(tmp_hdr);
1da177e4
LT
712
713 if (err == 0) {
d8d1f30b 714 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 715 IPSTATS_MIB_FRAGOKS);
94e187c0 716 ip6_rt_put(rt);
1da177e4
LT
717 return 0;
718 }
719
46cfd725 720 kfree_skb_list(frag);
1da177e4 721
d8d1f30b 722 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 723 IPSTATS_MIB_FRAGFAILS);
94e187c0 724 ip6_rt_put(rt);
1da177e4 725 return err;
3d13008e
ED
726
727slow_path_clean:
728 skb_walk_frags(skb, frag2) {
729 if (frag2 == frag)
730 break;
731 frag2->sk = NULL;
732 frag2->destructor = NULL;
733 skb->truesize += frag2->truesize;
734 }
1da177e4
LT
735 }
736
737slow_path:
738 left = skb->len - hlen; /* Space per frame */
739 ptr = hlen; /* Where to start from */
740
741 /*
742 * Fragment the datagram.
743 */
744
745 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992 746 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
747
748 /*
749 * Keep copying data until we run out.
750 */
67ba4152 751 while (left > 0) {
1da177e4
LT
752 len = left;
753 /* IF: it doesn't fit, use 'mtu' - the data space left */
754 if (len > mtu)
755 len = mtu;
25985edc 756 /* IF: we are not sending up to and including the packet end
1da177e4
LT
757 then align the next start on an eight byte boundary */
758 if (len < left) {
759 len &= ~7;
760 }
1da177e4 761
cbffccc9
JP
762 /* Allocate buffer */
763 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
764 hroom + troom, GFP_ATOMIC);
765 if (!frag) {
adf30907 766 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 767 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
768 err = -ENOMEM;
769 goto fail;
770 }
771
772 /*
773 * Set up data on packet
774 */
775
776 ip6_copy_metadata(frag, skb);
a7ae1992 777 skb_reserve(frag, hroom);
1da177e4 778 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 779 skb_reset_network_header(frag);
badff6d0 780 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
781 frag->transport_header = (frag->network_header + hlen +
782 sizeof(struct frag_hdr));
1da177e4
LT
783
784 /*
785 * Charge the memory for the fragment to any owner
786 * it might possess
787 */
788 if (skb->sk)
789 skb_set_owner_w(frag, skb->sk);
790
791 /*
792 * Copy the packet header into the new buffer.
793 */
d626f62b 794 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
795
796 /*
797 * Build fragment header.
798 */
799 fh->nexthdr = nexthdr;
800 fh->reserved = 0;
286c2349 801 fh->identification = frag_id;
1da177e4
LT
802
803 /*
804 * Copy a block of the IP datagram.
805 */
e3f0b86b
HS
806 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
807 len));
1da177e4
LT
808 left -= len;
809
810 fh->frag_off = htons(offset);
811 if (left > 0)
812 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
813 ipv6_hdr(frag)->payload_len = htons(frag->len -
814 sizeof(struct ipv6hdr));
1da177e4
LT
815
816 ptr += len;
817 offset += len;
818
819 /*
820 * Put this fragment into the sending queue.
821 */
7d8c6e39 822 err = output(net, sk, frag);
1da177e4
LT
823 if (err)
824 goto fail;
dafee490 825
adf30907 826 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 827 IPSTATS_MIB_FRAGCREATES);
1da177e4 828 }
adf30907 829 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 830 IPSTATS_MIB_FRAGOKS);
808db80a 831 consume_skb(skb);
1da177e4
LT
832 return err;
833
485fca66
FW
834fail_toobig:
835 if (skb->sk && dst_allfrag(skb_dst(skb)))
836 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
837
838 skb->dev = skb_dst(skb)->dev;
839 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
840 err = -EMSGSIZE;
841
1da177e4 842fail:
adf30907 843 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 844 IPSTATS_MIB_FRAGFAILS);
1ab1457c 845 kfree_skb(skb);
1da177e4
LT
846 return err;
847}
848
b71d1d42
ED
849static inline int ip6_rt_check(const struct rt6key *rt_key,
850 const struct in6_addr *fl_addr,
851 const struct in6_addr *addr_cache)
cf6b1982 852{
a02cec21 853 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 854 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
855}
856
497c615a
HX
857static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
858 struct dst_entry *dst,
b71d1d42 859 const struct flowi6 *fl6)
1da177e4 860{
497c615a 861 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 862 struct rt6_info *rt;
1da177e4 863
497c615a
HX
864 if (!dst)
865 goto out;
866
a963a37d
ED
867 if (dst->ops->family != AF_INET6) {
868 dst_release(dst);
869 return NULL;
870 }
871
872 rt = (struct rt6_info *)dst;
497c615a
HX
873 /* Yes, checking route validity in not connected
874 * case is not very simple. Take into account,
875 * that we do not support routing by source, TOS,
67ba4152 876 * and MSG_DONTROUTE --ANK (980726)
497c615a 877 *
cf6b1982
YH
878 * 1. ip6_rt_check(): If route was host route,
879 * check that cached destination is current.
497c615a
HX
880 * If it is network route, we still may
881 * check its validity using saved pointer
882 * to the last used address: daddr_cache.
883 * We do not want to save whole address now,
884 * (because main consumer of this service
885 * is tcp, which has not this problem),
886 * so that the last trick works only on connected
887 * sockets.
888 * 2. oif also should be the same.
889 */
4c9483b2 890 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 891#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 892 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 893#endif
ca254490
DA
894 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
895 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
896 dst_release(dst);
897 dst = NULL;
1da177e4
LT
898 }
899
497c615a
HX
900out:
901 return dst;
902}
903
3aef934f 904static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 905 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 906{
69cce1d1
DM
907#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
908 struct neighbour *n;
97cac082 909 struct rt6_info *rt;
69cce1d1
DM
910#endif
911 int err;
497c615a 912
e16e888b
MS
913 /* The correct way to handle this would be to do
914 * ip6_route_get_saddr, and then ip6_route_output; however,
915 * the route-specific preferred source forces the
916 * ip6_route_output call _before_ ip6_route_get_saddr.
917 *
918 * In source specific routing (no src=any default route),
919 * ip6_route_output will fail given src=any saddr, though, so
920 * that's why we try it again later.
921 */
922 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
923 struct rt6_info *rt;
924 bool had_dst = *dst != NULL;
1da177e4 925
e16e888b
MS
926 if (!had_dst)
927 *dst = ip6_route_output(net, sk, fl6);
928 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
929 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
930 sk ? inet6_sk(sk)->srcprefs : 0,
931 &fl6->saddr);
44456d37 932 if (err)
1da177e4 933 goto out_err_release;
e16e888b
MS
934
935 /* If we had an erroneous initial result, pretend it
936 * never existed and let the SA-enabled version take
937 * over.
938 */
939 if (!had_dst && (*dst)->error) {
940 dst_release(*dst);
941 *dst = NULL;
942 }
1da177e4
LT
943 }
944
e16e888b
MS
945 if (!*dst)
946 *dst = ip6_route_output(net, sk, fl6);
947
948 err = (*dst)->error;
949 if (err)
950 goto out_err_release;
951
95c385b4 952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
953 /*
954 * Here if the dst entry we've looked up
955 * has a neighbour entry that is in the INCOMPLETE
956 * state and the src address from the flow is
957 * marked as OPTIMISTIC, we release the found
958 * dst entry and replace it instead with the
959 * dst entry of the nexthop router
960 */
c56bf6fe 961 rt = (struct rt6_info *) *dst;
707be1ff 962 rcu_read_lock_bh();
2647a9b0
MKL
963 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
964 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
965 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
966 rcu_read_unlock_bh();
967
968 if (err) {
e550dfb0 969 struct inet6_ifaddr *ifp;
4c9483b2 970 struct flowi6 fl_gw6;
e550dfb0
NH
971 int redirect;
972
4c9483b2 973 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
974 (*dst)->dev, 1);
975
976 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
977 if (ifp)
978 in6_ifa_put(ifp);
979
980 if (redirect) {
981 /*
982 * We need to get the dst entry for the
983 * default router instead
984 */
985 dst_release(*dst);
4c9483b2
DM
986 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
987 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
988 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
989 err = (*dst)->error;
990 if (err)
e550dfb0 991 goto out_err_release;
95c385b4 992 }
e550dfb0 993 }
95c385b4
NH
994#endif
995
1da177e4
LT
996 return 0;
997
998out_err_release:
ca46f9c8 999 if (err == -ENETUNREACH)
5ac68e7c 1000 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1001 dst_release(*dst);
1002 *dst = NULL;
1003 return err;
1004}
34a0b3cd 1005
497c615a
HX
1006/**
1007 * ip6_dst_lookup - perform route lookup on flow
1008 * @sk: socket which provides route info
1009 * @dst: pointer to dst_entry * for result
4c9483b2 1010 * @fl6: flow to lookup
497c615a
HX
1011 *
1012 * This function performs a route lookup on the given flow.
1013 *
1014 * It returns zero on success, or a standard errno code on error.
1015 */
343d60aa
RP
1016int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1017 struct flowi6 *fl6)
497c615a
HX
1018{
1019 *dst = NULL;
343d60aa 1020 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1021}
3cf3dc6c
ACM
1022EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1023
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* Rewrite the flow's destination only after the routing decision:
	 * the lookup above may have been made on an intermediate (tunnel)
	 * address, while IPsec policy must match the final destination.
	 */
	if (final_dst)
		fl6->daddr = *final_dst;
	/* If the caller did not pin an output interface, inherit the L3
	 * master device's FIB oif (VRF support).
	 */
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);

	/* May substitute an XFRM dst for the plain route; returns either a
	 * valid dst or an ERR_PTR, never NULL.
	 */
	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1052
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	/* Grab the socket's cached dst (validated against dst_cookie)... */
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* ...and drop it if it no longer matches this flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	/* Falls through to a fresh lookup when dst is NULL; otherwise
	 * revalidates the cached entry.
	 */
	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* See ip6_dst_lookup_flow(): IPsec policy must see the final
	 * destination, not an intermediate one.
	 */
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1084
/* Append datagram payload for UDP fragmentation offload (UFO): instead of
 * software-fragmenting, build one oversized skb and let the NIC segment it.
 * Called from __ip6_append_data() when the device advertises NETIF_F_UFO.
 * Returns 0 on success or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First chunk: allocate the head skb with room for the link,
		 * network and transport headers (+20 bytes of slack).
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* GSO metadata already set up by a previous call; just
		 * append more payload.
		 */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* One fragment ID is shared by all fragments the device will emit. */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1144
0178b695
HX
1145static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1146 gfp_t gfp)
1147{
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1149}
1150
1151static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1152 gfp_t gfp)
1153{
1154 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1155}
1156
75a493e6 1157static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1158 int *maxfraglen,
1159 unsigned int fragheaderlen,
1160 struct sk_buff *skb,
75a493e6 1161 struct rt6_info *rt,
e367c2d0 1162 unsigned int orig_mtu)
0c183379
G
1163{
1164 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1165 if (!skb) {
0c183379 1166 /* first fragment, reserve header_len */
e367c2d0 1167 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1168
1169 } else {
1170 /*
1171 * this fragment is not first, the headers
1172 * space is regarded as data space.
1173 */
e367c2d0 1174 *mtu = orig_mtu;
0c183379
G
1175 }
1176 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1177 + fragheaderlen - sizeof(struct frag_hdr);
1178 }
1179}
1180
366e41d9
VY
/* Initialize the cork state used by __ip6_append_data(): duplicate the
 * caller's extension headers, take a reference on the route and compute
 * the effective fragment size.  Returns 0 or a negative errno.
 *
 * NOTE(review): on an -ENOBUFS midway through the option duplication a
 * partially filled v6_cork->opt is left behind; the caller is expected to
 * dispose of it via ip6_cork_release() — verify each caller does.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cork must have been released first. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header so the cork survives the
		 * caller's buffers.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Reference held here is dropped by ip6_cork_release(). */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* Pick the MTU: device MTU when probing PMTU, otherwise the path
	 * MTU (outermost dst for XFRM tunnels, innermost path otherwise).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller per-socket IPV6_MTU overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1248
0bbe84a6
VY
/* Core of ip6_append_data()/ip6_make_skb(): pull @length bytes of payload
 * through @getfrag and append them to @queue as a chain of skbs sized to
 * the corked MTU, charging the memory to @sk.  @transhdrlen is non-zero
 * only for the first call of a cork sequence (transport header space).
 * Returns 0 or a negative errno; on error any skbs already queued are left
 * on @queue for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	/* Extension-header space is only accounted on the very first skb
	 * of the cork sequence.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header: IPv6 header + non-fragmentable ext headers. */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest fragment payload end, 8-byte aligned per RFC 2460. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* IPV6_DONTFRAG: report the MTU to the application instead of
	 * fragmenting (UDP/RAW only).
	 */
	if (cork->length + length > mtu - headersize && dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM)
		csummode = CHECKSUM_PARTIAL;

	/* Capture tx timestamp flags/key once; applied to the first
	 * fragment only (see below).
	 */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Hand the whole datagram to the device when UFO is available. */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb: may sleep waiting for sndbuf. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later skbs: allow up to 2x sndbuf before
				 * refusing more memory.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous skb
				 * into this one, keeping checksums consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header accounting applies to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: fill the per-task page fragment and
			 * attach it (or coalesce with the last frag).
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic length accounting done before the loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1559
/**
 *	ip6_append_data - append payload to the socket's pending write queue
 *	@sk: socket to transmit on
 *	@getfrag: callback copying payload into kernel buffers
 *	@from: opaque cookie passed to @getfrag
 *	@length: number of payload bytes to append
 *	@transhdrlen: transport header length (first call of a cork only)
 *	@hlimit: hop limit for the cork
 *	@tclass: traffic class for the cork
 *	@opt: IPv6 extension headers (duplicated into the cork)
 *	@fl6: flow describing the destination
 *	@rt: route to use (cork takes its own reference)
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 *	@dontfrag: IPV6_DONTFRAG behavior selector
 *
 *	Sets up the cork on first use, then delegates to __ip6_append_data().
 *	Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		/* First call accounts extension-header space as data. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Later calls must reuse the flow captured at cork time. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1596
366e41d9
VY
1597static void ip6_cork_release(struct inet_cork_full *cork,
1598 struct inet6_cork *v6_cork)
bf138862 1599{
366e41d9
VY
1600 if (v6_cork->opt) {
1601 kfree(v6_cork->opt->dst0opt);
1602 kfree(v6_cork->opt->dst1opt);
1603 kfree(v6_cork->opt->hopopt);
1604 kfree(v6_cork->opt->srcrt);
1605 kfree(v6_cork->opt);
1606 v6_cork->opt = NULL;
0178b695
HX
1607 }
1608
366e41d9
VY
1609 if (cork->base.dst) {
1610 dst_release(cork->base.dst);
1611 cork->base.dst = NULL;
1612 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1613 }
366e41d9 1614 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1615}
1616
6422398c
VY
/* Collapse the corked skb queue into one datagram: the first skb becomes
 * the head, later skbs are chained onto its frag_list, then the extension
 * headers and the IPv6 header are pushed on.  Releases the cork.  Returns
 * the finished skb or NULL when the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs onto the head's frag_list, transferring
	 * their sizes and dropping their individual socket ownership.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* A routing header may rewrite final_dst to the first hop. */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1690
1691int ip6_send_skb(struct sk_buff *skb)
1692{
1693 struct net *net = sock_net(skb->sk);
1694 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1695 int err;
1696
33224b16 1697 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1698 if (err) {
1699 if (err > 0)
6ce9e7b5 1700 err = net_xmit_errno(err);
1da177e4 1701 if (err)
6422398c
VY
1702 IP6_INC_STATS(net, rt->rt6i_idev,
1703 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1704 }
1705
1da177e4 1706 return err;
6422398c
VY
1707}
1708
/* Finalize the socket's corked write queue into one datagram and send it.
 * Returns 0 when there was nothing pending, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1720
/* Discard every skb still sitting on a cork queue (counting each one with
 * a dst as OUTDISCARDS) and release the cork state.  Used on append errors
 * and when the socket abandons a pending datagram.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
0bbe84a6
VY
1737
/* Public wrapper: flush the socket's own write queue and per-socket cork. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1744
1745struct sk_buff *ip6_make_skb(struct sock *sk,
1746 int getfrag(void *from, char *to, int offset,
1747 int len, int odd, struct sk_buff *skb),
1748 void *from, int length, int transhdrlen,
1749 int hlimit, int tclass,
1750 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1751 struct rt6_info *rt, unsigned int flags,
1752 int dontfrag)
1753{
1754 struct inet_cork_full cork;
1755 struct inet6_cork v6_cork;
1756 struct sk_buff_head queue;
1757 int exthdrlen = (opt ? opt->opt_flen : 0);
1758 int err;
1759
1760 if (flags & MSG_PROBE)
1761 return NULL;
1762
1763 __skb_queue_head_init(&queue);
1764
1765 cork.base.flags = 0;
1766 cork.base.addr = 0;
1767 cork.base.opt = NULL;
1768 v6_cork.opt = NULL;
1769 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1770 if (err)
1771 return ERR_PTR(err);
1772
1773 if (dontfrag < 0)
1774 dontfrag = inet6_sk(sk)->dontfrag;
1775
1776 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1777 &current->task_frag, getfrag, from,
1778 length + exthdrlen, transhdrlen + exthdrlen,
1779 flags, dontfrag);
1780 if (err) {
1781 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1782 return ERR_PTR(err);
1783 }
1784
1785 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1786}