]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
xfrm: Only compute net once in xfrm_policy_queue_process
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
/* Final transmit step: resolve the L2 neighbour for the route's nexthop
 * and hand the skb to the device layer.  Also handles multicast loopback
 * and node-local scope filtering.  Returns the device-layer result, 0 when
 * the packet was consumed locally, or -EINVAL when no neighbour exists.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asks
		 * for multicast loopback and either the mrouter wants it or
		 * we are a member of the destination group on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the sender restricted the packet
			 * to this node; it was looped above, do not transmit.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local multicast must never leave the machine. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Neighbour lookup under RCU-bh; create an entry on demand. */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
122
0c4b51f0 123static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
124{
125 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
126 dst_allfrag(skb_dst(skb)) ||
127 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 128 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 129 else
7d8c6e39 130 return ip6_finish_output2(net, sk, skb);
9e508490
JE
131}
132
/* Entry point of the IPv6 output path.  Drops the packet when IPv6 is
 * administratively disabled on the egress device, otherwise runs the
 * POST_ROUTING netfilter hook (skipped for rerouted skbs) and continues
 * in ip6_finish_output().
 */
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	struct net *net = dev_net(dev);

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
150
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Builds the IPv6 header (plus extension headers from @opt), charges the
 * skb to @sk and sends it through the LOCAL_OUT netfilter hook.  Returns
 * the hook result, -ENOBUFS on headroom reallocation failure, or
 * -EMSGSIZE when the packet exceeds the path MTU and may not be sent.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		/* Routing-header options may rewrite first_hop. */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* NOTE(review): np is NULL-checked above but dereferenced
	 * unconditionally here for autoflowlabel — presumably callers
	 * guarantee a full socket; confirm against call sites.
	 */
	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output_okfn);
	}

	/* Packet is too big and may not be fragmented: report EMSGSIZE. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
250
/* Deliver a Router Alert packet to every raw socket registered for the
 * matching RA selector @sel (and bound to the ingress device, if bound).
 * Returns 1 when at least one socket consumed the skb (ownership is
 * transferred to the last matching socket), 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Every listener except the last gets a clone;
			 * the original skb goes to the final match below.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
279
/* Decide how to treat a packet arriving for an address we proxy NDP for.
 * Returns 1 when the packet is an NDP message and must be handed to the
 * local input path, 0 when normal forwarding should continue, and -1 when
 * the packet must be dropped (link-local destination cannot be proxied).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
331
/* Last step of the forwarding path after the FORWARD netfilter hook:
 * clear the sender-CPU hint (meaningless for a forwarded skb) and hand
 * the packet to the output path.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(sk, skb);
}
338
0954cf9c
HFS
339static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
340{
341 unsigned int mtu;
342 struct inet6_dev *idev;
343
344 if (dst_metric_locked(dst, RTAX_MTU)) {
345 mtu = dst_metric_raw(dst, RTAX_MTU);
346 if (mtu)
347 return mtu;
348 }
349
350 mtu = IPV6_MIN_MTU;
351 rcu_read_lock();
352 idev = __in6_dev_get(dst->dev);
353 if (idev)
354 mtu = idev->cnf.mtu6;
355 rcu_read_unlock();
356
357 return mtu;
358}
359
/* Decide whether @skb exceeds @mtu and must trigger ICMPV6_PKT_TOOBIG
 * instead of being forwarded.  Check order matters: the conntrack
 * frag_max_size test must precede the ignore_df test because defrag
 * sets both.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	/* A GSO packet passes if every resulting segment would fit. */
	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}
377
/* Forward an IPv6 packet received on one interface out another.
 * Performs policy, hop-limit, NDP-proxy, redirect, source-address and
 * MTU checks, decrements the hop limit after COW, and runs the FORWARD
 * netfilter hook.  Returns 0/hook result on success, a negative errno
 * after consuming the skb otherwise.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have rerouted the skb; refresh our dst pointer. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
538
/* Copy per-packet metadata from @from onto a freshly allocated fragment
 * @to so that fragments are routed, scheduled and classified exactly
 * like the original packet.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any stale dst before attaching a reference to from's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
555
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * Fast path: when the skb already carries a suitable frag_list, convert
 * each list member into a fragment in place.  Slow path: allocate and
 * copy data into fresh fragment skbs.  Consumes @skb in all cases.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* mtu is now the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the IPv6 header
		 * (plus unfragmentable exthdrs) and the payload.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above for the frags
		 * we already walked, then fall through to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
843
b71d1d42
ED
844static inline int ip6_rt_check(const struct rt6key *rt_key,
845 const struct in6_addr *fl_addr,
846 const struct in6_addr *addr_cache)
cf6b1982 847{
a02cec21 848 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 849 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
850}
851
/* Validate a socket-cached dst against the flow @fl6.  Returns @dst when
 * it is still usable, or NULL (after releasing the reference) when it is
 * absent, not IPv6, stale for the destination/source, or bound to a
 * different outgoing interface.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
897
/* Core of the dst lookup: fill *dst for @fl6, selecting a source address
 * when the flow has none, and (with optimistic DAD) rerouting via the
 * default router when the chosen source is still optimistic.  On failure
 * *dst is released and set to NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}
	}

	if (!*dst)
		*dst = ip6_route_output(net, sk, fl6);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
34a0b3cd 999
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to look up the route in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow,
 *	ignoring any previously cached dst (*@dst starts as NULL).
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1017
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* Rewrite the flow's destination to the "inner" final address so
	 * the XFRM lookup below matches policies on the real destination.
	 */
	if (final_dst)
		fl6->daddr = *final_dst;
	/* Pin the output interface to the routed device if the caller did
	 * not specify one.
	 */
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = dst->dev->ifindex;

	/* May replace dst with an IPsec-transformed route; on failure the
	 * returned pointer encodes the error (xfrm consumes dst).
	 */
	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1046
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	/* Fetch the socket's cached dst; sk_dst_check() drops it if the
	 * cookie shows the cached route went stale.
	 */
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* Validate the cached dst against this flow (addresses may differ
	 * from the cached ones); returns NULL if it cannot be reused.
	 */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	/* Only performs a fresh route lookup when dst is still NULL. */
	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1078
/* Append data for a UDP Fragmentation Offload (UFO) capable device:
 * instead of building MTU-sized fragments in software, accumulate the
 * whole datagram into one GSO skb and let the device segment it.
 * Returns 0 on success or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* +20 appears to be headroom slack inherited from the IPv4
		 * counterpart — exact rationale not visible here.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* GSO metadata already set on a previous call; just grow
		 * the existing skb.
		 */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pick the fragment ID now; the device will stamp it into every
	 * fragment it emits.
	 */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	/* Copy user data into page frags hanging off the single GSO skb. */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1138
0178b695
HX
1139static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1140 gfp_t gfp)
1141{
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1143}
1144
1145static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1146 gfp_t gfp)
1147{
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1149}
1150
75a493e6 1151static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1152 int *maxfraglen,
1153 unsigned int fragheaderlen,
1154 struct sk_buff *skb,
75a493e6 1155 struct rt6_info *rt,
e367c2d0 1156 unsigned int orig_mtu)
0c183379
G
1157{
1158 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1159 if (!skb) {
0c183379 1160 /* first fragment, reserve header_len */
e367c2d0 1161 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1162
1163 } else {
1164 /*
1165 * this fragment is not first, the headers
1166 * space is regarded as data space.
1167 */
e367c2d0 1168 *mtu = orig_mtu;
0c183379
G
1169 }
1170 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1171 + fragheaderlen - sizeof(struct frag_hdr);
1172 }
1173}
1174
366e41d9
VY
/* Initialise cork state for a new corked message: duplicate the caller's
 * tx options, take a reference on the route and precompute the fragment
 * size.  Returns 0 or a negative errno.
 *
 * NOTE(review): on an ENOBUFS return, options duplicated so far remain in
 * v6_cork->opt — callers appear to rely on a later ip6_cork_release() to
 * free them; confirm every error path does so.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		/* kzalloc: unset option pointers below stay NULL, which
		 * keeps ip6_cork_release() safe on partial setup.
		 */
		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a reference on the route until ip6_cork_release(). */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* Choose the path MTU source: device MTU when probing PMTU, the
	 * route (or, outside XFRM tunnels, the underlying path) otherwise.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller user-requested fragment size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1242
0bbe84a6
VY
/* Core of ip6_append_data()/ip6_make_skb(): copy user data described by
 * getfrag()/from into a queue of pending skbs, splitting at the working
 * MTU so that __ip6_make_skb() can later emit them as IPv6 fragments.
 * Returns 0 on success or a negative errno (cork->length is rolled back
 * on error).
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First chunk of the message: account for destination
		 * options and any IPsec header space from the route.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header: IPv6 header + nexthop-mangling headers +
	 * non-fragmentable destination options.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned fragment that still fits the MTU. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		/* Sockets ignoring the DF semantics may send up to the
		 * absolute IPv6 payload limit; others are capped at MTU.
		 */
		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			/* Report the usable MTU to the application instead
			 * of fragmenting.
			 */
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* Capture tx timestamp flags once; they are applied only to the
	 * first fragment below.
	 */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Large UDP datagram on a UFO-capable device: hand the whole
	 * payload to the GSO path instead of fragmenting here.
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First fragment: may block waiting for
				 * sndbuf space.
				 */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: soft-limit to twice the
				 * send buffer instead of blocking.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, fixing both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* Non-SG device: copy linearly into the skb tail. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* SG device: append into the socket's page frag,
			 * coalescing with the last frag when possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting done before the UFO branch. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1557
/* Append data to the socket's pending (corked) queue, setting up cork
 * state on the first call.  The message is later emitted by
 * ip6_push_pending_frames().  Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		/* Destination options travel with the first chunk only. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Subsequent chunks must reuse the flow captured at cork
		 * time, not the caller's.
		 */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1594
366e41d9
VY
1595static void ip6_cork_release(struct inet_cork_full *cork,
1596 struct inet6_cork *v6_cork)
bf138862 1597{
366e41d9
VY
1598 if (v6_cork->opt) {
1599 kfree(v6_cork->opt->dst0opt);
1600 kfree(v6_cork->opt->dst1opt);
1601 kfree(v6_cork->opt->hopopt);
1602 kfree(v6_cork->opt->srcrt);
1603 kfree(v6_cork->opt);
1604 v6_cork->opt = NULL;
0178b695
HX
1605 }
1606
366e41d9
VY
1607 if (cork->base.dst) {
1608 dst_release(cork->base.dst);
1609 cork->base.dst = NULL;
1610 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1611 }
366e41d9 1612 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1613}
1614
6422398c
VY
/* Collapse the queue of pending skbs built by __ip6_append_data() into a
 * single skb (extra skbs chained on frag_list), push extension headers
 * and the IPv6 header, and release the cork.  Returns the finished skb
 * or NULL if nothing was queued.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs on the first one's frag_list, folding
	 * their sizes into the head skb and detaching them from the sock.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	/* May redirect final_dst to a routing-header intermediate hop. */
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1688
/* Transmit a fully built skb through the local output path, converting
 * qdisc congestion codes into errnos and accounting drops.
 */
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are NET_XMIT_* codes; map them to an
		 * errno (or 0 for codes that aren't reported to callers).
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}
1706
/* Finalise and transmit the socket's corked message.  Returns 0 when
 * nothing was pending, otherwise the result of the transmit.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1718
0bbe84a6 1719static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1720 struct sk_buff_head *queue,
1721 struct inet_cork_full *cork,
1722 struct inet6_cork *v6_cork)
1da177e4 1723{
1da177e4
LT
1724 struct sk_buff *skb;
1725
0bbe84a6 1726 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1727 if (skb_dst(skb))
1728 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1729 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1730 kfree_skb(skb);
1731 }
1732
6422398c 1733 ip6_cork_release(cork, v6_cork);
1da177e4 1734}
0bbe84a6
VY
1735
/* Public wrapper: discard the socket's corked message (sk_write_queue)
 * and its cork state without transmitting.
 */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1742
1743struct sk_buff *ip6_make_skb(struct sock *sk,
1744 int getfrag(void *from, char *to, int offset,
1745 int len, int odd, struct sk_buff *skb),
1746 void *from, int length, int transhdrlen,
1747 int hlimit, int tclass,
1748 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1749 struct rt6_info *rt, unsigned int flags,
1750 int dontfrag)
1751{
1752 struct inet_cork_full cork;
1753 struct inet6_cork v6_cork;
1754 struct sk_buff_head queue;
1755 int exthdrlen = (opt ? opt->opt_flen : 0);
1756 int err;
1757
1758 if (flags & MSG_PROBE)
1759 return NULL;
1760
1761 __skb_queue_head_init(&queue);
1762
1763 cork.base.flags = 0;
1764 cork.base.addr = 0;
1765 cork.base.opt = NULL;
1766 v6_cork.opt = NULL;
1767 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1768 if (err)
1769 return ERR_PTR(err);
1770
1771 if (dontfrag < 0)
1772 dontfrag = inet6_sk(sk)->dontfrag;
1773
1774 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1775 &current->task_frag, getfrag, from,
1776 length + exthdrlen, transhdrlen + exthdrlen,
1777 flags, dontfrag);
1778 if (err) {
1779 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1780 return ERR_PTR(err);
1781 }
1782
1783 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1784}