/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

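/*
 * ip6_finish_output2 - final transmission step: loop multicast packets
 * back to local listeners where required, resolve the next-hop neighbour
 * and hand the skb to the neighbour output path.
 */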
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

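/*
 * ip6_finish_output - decide whether the skb can go out as-is or must
 * first be split by ip6_fragment(): too big for the path MTU without
 * GSO, an allfrag destination, or a conntrack-defragmented packet that
 * exceeds its recorded frag_max_size.
 */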
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

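/*
 * ip6_output - dst output method for IPv6: drops everything when IPv6 is
 * administratively disabled on the device, otherwise runs the packet
 * through the NF_INET_POST_ROUTING hook (skipped for packets already
 * rerouted, i.e. IP6SKB_REROUTED).
 */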
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

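/*
 * ip6_xmit prepends the IPv6 header (and any extension headers carried
 * in @opt) to a fully built transport payload, then passes the packet
 * to the NF_INET_LOCAL_OUT hook towards dst_output().  Oversized
 * packets are bounced with EMSGSIZE rather than fragmented, since the
 * callers above segment their own data.
 */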
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

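/*
 * ip6_call_ra_chain - deliver a packet carrying a Router Alert option to
 * every raw socket registered for this alert value (via the
 * IPV6_ROUTER_ALERT socket option).  Returns 1 when the skb has been
 * consumed by a listener.
 */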
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

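/*
 * ip6_forward_proxy_check - for a destination we proxy NDP for, decide
 * whether the packet is a neighbour discovery message that must be
 * handed to the local input path (returns 1), forwarded normally
 * (returns 0), or dropped because the target is link-local (returns -1).
 */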
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(skb);
}

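/*
 * ip6_dst_mtu_forward - MTU to enforce while forwarding: a locked
 * RTAX_MTU route metric wins, otherwise fall back to the egress
 * interface MTU (idev->cnf.mtu6).
 */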
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

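/*
 * ip6_pkt_too_big - true when the skb cannot be sent over @mtu without
 * fragmentation; GSO packets are judged by their network-layer segment
 * size, and conntrack-defragmented packets by their frag_max_size.
 */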
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}

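/*
 * ip6_forward - forwarding path for packets not addressed to this host:
 * validate the packet, decrement the hop limit, emit redirects or ICMPv6
 * errors where appropriate, and queue the skb through NF_INET_FORWARD.
 */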
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

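/*
 * ip6_fragment - split an oversized skb into fragments and send each one
 * through @output.  A fast path re-uses the existing frag_list when its
 * geometry already fits the MTU; otherwise the slow path copies the
 * payload into freshly allocated fragment skbs.
 */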
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

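/*
 * ip6_rt_check - nonzero when the cached route can no longer be trusted
 * for @fl_addr: it is neither a host route to that address nor vouched
 * for by the socket's cached destination (@addr_cache).
 */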
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

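/*
 * ip6_dst_lookup_tail - core of the dst lookup helpers below: resolve
 * the route for @fl6 (selecting a source address when none was given)
 * and, with optimistic DAD, fall back to the default router's dst while
 * our own source address is still tentative.
 */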
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

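/*
 * ip6_ufo_append_data - UDP fragmentation offload path for
 * ip6_append_data(): build (or extend) one large skb and let the device
 * split it on transmit, recording the GSO segment size and fragment id.
 */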
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)
{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (skb == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

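/*
 * ip6_setup_cork - duplicate the tx options and capture the route, hop
 * limit, traffic class and fragment size in the cork, so that later
 * appends and the final skb build all use the same parameters.
 */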
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(v6_cork->opt == NULL))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

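/*
 * __ip6_append_data - append user data to the queue of pending skbs,
 * splitting it into MTU-sized pieces as it is copied in.  Each skb
 * leaves room for the fragment header so the final send never has to
 * move data around.
 */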
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 */
	if (!skb && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

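/*
 * __ip6_make_skb - collapse the queue of pending skbs into one packet:
 * chain the rest onto the first skb's frag_list, prepend the IPv6
 * header and release the cork.
 */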
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (skb == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

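/*
 * ip6_send_skb - push a packet built by __ip6_make_skb() out through
 * ip6_local_out(), accounting a discard on failure.
 */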
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

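/*
 * ip6_make_skb - single-shot variant of ip6_append_data() plus
 * ip6_push_pending_frames(): cork on a private queue, append @length
 * bytes and return the finished skb (or an ERR_PTR) without touching
 * the socket's write queue.
 */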
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err)
		return ERR_PTR(err);

	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}