]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
bridge: Add br_netif_receive_skb remove netif_receive_skb_sk
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
7026b1dd 59static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
78126c41 63 struct net *net = dev_net(dev);
f6b72b62 64 struct neighbour *neigh;
6fd6ce20
YH
65 struct in6_addr *nexthop;
66 int ret;
1da177e4
LT
67
68 skb->protocol = htons(ETH_P_IPV6);
69 skb->dev = dev;
70
0660e03f 71 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 72 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 73
7026b1dd 74 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 75 ((mroute6_socket(net, skb) &&
bd91b8bf 76 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
77 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
79 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80
81 /* Do not check for IFF_ALLMULTI; multicast routing
82 is not supported in any case.
83 */
84 if (newskb)
b2e0b385 85 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
7026b1dd 86 sk, newskb, NULL, newskb->dev,
95603e22 87 dev_loopback_xmit);
1da177e4 88
0660e03f 89 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 90 IP6_INC_STATS(net, idev,
3bd653c8 91 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
92 kfree_skb(skb);
93 return 0;
94 }
95 }
96
78126c41 97 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
2647a9b0 108 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
78126c41 119 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
120 kfree_skb(skb);
121 return -EINVAL;
1da177e4
LT
122}
123
7026b1dd 124static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
9e508490
JE
125{
126 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
127 dst_allfrag(skb_dst(skb)) ||
128 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7026b1dd 129 return ip6_fragment(sk, skb, ip6_finish_output2);
9e508490 130 else
7026b1dd 131 return ip6_finish_output2(sk, skb);
9e508490
JE
132}
133
aad88724 134int ip6_output(struct sock *sk, struct sk_buff *skb)
1da177e4 135{
9e508490 136 struct net_device *dev = skb_dst(skb)->dev;
adf30907 137 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
19a0644c 138 struct net *net = dev_net(dev);
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 140 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
141 kfree_skb(skb);
142 return 0;
143 }
144
7026b1dd
DM
145 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
146 NULL, dev,
9c6eb28a
JE
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
b5d43998 152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
153 */
154
4c9483b2 155int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 156 struct ipv6_txoptions *opt, int tclass)
1da177e4 157{
3bd653c8 158 struct net *net = sock_net(sk);
b30bd282 159 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 160 struct in6_addr *first_hop = &fl6->daddr;
adf30907 161 struct dst_entry *dst = skb_dst(skb);
1da177e4 162 struct ipv6hdr *hdr;
4c9483b2 163 u8 proto = fl6->flowi6_proto;
1da177e4 164 int seg_len = skb->len;
e651f03a 165 int hlimit = -1;
1da177e4
LT
166 u32 mtu;
167
168 if (opt) {
c2636b4d 169 unsigned int head_room;
1da177e4
LT
170
171 /* First: exthdrs may take lots of space (~8K for now)
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 180 if (!skb2) {
adf30907 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
1da177e4
LT
184 return -ENOBUFS;
185 }
808db80a 186 consume_skb(skb);
a11d206d 187 skb = skb2;
83d7eb29 188 skb_set_owner_w(skb, sk);
1da177e4
LT
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
e2d1bca7
ACM
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
0660e03f 198 hdr = ipv6_hdr(skb);
1da177e4
LT
199
200 /*
201 * Fill in the IPv6 header
202 */
b903d324 203 if (np)
1da177e4
LT
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
6b75d090 206 hlimit = ip6_dst_hoplimit(dst);
1da177e4 207
cb1ce2ef 208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 209 np->autoflowlabel, fl6));
41a1f8ea 210
1da177e4
LT
211 hdr->payload_len = htons(seg_len);
212 hdr->nexthdr = proto;
213 hdr->hop_limit = hlimit;
214
4e3fd7a0
AD
215 hdr->saddr = fl6->saddr;
216 hdr->daddr = *first_hop;
1da177e4 217
9c9c9ad5 218 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 219 skb->priority = sk->sk_priority;
4a19ec58 220 skb->mark = sk->sk_mark;
a2c2064f 221
1da177e4 222 mtu = dst_mtu(dst);
60ff7467 223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 225 IPSTATS_MIB_OUT, skb->len);
7026b1dd 226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
5a70649e 227 NULL, dst->dev, dst_output);
1da177e4
LT
228 }
229
1da177e4 230 skb->dev = dst->dev;
f4e53e29 231 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
adf30907 232 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
233 kfree_skb(skb);
234 return -EMSGSIZE;
235}
7159039a
YH
236EXPORT_SYMBOL(ip6_xmit);
237
1da177e4
LT
238static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239{
240 struct ip6_ra_chain *ra;
241 struct sock *last = NULL;
242
243 read_lock(&ip6_ra_lock);
244 for (ra = ip6_ra_chain; ra; ra = ra->next) {
245 struct sock *sk = ra->sk;
0bd1b59b
AM
246 if (sk && ra->sel == sel &&
247 (!sk->sk_bound_dev_if ||
248 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
249 if (last) {
250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251 if (skb2)
252 rawv6_rcv(last, skb2);
253 }
254 last = sk;
255 }
256 }
257
258 if (last) {
259 rawv6_rcv(last, skb);
260 read_unlock(&ip6_ra_lock);
261 return 1;
262 }
263 read_unlock(&ip6_ra_lock);
264 return 0;
265}
266
e21e0b5f
VN
267static int ip6_forward_proxy_check(struct sk_buff *skb)
268{
0660e03f 269 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 270 u8 nexthdr = hdr->nexthdr;
75f2811c 271 __be16 frag_off;
e21e0b5f
VN
272 int offset;
273
274 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
276 if (offset < 0)
277 return 0;
278 } else
279 offset = sizeof(struct ipv6hdr);
280
281 if (nexthdr == IPPROTO_ICMPV6) {
282 struct icmp6hdr *icmp6;
283
d56f90a7
ACM
284 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285 offset + 1 - skb->data)))
e21e0b5f
VN
286 return 0;
287
d56f90a7 288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
289
290 switch (icmp6->icmp6_type) {
291 case NDISC_ROUTER_SOLICITATION:
292 case NDISC_ROUTER_ADVERTISEMENT:
293 case NDISC_NEIGHBOUR_SOLICITATION:
294 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295 case NDISC_REDIRECT:
296 /* For reaction involving unicast neighbor discovery
297 * message destined to the proxied address, pass it to
298 * input function.
299 */
300 return 1;
301 default:
302 break;
303 }
304 }
305
74553b09
VN
306 /*
307 * The proxying router can't forward traffic sent to a link-local
308 * address, so signal the sender and discard the packet. This
309 * behavior is clarified by the MIPv6 specification.
310 */
311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312 dst_link_failure(skb);
313 return -1;
314 }
315
e21e0b5f
VN
316 return 0;
317}
318
/* Tail of the forwarding path (after the NF_INET_FORWARD hook): clear the
 * recorded sender CPU and push the packet out via dst_output().
 */
static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(sk, skb);
}
324
0954cf9c
HFS
325static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
326{
327 unsigned int mtu;
328 struct inet6_dev *idev;
329
330 if (dst_metric_locked(dst, RTAX_MTU)) {
331 mtu = dst_metric_raw(dst, RTAX_MTU);
332 if (mtu)
333 return mtu;
334 }
335
336 mtu = IPV6_MIN_MTU;
337 rcu_read_lock();
338 idev = __in6_dev_get(dst->dev);
339 if (idev)
340 mtu = idev->cnf.mtu6;
341 rcu_read_unlock();
342
343 return mtu;
344}
345
fe6cc55f
FW
346static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
347{
418a3156 348 if (skb->len <= mtu)
fe6cc55f
FW
349 return false;
350
60ff7467 351 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
352 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
353 return true;
354
60ff7467 355 if (skb->ignore_df)
418a3156
FW
356 return false;
357
fe6cc55f
FW
358 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
359 return false;
360
361 return true;
362}
363
1da177e4
LT
364int ip6_forward(struct sk_buff *skb)
365{
adf30907 366 struct dst_entry *dst = skb_dst(skb);
0660e03f 367 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 368 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 369 struct net *net = dev_net(dst->dev);
14f3ad6f 370 u32 mtu;
1ab1457c 371
53b7997f 372 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
373 goto error;
374
090f1166
LR
375 if (skb->pkt_type != PACKET_HOST)
376 goto drop;
377
4497b076
BH
378 if (skb_warn_if_lro(skb))
379 goto drop;
380
1da177e4 381 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
382 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
383 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
384 goto drop;
385 }
386
35fc92a9 387 skb_forward_csum(skb);
1da177e4
LT
388
389 /*
390 * We DO NOT make any processing on
391 * RA packets, pushing them to user level AS IS
392 * without ane WARRANTY that application will be able
393 * to interpret them. The reason is that we
394 * cannot make anything clever here.
395 *
396 * We are not end-node, so that if packet contains
397 * AH/ESP, we cannot make anything.
398 * Defragmentation also would be mistake, RA packets
399 * cannot be fragmented, because there is no warranty
400 * that different fragments will go along one path. --ANK
401 */
ab4eb353
YH
402 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
403 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
404 return 0;
405 }
406
407 /*
408 * check and decrement ttl
409 */
410 if (hdr->hop_limit <= 1) {
411 /* Force OUTPUT device used as source address */
412 skb->dev = dst->dev;
3ffe533c 413 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
414 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
415 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
416
417 kfree_skb(skb);
418 return -ETIMEDOUT;
419 }
420
fbea49e1 421 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 422 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 423 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
424 int proxied = ip6_forward_proxy_check(skb);
425 if (proxied > 0)
e21e0b5f 426 return ip6_input(skb);
74553b09 427 else if (proxied < 0) {
15c77d8b
ED
428 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
429 IPSTATS_MIB_INDISCARDS);
74553b09
VN
430 goto drop;
431 }
e21e0b5f
VN
432 }
433
1da177e4 434 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
435 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
436 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
437 goto drop;
438 }
adf30907 439 dst = skb_dst(skb);
1da177e4
LT
440
441 /* IPv6 specs say nothing about it, but it is clear that we cannot
442 send redirects to source routed frames.
1e5dc146 443 We don't send redirects to frames decapsulated from IPsec.
1da177e4 444 */
c45a3dfb 445 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 446 struct in6_addr *target = NULL;
fbfe95a4 447 struct inet_peer *peer;
1da177e4 448 struct rt6_info *rt;
1da177e4
LT
449
450 /*
451 * incoming and outgoing devices are the same
452 * send a redirect.
453 */
454
455 rt = (struct rt6_info *) dst;
c45a3dfb
DM
456 if (rt->rt6i_flags & RTF_GATEWAY)
457 target = &rt->rt6i_gateway;
1da177e4
LT
458 else
459 target = &hdr->daddr;
460
fd0273d7 461 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 462
1da177e4
LT
463 /* Limit redirects both by destination (here)
464 and by source (inside ndisc_send_redirect)
465 */
fbfe95a4 466 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 467 ndisc_send_redirect(skb, target);
1d861aa4
DM
468 if (peer)
469 inet_putpeer(peer);
5bb1ab09
DS
470 } else {
471 int addrtype = ipv6_addr_type(&hdr->saddr);
472
1da177e4 473 /* This check is security critical. */
f81b2e7d
YH
474 if (addrtype == IPV6_ADDR_ANY ||
475 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
476 goto error;
477 if (addrtype & IPV6_ADDR_LINKLOCAL) {
478 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 479 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
480 goto error;
481 }
1da177e4
LT
482 }
483
0954cf9c 484 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
485 if (mtu < IPV6_MIN_MTU)
486 mtu = IPV6_MIN_MTU;
487
fe6cc55f 488 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
489 /* Again, force OUTPUT device used as source address */
490 skb->dev = dst->dev;
14f3ad6f 491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
492 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
493 IPSTATS_MIB_INTOOBIGERRORS);
494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
495 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
496 kfree_skb(skb);
497 return -EMSGSIZE;
498 }
499
500 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
501 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
502 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
503 goto drop;
504 }
505
0660e03f 506 hdr = ipv6_hdr(skb);
1da177e4
LT
507
508 /* Mangling hops number delayed to point after skb COW */
1ab1457c 509
1da177e4
LT
510 hdr->hop_limit--;
511
483a47d2 512 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 513 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
7026b1dd
DM
514 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
515 skb->dev, dst->dev,
6e23ae2a 516 ip6_forward_finish);
1da177e4
LT
517
518error:
483a47d2 519 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
520drop:
521 kfree_skb(skb);
522 return -EINVAL;
523}
524
525static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
526{
527 to->pkt_type = from->pkt_type;
528 to->priority = from->priority;
529 to->protocol = from->protocol;
adf30907
ED
530 skb_dst_drop(to);
531 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 532 to->dev = from->dev;
82e91ffe 533 to->mark = from->mark;
1da177e4
LT
534
535#ifdef CONFIG_NET_SCHED
536 to->tc_index = from->tc_index;
537#endif
e7ac05f3 538 nf_copy(to, from);
984bc16c 539 skb_copy_secmark(to, from);
1da177e4
LT
540}
541
7026b1dd
DM
542int ip6_fragment(struct sock *sk, struct sk_buff *skb,
543 int (*output)(struct sock *, struct sk_buff *))
1da177e4 544{
1da177e4 545 struct sk_buff *frag;
67ba4152 546 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 547 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
548 inet6_sk(skb->sk) : NULL;
1da177e4
LT
549 struct ipv6hdr *tmp_hdr;
550 struct frag_hdr *fh;
551 unsigned int mtu, hlen, left, len;
a7ae1992 552 int hroom, troom;
286c2349 553 __be32 frag_id;
67ba4152 554 int ptr, offset = 0, err = 0;
1da177e4 555 u8 *prevhdr, nexthdr = 0;
adf30907 556 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 557
1da177e4
LT
558 hlen = ip6_find_1stfragopt(skb, &prevhdr);
559 nexthdr = *prevhdr;
560
628a5c56 561 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
562
563 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 564 * or if the skb it not generated by a local socket.
b881ef76 565 */
485fca66
FW
566 if (unlikely(!skb->ignore_df && skb->len > mtu))
567 goto fail_toobig;
a34a101e 568
485fca66
FW
569 if (IP6CB(skb)->frag_max_size) {
570 if (IP6CB(skb)->frag_max_size > mtu)
571 goto fail_toobig;
572
573 /* don't send fragments larger than what we received */
574 mtu = IP6CB(skb)->frag_max_size;
575 if (mtu < IPV6_MIN_MTU)
576 mtu = IPV6_MIN_MTU;
b881ef76
JH
577 }
578
d91675f9
YH
579 if (np && np->frag_size < mtu) {
580 if (np->frag_size)
581 mtu = np->frag_size;
582 }
583 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 584
fd0273d7
MKL
585 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
586 &ipv6_hdr(skb)->saddr);
286c2349 587
21dc3301 588 if (skb_has_frag_list(skb)) {
1da177e4 589 int first_len = skb_pagelen(skb);
3d13008e 590 struct sk_buff *frag2;
1da177e4
LT
591
592 if (first_len - hlen > mtu ||
593 ((first_len - hlen) & 7) ||
594 skb_cloned(skb))
595 goto slow_path;
596
4d9092bb 597 skb_walk_frags(skb, frag) {
1da177e4
LT
598 /* Correct geometry. */
599 if (frag->len > mtu ||
600 ((frag->len & 7) && frag->next) ||
601 skb_headroom(frag) < hlen)
3d13008e 602 goto slow_path_clean;
1da177e4 603
1da177e4
LT
604 /* Partially cloned skb? */
605 if (skb_shared(frag))
3d13008e 606 goto slow_path_clean;
2fdba6b0
HX
607
608 BUG_ON(frag->sk);
609 if (skb->sk) {
2fdba6b0
HX
610 frag->sk = skb->sk;
611 frag->destructor = sock_wfree;
2fdba6b0 612 }
3d13008e 613 skb->truesize -= frag->truesize;
1da177e4
LT
614 }
615
616 err = 0;
617 offset = 0;
618 frag = skb_shinfo(skb)->frag_list;
4d9092bb 619 skb_frag_list_init(skb);
1da177e4
LT
620 /* BUILD HEADER */
621
9a217a1c 622 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 623 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 624 if (!tmp_hdr) {
adf30907 625 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 626 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
627 return -ENOMEM;
628 }
629
1da177e4 630 __skb_pull(skb, hlen);
67ba4152 631 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
632 __skb_push(skb, hlen);
633 skb_reset_network_header(skb);
d56f90a7 634 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 635
1da177e4
LT
636 fh->nexthdr = nexthdr;
637 fh->reserved = 0;
638 fh->frag_off = htons(IP6_MF);
286c2349 639 fh->identification = frag_id;
1da177e4
LT
640
641 first_len = skb_pagelen(skb);
642 skb->data_len = first_len - skb_headlen(skb);
643 skb->len = first_len;
0660e03f
ACM
644 ipv6_hdr(skb)->payload_len = htons(first_len -
645 sizeof(struct ipv6hdr));
a11d206d 646
d8d1f30b 647 dst_hold(&rt->dst);
1da177e4
LT
648
649 for (;;) {
650 /* Prepare header of the next frame,
651 * before previous one went down. */
652 if (frag) {
653 frag->ip_summed = CHECKSUM_NONE;
badff6d0 654 skb_reset_transport_header(frag);
67ba4152 655 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
656 __skb_push(frag, hlen);
657 skb_reset_network_header(frag);
d56f90a7
ACM
658 memcpy(skb_network_header(frag), tmp_hdr,
659 hlen);
1da177e4
LT
660 offset += skb->len - hlen - sizeof(struct frag_hdr);
661 fh->nexthdr = nexthdr;
662 fh->reserved = 0;
663 fh->frag_off = htons(offset);
53b24b8f 664 if (frag->next)
1da177e4
LT
665 fh->frag_off |= htons(IP6_MF);
666 fh->identification = frag_id;
0660e03f
ACM
667 ipv6_hdr(frag)->payload_len =
668 htons(frag->len -
669 sizeof(struct ipv6hdr));
1da177e4
LT
670 ip6_copy_metadata(frag, skb);
671 }
1ab1457c 672
7026b1dd 673 err = output(sk, skb);
67ba4152 674 if (!err)
d8d1f30b 675 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 676 IPSTATS_MIB_FRAGCREATES);
dafee490 677
1da177e4
LT
678 if (err || !frag)
679 break;
680
681 skb = frag;
682 frag = skb->next;
683 skb->next = NULL;
684 }
685
a51482bd 686 kfree(tmp_hdr);
1da177e4
LT
687
688 if (err == 0) {
d8d1f30b 689 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 690 IPSTATS_MIB_FRAGOKS);
94e187c0 691 ip6_rt_put(rt);
1da177e4
LT
692 return 0;
693 }
694
46cfd725 695 kfree_skb_list(frag);
1da177e4 696
d8d1f30b 697 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 698 IPSTATS_MIB_FRAGFAILS);
94e187c0 699 ip6_rt_put(rt);
1da177e4 700 return err;
3d13008e
ED
701
702slow_path_clean:
703 skb_walk_frags(skb, frag2) {
704 if (frag2 == frag)
705 break;
706 frag2->sk = NULL;
707 frag2->destructor = NULL;
708 skb->truesize += frag2->truesize;
709 }
1da177e4
LT
710 }
711
712slow_path:
72e843bb
ED
713 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
714 skb_checksum_help(skb))
715 goto fail;
716
1da177e4
LT
717 left = skb->len - hlen; /* Space per frame */
718 ptr = hlen; /* Where to start from */
719
720 /*
721 * Fragment the datagram.
722 */
723
724 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
725 hroom = LL_RESERVED_SPACE(rt->dst.dev);
726 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
727
728 /*
729 * Keep copying data until we run out.
730 */
67ba4152 731 while (left > 0) {
1da177e4
LT
732 len = left;
733 /* IF: it doesn't fit, use 'mtu' - the data space left */
734 if (len > mtu)
735 len = mtu;
25985edc 736 /* IF: we are not sending up to and including the packet end
1da177e4
LT
737 then align the next start on an eight byte boundary */
738 if (len < left) {
739 len &= ~7;
740 }
1da177e4 741
cbffccc9
JP
742 /* Allocate buffer */
743 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
744 hroom + troom, GFP_ATOMIC);
745 if (!frag) {
adf30907 746 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 747 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
748 err = -ENOMEM;
749 goto fail;
750 }
751
752 /*
753 * Set up data on packet
754 */
755
756 ip6_copy_metadata(frag, skb);
a7ae1992 757 skb_reserve(frag, hroom);
1da177e4 758 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 759 skb_reset_network_header(frag);
badff6d0 760 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
761 frag->transport_header = (frag->network_header + hlen +
762 sizeof(struct frag_hdr));
1da177e4
LT
763
764 /*
765 * Charge the memory for the fragment to any owner
766 * it might possess
767 */
768 if (skb->sk)
769 skb_set_owner_w(frag, skb->sk);
770
771 /*
772 * Copy the packet header into the new buffer.
773 */
d626f62b 774 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
775
776 /*
777 * Build fragment header.
778 */
779 fh->nexthdr = nexthdr;
780 fh->reserved = 0;
286c2349 781 fh->identification = frag_id;
1da177e4
LT
782
783 /*
784 * Copy a block of the IP datagram.
785 */
e3f0b86b
HS
786 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
787 len));
1da177e4
LT
788 left -= len;
789
790 fh->frag_off = htons(offset);
791 if (left > 0)
792 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
793 ipv6_hdr(frag)->payload_len = htons(frag->len -
794 sizeof(struct ipv6hdr));
1da177e4
LT
795
796 ptr += len;
797 offset += len;
798
799 /*
800 * Put this fragment into the sending queue.
801 */
7026b1dd 802 err = output(sk, frag);
1da177e4
LT
803 if (err)
804 goto fail;
dafee490 805
adf30907 806 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 807 IPSTATS_MIB_FRAGCREATES);
1da177e4 808 }
adf30907 809 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 810 IPSTATS_MIB_FRAGOKS);
808db80a 811 consume_skb(skb);
1da177e4
LT
812 return err;
813
485fca66
FW
814fail_toobig:
815 if (skb->sk && dst_allfrag(skb_dst(skb)))
816 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
817
818 skb->dev = skb_dst(skb)->dev;
819 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
820 err = -EMSGSIZE;
821
1da177e4 822fail:
adf30907 823 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 824 IPSTATS_MIB_FRAGFAILS);
1ab1457c 825 kfree_skb(skb);
1da177e4
LT
826 return err;
827}
828
b71d1d42
ED
829static inline int ip6_rt_check(const struct rt6key *rt_key,
830 const struct in6_addr *fl_addr,
831 const struct in6_addr *addr_cache)
cf6b1982 832{
a02cec21 833 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 834 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
835}
836
497c615a
HX
837static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
838 struct dst_entry *dst,
b71d1d42 839 const struct flowi6 *fl6)
1da177e4 840{
497c615a 841 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 842 struct rt6_info *rt;
1da177e4 843
497c615a
HX
844 if (!dst)
845 goto out;
846
a963a37d
ED
847 if (dst->ops->family != AF_INET6) {
848 dst_release(dst);
849 return NULL;
850 }
851
852 rt = (struct rt6_info *)dst;
497c615a
HX
853 /* Yes, checking route validity in not connected
854 * case is not very simple. Take into account,
855 * that we do not support routing by source, TOS,
67ba4152 856 * and MSG_DONTROUTE --ANK (980726)
497c615a 857 *
cf6b1982
YH
858 * 1. ip6_rt_check(): If route was host route,
859 * check that cached destination is current.
497c615a
HX
860 * If it is network route, we still may
861 * check its validity using saved pointer
862 * to the last used address: daddr_cache.
863 * We do not want to save whole address now,
864 * (because main consumer of this service
865 * is tcp, which has not this problem),
866 * so that the last trick works only on connected
867 * sockets.
868 * 2. oif also should be the same.
869 */
4c9483b2 870 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 871#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 872 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 873#endif
4c9483b2 874 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
875 dst_release(dst);
876 dst = NULL;
1da177e4
LT
877 }
878
497c615a
HX
879out:
880 return dst;
881}
882
343d60aa 883static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
4c9483b2 884 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 885{
69cce1d1
DM
886#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
887 struct neighbour *n;
97cac082 888 struct rt6_info *rt;
69cce1d1
DM
889#endif
890 int err;
497c615a 891
e16e888b
MS
892 /* The correct way to handle this would be to do
893 * ip6_route_get_saddr, and then ip6_route_output; however,
894 * the route-specific preferred source forces the
895 * ip6_route_output call _before_ ip6_route_get_saddr.
896 *
897 * In source specific routing (no src=any default route),
898 * ip6_route_output will fail given src=any saddr, though, so
899 * that's why we try it again later.
900 */
901 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
902 struct rt6_info *rt;
903 bool had_dst = *dst != NULL;
1da177e4 904
e16e888b
MS
905 if (!had_dst)
906 *dst = ip6_route_output(net, sk, fl6);
907 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
908 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
909 sk ? inet6_sk(sk)->srcprefs : 0,
910 &fl6->saddr);
44456d37 911 if (err)
1da177e4 912 goto out_err_release;
e16e888b
MS
913
914 /* If we had an erroneous initial result, pretend it
915 * never existed and let the SA-enabled version take
916 * over.
917 */
918 if (!had_dst && (*dst)->error) {
919 dst_release(*dst);
920 *dst = NULL;
921 }
1da177e4
LT
922 }
923
e16e888b
MS
924 if (!*dst)
925 *dst = ip6_route_output(net, sk, fl6);
926
927 err = (*dst)->error;
928 if (err)
929 goto out_err_release;
930
95c385b4 931#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
932 /*
933 * Here if the dst entry we've looked up
934 * has a neighbour entry that is in the INCOMPLETE
935 * state and the src address from the flow is
936 * marked as OPTIMISTIC, we release the found
937 * dst entry and replace it instead with the
938 * dst entry of the nexthop router
939 */
c56bf6fe 940 rt = (struct rt6_info *) *dst;
707be1ff 941 rcu_read_lock_bh();
2647a9b0
MKL
942 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
943 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
944 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
945 rcu_read_unlock_bh();
946
947 if (err) {
e550dfb0 948 struct inet6_ifaddr *ifp;
4c9483b2 949 struct flowi6 fl_gw6;
e550dfb0
NH
950 int redirect;
951
4c9483b2 952 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
953 (*dst)->dev, 1);
954
955 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
956 if (ifp)
957 in6_ifa_put(ifp);
958
959 if (redirect) {
960 /*
961 * We need to get the dst entry for the
962 * default router instead
963 */
964 dst_release(*dst);
4c9483b2
DM
965 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
966 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
967 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
968 err = (*dst)->error;
969 if (err)
e550dfb0 970 goto out_err_release;
95c385b4 971 }
e550dfb0 972 }
95c385b4
NH
973#endif
974
1da177e4
LT
975 return 0;
976
977out_err_release:
ca46f9c8 978 if (err == -ENETUNREACH)
5ac68e7c 979 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
980 dst_release(*dst);
981 *dst = NULL;
982 return err;
983}
34a0b3cd 984
497c615a
HX
985/**
986 * ip6_dst_lookup - perform route lookup on flow
987 * @sk: socket which provides route info
988 * @dst: pointer to dst_entry * for result
4c9483b2 989 * @fl6: flow to lookup
497c615a
HX
990 *
991 * This function performs a route lookup on the given flow.
992 *
993 * It returns zero on success, or a standard errno code on error.
994 */
343d60aa
RP
995int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
996 struct flowi6 *fl6)
497c615a
HX
997{
998 *dst = NULL;
343d60aa 999 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1000}
3cf3dc6c
ACM
1001EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1002
497c615a 1003/**
68d0c6d3
DM
1004 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1005 * @sk: socket which provides route info
4c9483b2 1006 * @fl6: flow to lookup
68d0c6d3 1007 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1008 *
1009 * This function performs a route lookup on the given flow.
1010 *
1011 * It returns a valid dst pointer on success, or a pointer encoded
1012 * error code.
1013 */
4c9483b2 1014struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1015 const struct in6_addr *final_dst)
68d0c6d3
DM
1016{
1017 struct dst_entry *dst = NULL;
1018 int err;
1019
343d60aa 1020 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1021 if (err)
1022 return ERR_PTR(err);
1023 if (final_dst)
4e3fd7a0 1024 fl6->daddr = *final_dst;
a0a9f33b
PS
1025 if (!fl6->flowi6_oif)
1026 fl6->flowi6_oif = dst->dev->ifindex;
2774c131 1027
f92ee619 1028 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1029}
1030EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1031
1032/**
1033 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1034 * @sk: socket which provides the dst cache and route info
4c9483b2 1035 * @fl6: flow to lookup
68d0c6d3 1036 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1037 *
1038 * This function performs a route lookup on the given flow with the
1039 * possibility of using the cached route in the socket if it is valid.
1040 * It will take the socket dst lock when operating on the dst cache.
1041 * As a result, this function can only be used in process context.
1042 *
68d0c6d3
DM
1043 * It returns a valid dst pointer on success, or a pointer encoded
1044 * error code.
497c615a 1045 */
4c9483b2 1046struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1047 const struct in6_addr *final_dst)
497c615a 1048{
68d0c6d3
DM
1049 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1050 int err;
497c615a 1051
4c9483b2 1052 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1053
343d60aa 1054 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1055 if (err)
1056 return ERR_PTR(err);
1057 if (final_dst)
4e3fd7a0 1058 fl6->daddr = *final_dst;
2774c131 1059
f92ee619 1060 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1061}
68d0c6d3 1062EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1063
/* Queue (or extend) one large GSO skb on @queue for UDP fragmentation
 * offload: the whole datagram is kept in a single skb and split into
 * MTU-sized fragments by the device / GSO layer at transmit time.
 * Returns 0 on success or a negative errno from allocation/copy.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already set up for GSO: just append payload. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pick the IPv6 fragment id once for the whole datagram. */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1123
0178b695
HX
1124static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1125 gfp_t gfp)
1126{
1127 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1128}
1129
1130static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1131 gfp_t gfp)
1132{
1133 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1134}
1135
75a493e6 1136static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1137 int *maxfraglen,
1138 unsigned int fragheaderlen,
1139 struct sk_buff *skb,
75a493e6 1140 struct rt6_info *rt,
e367c2d0 1141 unsigned int orig_mtu)
0c183379
G
1142{
1143 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1144 if (!skb) {
0c183379 1145 /* first fragment, reserve header_len */
e367c2d0 1146 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1147
1148 } else {
1149 /*
1150 * this fragment is not first, the headers
1151 * space is regarded as data space.
1152 */
e367c2d0 1153 *mtu = orig_mtu;
0c183379
G
1154 }
1155 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1156 + fragheaderlen - sizeof(struct frag_hdr);
1157 }
1158}
1159
/* Initialise cork state for a corked send: duplicate the caller's
 * extension headers into @v6_cork (freed later by ip6_cork_release(),
 * including on the partial-failure paths below), pin the route and
 * flow in @cork, and compute the fragment size.  Returns 0 or a
 * negative errno.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cork must have been released first. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; on allocation failure
		 * the already-duplicated pieces are left for
		 * ip6_cork_release() to free.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Hold the route for the lifetime of the cork. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* Fragment size: device MTU when probing PMTU, else path MTU. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller per-socket frag_size overrides the route MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1227
/* Core of the corked-send path: append @length bytes (pulled in via
 * @getfrag) to @queue, growing the tail skb and starting new fragment
 * skbs as the MTU is exceeded.  Handles UFO offload, checksum offload
 * for single-packet sends, timestamping flags, and the dontfrag /
 * EMSGSIZE policy.  Returns 0 or a negative errno; on error the
 * already-queued data stays on @queue for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	/* Extension/IPsec header space is only charged to the first skb. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Per-fragment payload must be a multiple of 8 octets. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* Capture tx timestamp flags/key; applied to the first skb only. */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Oversized UDP on a UFO-capable device: hand off to the GSO path. */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* No blocking without a transport header;
				 * allow up to twice sndbuf of queued data.
				 */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/* Move the 8-byte-alignment overhang from the
			 * previous skb into this one, fixing up checksums.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather capable device: append the data
			 * as page fragments instead of copying linearly.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting done before the loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1542
1543int ip6_append_data(struct sock *sk,
1544 int getfrag(void *from, char *to, int offset, int len,
1545 int odd, struct sk_buff *skb),
1546 void *from, int length, int transhdrlen, int hlimit,
1547 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1548 struct rt6_info *rt, unsigned int flags, int dontfrag)
1549{
1550 struct inet_sock *inet = inet_sk(sk);
1551 struct ipv6_pinfo *np = inet6_sk(sk);
1552 int exthdrlen;
1553 int err;
1554
1555 if (flags&MSG_PROBE)
1556 return 0;
1557 if (skb_queue_empty(&sk->sk_write_queue)) {
1558 /*
1559 * setup for corking
1560 */
1561 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1562 tclass, opt, rt, fl6);
1563 if (err)
1564 return err;
1565
1566 exthdrlen = (opt ? opt->opt_flen : 0);
1567 length += exthdrlen;
1568 transhdrlen += exthdrlen;
1569 } else {
1570 fl6 = &inet->cork.fl.u.ip6;
1571 transhdrlen = 0;
1572 }
1573
1574 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1575 &np->cork, sk_page_frag(sk), getfrag,
1576 from, length, transhdrlen, flags, dontfrag);
1577}
a495f836 1578EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1579
366e41d9
VY
1580static void ip6_cork_release(struct inet_cork_full *cork,
1581 struct inet6_cork *v6_cork)
bf138862 1582{
366e41d9
VY
1583 if (v6_cork->opt) {
1584 kfree(v6_cork->opt->dst0opt);
1585 kfree(v6_cork->opt->dst1opt);
1586 kfree(v6_cork->opt->hopopt);
1587 kfree(v6_cork->opt->srcrt);
1588 kfree(v6_cork->opt);
1589 v6_cork->opt = NULL;
0178b695
HX
1590 }
1591
366e41d9
VY
1592 if (cork->base.dst) {
1593 dst_release(cork->base.dst);
1594 cork->base.dst = NULL;
1595 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1596 }
366e41d9 1597 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1598}
1599
/* Collapse the queued fragment skbs into one packet: chain the tail
 * skbs onto the head's frag_list, push extension headers and the IPv6
 * header, attach the route, bump SNMP counters, and release the cork.
 * Returns the finished skb, or NULL when the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs onto the head and absorb their
	 * byte accounting; they are freed with the head from now on.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* A routing header may rewrite the destination; keep the
	 * original flow daddr for the header we build below.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1673
1674int ip6_send_skb(struct sk_buff *skb)
1675{
1676 struct net *net = sock_net(skb->sk);
1677 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1678 int err;
1679
ef76bc23 1680 err = ip6_local_out(skb);
1da177e4
LT
1681 if (err) {
1682 if (err > 0)
6ce9e7b5 1683 err = net_xmit_errno(err);
1da177e4 1684 if (err)
6422398c
VY
1685 IP6_INC_STATS(net, rt->rt6i_idev,
1686 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1687 }
1688
1da177e4 1689 return err;
6422398c
VY
1690}
1691
/* Finalise the socket's corked data into one packet and transmit it.
 * Returns 0 when there was nothing pending.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1703
0bbe84a6 1704static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1705 struct sk_buff_head *queue,
1706 struct inet_cork_full *cork,
1707 struct inet6_cork *v6_cork)
1da177e4 1708{
1da177e4
LT
1709 struct sk_buff *skb;
1710
0bbe84a6 1711 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1712 if (skb_dst(skb))
1713 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1714 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1715 kfree_skb(skb);
1716 }
1717
6422398c 1718 ip6_cork_release(cork, v6_cork);
1da177e4 1719}
0bbe84a6
VY
1720
1721void ip6_flush_pending_frames(struct sock *sk)
1722{
6422398c
VY
1723 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1724 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1725}
a495f836 1726EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1727
/* One-shot (uncorked) variant of the append path: build the whole
 * datagram on a private queue with local cork state and return the
 * finished skb.  Returns NULL for MSG_PROBE, or an ERR_PTR on failure
 * (pending data and cork state are flushed first).
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	/* Local, on-stack cork state (not the socket's). */
	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err)
		return ERR_PTR(err);

	/* Negative dontfrag means "use the socket's setting". */
	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}