]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: move ip6_local_out into core kernel
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
9e508490 59static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
f6b72b62 63 struct neighbour *neigh;
6fd6ce20
YH
64 struct in6_addr *nexthop;
65 int ret;
1da177e4
LT
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7ad6848c 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 74 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385
JE
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
89 IP6_INC_STATS(dev_net(dev), idev,
90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
edf391ff
NH
96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97 skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20
YH
107 rcu_read_lock_bh();
108 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
9e508490
JE
119 IP6_INC_STATS_BH(dev_net(dst->dev),
120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121 kfree_skb(skb);
122 return -EINVAL;
1da177e4
LT
123}
124
9e508490
JE
125static int ip6_finish_output(struct sk_buff *skb)
126{
127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128 dst_allfrag(skb_dst(skb)))
129 return ip6_fragment(skb, ip6_finish_output2);
130 else
131 return ip6_finish_output2(skb);
132}
133
/*
 * Network-layer output entry point.  Discards the packet (successfully,
 * returning 0) when IPv6 is administratively disabled on the egress
 * device; otherwise runs ip6_finish_output() through the
 * NF_INET_POST_ROUTING hook, skipping the hook for skbs already flagged
 * IP6SKB_REROUTED.
 */
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
149
/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 *
 *	Prepends extension headers (from @opt) and the IPv6 header, fills
 *	in hop limit / traffic class / flow label from the socket and flow,
 *	then sends the packet through the NF_INET_LOCAL_OUT hook.  Packets
 *	larger than the path MTU that are neither local_df nor GSO are
 *	rejected with -EMSGSIZE after signalling the error to the socket.
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		/* Reallocate headroom if the extension headers do not fit;
		 * the new skb takes over ownership from the old one.
		 */
		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		/* Fragmentable options go first, then non-fragmentable ones;
		 * the latter may rewrite first_hop (e.g. routing header).
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big and may not be fragmented: report EMSGSIZE to the
	 * socket's error queue and drop.
	 */
	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
235
/*
 * Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain whose selector matches @sel (and whose bound device, if
 * any, matches the skb's input device).  Every matching listener except
 * the last receives a clone; the last one consumes the original skb.
 * Returns 1 when the skb was delivered (consumed), 0 when no listener
 * matched and the caller keeps ownership.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Deliver a clone to the previous match so the
			 * original can go to the final one uncloned.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
264
/*
 * Classify a packet destined to a proxied address:
 *    1 - unicast neighbour-discovery ICMPv6 message: pass to local input,
 *   -1 - link-local destination: dst_link_failure() signalled, caller drops,
 *    0 - anything else: forward normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Skip any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
316
/* Last step of the FORWARD path: hand the skb on to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc;

	rc = dst_output(skb);
	return rc;
}
321
/*
 * Forward one IPv6 packet: check forwarding policy and XFRM, hand
 * Router Alert packets to interested raw sockets, enforce the hop
 * limit, optionally divert proxy-NDP traffic to local input, send
 * redirects when the packet would leave on its input interface,
 * enforce the path MTU, then decrement the hop limit and pass the
 * packet through the NF_INET_FORWARD hook to ip6_forward_finish().
 * The skb is consumed on every path; returns 0 or a negative errno.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged skbs must not be forwarded (warns and drops). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward packets actually addressed to this host at L2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have changed the route: refetch. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* RFC mandates a minimum IPv6 link MTU; never report less. */
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header: refetch it. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
479
/*
 * Propagate per-packet metadata (packet type, priority, protocol,
 * route, device, mark, and config-dependent fields) from the original
 * skb to a freshly built fragment.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace any dst on the fragment with a reference to the
	 * original's dst.
	 */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
499
/*
 * Split an over-MTU skb into IPv6 fragments and transmit each one
 * through @output.  A fast path reuses a well-formed frag list already
 * hanging off the skb (no copying); otherwise the slow path allocates
 * and copies each fragment.  The skb (or its fragments) is consumed on
 * all paths.  Returns 0 on success or a negative errno.
 */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = bytes of unfragmentable headers; prevhdr points at the
	 * nexthdr byte that must become NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Honour a smaller per-socket fragment size, if configured. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* From here on, mtu is the payload space per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* Fast path requires correctly sized, uncloned pieces. */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* Insert the fragment header between the unfragmentable
		 * headers and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* Transmission failed part-way: free the remaining frags. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfers done before bailing out. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		/* All fragments share the identification chosen for the
		 * first one.
		 */
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
785
b71d1d42
ED
786static inline int ip6_rt_check(const struct rt6key *rt_key,
787 const struct in6_addr *fl_addr,
788 const struct in6_addr *addr_cache)
cf6b1982 789{
a02cec21
ED
790 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
791 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
792}
793
/*
 * Validate a socket's cached dst against the flow.  Returns the dst if
 * still usable, or NULL after releasing it when the address family,
 * the destination (and, with subtrees, the source) or the output
 * interface no longer match.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* The cached dst may belong to another family (e.g. after an
	 * IPv4-mapped exchange); it is useless here.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
839
/*
 * Common tail of the dst lookup helpers: performs the route lookup when
 * *dst was not supplied, selects a source address when the flow has
 * none, and — with optimistic DAD — replaces the dst with the default
 * router's when the nexthop neighbour is not yet valid and our source
 * address is optimistic.  On failure *dst is released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	/* No source address in the flow yet: derive one from the route. */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	/* Start from scratch (no cached dst) and defer to the common tail. */
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
933
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* Rewrite the flow's destination to the post-IPsec final target
	 * before consulting the XFRM layer.
	 */
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
964
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* Reuse the socket's cached dst only if it still matches the flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* As in ip6_dst_lookup_flow(): retarget for IPsec, then XFRM. */
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1000
34a0b3cd 1001static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1002 int getfrag(void *from, char *to, int offset, int len,
1003 int odd, struct sk_buff *skb),
1004 void *from, int length, int hh_len, int fragheaderlen,
87c48fa3
ED
1005 int transhdrlen, int mtu,unsigned int flags,
1006 struct rt6_info *rt)
e89e9cf5
AR
1007
1008{
1009 struct sk_buff *skb;
1010 int err;
1011
1012 /* There is support for UDP large send offload by network
1013 * device, so create one single skb packet containing complete
1014 * udp datagram
1015 */
1016 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1017 skb = sock_alloc_send_skb(sk,
1018 hh_len + fragheaderlen + transhdrlen + 20,
1019 (flags & MSG_DONTWAIT), &err);
1020 if (skb == NULL)
504744e4 1021 return err;
e89e9cf5
AR
1022
1023 /* reserve space for Hardware header */
1024 skb_reserve(skb, hh_len);
1025
1026 /* create space for UDP/IP header */
1027 skb_put(skb,fragheaderlen + transhdrlen);
1028
1029 /* initialize network header pointer */
c1d2bbe1 1030 skb_reset_network_header(skb);
e89e9cf5
AR
1031
1032 /* initialize protocol header pointer */
b0e380b1 1033 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1034
84fa7933 1035 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5 1036 skb->csum = 0;
e89e9cf5
AR
1037 }
1038
1039 err = skb_append_datato_frags(sk,skb, getfrag, from,
1040 (length - transhdrlen));
1041 if (!err) {
1042 struct frag_hdr fhdr;
1043
c31d5326
SS
1044 /* Specify the length of each IPv6 datagram fragment.
1045 * It has to be a multiple of 8.
1046 */
1047 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1048 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1049 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
87c48fa3 1050 ipv6_select_ident(&fhdr, rt);
e89e9cf5
AR
1051 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1052 __skb_queue_tail(&sk->sk_write_queue, skb);
1053
1054 return 0;
1055 }
1056 /* There is not enough support do UPD LSO,
1057 * so follow normal path
1058 */
1059 kfree_skb(skb);
1060
1061 return err;
1062}
1da177e4 1063
0178b695
HX
1064static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1065 gfp_t gfp)
1066{
1067 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1068}
1069
1070static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1071 gfp_t gfp)
1072{
1073 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1074}
1075
75a493e6 1076static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1077 int *maxfraglen,
1078 unsigned int fragheaderlen,
1079 struct sk_buff *skb,
75a493e6
HFS
1080 struct rt6_info *rt,
1081 bool pmtuprobe)
0c183379
G
1082{
1083 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1084 if (skb == NULL) {
1085 /* first fragment, reserve header_len */
1086 *mtu = *mtu - rt->dst.header_len;
1087
1088 } else {
1089 /*
1090 * this fragment is not first, the headers
1091 * space is regarded as data space.
1092 */
75a493e6
HFS
1093 *mtu = min(*mtu, pmtuprobe ?
1094 rt->dst.dev->mtu :
1095 dst_mtu(rt->dst.path));
0c183379
G
1096 }
1097 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1098 + fragheaderlen - sizeof(struct frag_hdr);
1099 }
1100}
1101
41a1f8ea
YH
/**
 *	ip6_append_data - append user data to the socket's pending (corked) queue
 *	@sk: socket to append to
 *	@getfrag: callback that copies @len bytes of user data at @offset into @to
 *	@from: opaque cookie handed back to @getfrag
 *	@length: number of payload bytes to append in this call
 *	@transhdrlen: transport header length (non-zero only for the first call)
 *	@hlimit: hop limit to record in the cork
 *	@tclass: traffic class to record in the cork
 *	@opt: IPv6 tx options to duplicate into the cork (first call only)
 *	@fl6: flow describing the destination
 *	@rt: route the packets will be sent over
 *	@flags: msghdr flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 *	@dontfrag: IPV6_DONTFRAG semantics for UDP/RAW
 *
 *	Builds one or more skbs on sk->sk_write_queue, sized so that each
 *	(after the fragment header is pushed later) fits the path MTU.
 *	The first call on an empty queue sets up the cork state; later
 *	calls reuse the corked route, flow and options.  Returns 0 or a
 *	negative errno; on error the bytes not appended are subtracted
 *	from the corked length again.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	/* MSG_PROBE only wants the path probed, no data queued. */
	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* Deep-copy each extension header so the cork
			 * survives after the caller's @opt goes away.
			 * (Partial copies left behind on -ENOBUFS are
			 * released later by ip6_cork_release().)
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* Pick the MTU: the device MTU when probing PMTU,
		 * otherwise the dst's path MTU (tunnel-adjusted when
		 * the route is an XFRM tunnel).
		 */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Queue already corked: reuse the recorded route,
		 * flow and options; headers were accounted on the
		 * first call, so the header lengths are zero now.
		 */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest fragment payload end, 8-byte aligned as required by
	 * the IPv6 fragment header.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* UFO-capable device: hand the whole datagram over as
		 * one large skb and let hardware fragment it.
		 */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc ==
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up fragments: allow queueing up
				 * to twice sndbuf without blocking.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * skb into this one, keeping checksums valid.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into the per-socket page
			 * fragment, coalescing with the last frag if possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic length accounting for the unappended bytes. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1447
bf138862
PE
1448static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1449{
0178b695
HX
1450 if (np->cork.opt) {
1451 kfree(np->cork.opt->dst0opt);
1452 kfree(np->cork.opt->dst1opt);
1453 kfree(np->cork.opt->hopopt);
1454 kfree(np->cork.opt->srcrt);
1455 kfree(np->cork.opt);
1456 np->cork.opt = NULL;
1457 }
1458
bdc712b4
DM
1459 if (inet->cork.base.dst) {
1460 dst_release(inet->cork.base.dst);
1461 inet->cork.base.dst = NULL;
1462 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1463 }
1464 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1465}
1466
1da177e4
LT
/**
 *	ip6_push_pending_frames - send the corked data queued on @sk
 *	@sk: socket whose sk_write_queue holds the pending fragments
 *
 *	Coalesces all queued skbs into one (head skb plus frag_list),
 *	pushes the IPv6 header and any corked extension headers, updates
 *	SNMP counters, and hands the packet to ip6_local_out().  Always
 *	releases the cork state before returning.  Returns 0 or a
 *	negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining fragment onto the head skb's frag_list,
	 * transferring byte counts and dropping per-skb socket ownership
	 * (the head skb's destructor accounts for the whole chain).
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push corked extension headers; a routing header may rewrite
	 * final_dst to the first intermediate hop.
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return codes are congestion-notification
		 * values; translate them to an errno (or success).
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4
LT
1549
1550void ip6_flush_pending_frames(struct sock *sk)
1551{
1da177e4
LT
1552 struct sk_buff *skb;
1553
1554 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1555 if (skb_dst(skb))
1556 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1557 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1558 kfree_skb(skb);
1559 }
1560
bf138862 1561 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1562}
a495f836 1563EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);