]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
[IPV4] MROUTE: Adjust include files for user-space.
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
1da177e4 31#include <linux/errno.h>
ef76bc23 32#include <linux/kernel.h>
1da177e4
LT
33#include <linux/string.h>
34#include <linux/socket.h>
35#include <linux/net.h>
36#include <linux/netdevice.h>
37#include <linux/if_arp.h>
38#include <linux/in6.h>
39#include <linux/tcp.h>
40#include <linux/route.h>
b59f45d0 41#include <linux/module.h>
1da177e4
LT
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62{
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71}
72
ef76bc23
HX
73int __ip6_local_out(struct sk_buff *skb)
74{
75 int len;
76
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
79 len = 0;
80 ipv6_hdr(skb)->payload_len = htons(len);
81
6e23ae2a 82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
ef76bc23
HX
83 dst_output);
84}
85
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value returned by
 * __ip6_local_out() is handed back to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err = __ip6_local_out(skb);

	if (unlikely(err != 1))
		return err;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
97
ad643a79 98static int ip6_output_finish(struct sk_buff *skb)
1da177e4 99{
1da177e4 100 struct dst_entry *dst = skb->dst;
1da177e4 101
3644f0ce
SH
102 if (dst->hh)
103 return neigh_hh_output(dst->hh, skb);
104 else if (dst->neighbour)
1da177e4
LT
105 return dst->neighbour->output(skb);
106
a11d206d 107 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
108 kfree_skb(skb);
109 return -EINVAL;
110
111}
112
113/* dev_loopback_xmit for use with netfilter. */
114static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115{
459a98ed 116 skb_reset_mac_header(newskb);
bbe735e4 117 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 BUG_TRAP(newskb->dst);
121
122 netif_rx(newskb);
123 return 0;
124}
125
126
127static int ip6_output2(struct sk_buff *skb)
128{
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
131
132 skb->protocol = htons(ETH_P_IPV6);
133 skb->dev = dev;
134
0660e03f 135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
a11d206d 137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1da177e4
LT
138
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
0660e03f
ACM
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr)) {
1da177e4
LT
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
146 */
147 if (newskb)
6e23ae2a
PM
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 NULL, newskb->dev,
1da177e4
LT
150 ip6_dev_loopback_xmit);
151
0660e03f 152 if (ipv6_hdr(skb)->hop_limit == 0) {
a11d206d 153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
154 kfree_skb(skb);
155 return 0;
156 }
157 }
158
a11d206d 159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
1da177e4
LT
160 }
161
6e23ae2a
PM
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
1da177e4
LT
164}
165
628a5c56
JH
166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167{
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
172}
173
1da177e4
LT
174int ip6_output(struct sk_buff *skb)
175{
628a5c56 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
e89e9cf5 177 dst_allfrag(skb->dst))
1da177e4
LT
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181}
182
1da177e4
LT
183/*
184 * xmit an sk_buff (used by TCP)
185 */
186
187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189{
b30bd282 190 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4
LT
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb->dst;
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
41a1f8ea 196 int hlimit, tclass;
1da177e4
LT
197 u32 mtu;
198
199 if (opt) {
c2636b4d 200 unsigned int head_room;
1da177e4
LT
201
202 /* First: exthdrs may take lots of space (~8K for now)
203 MAX_HEADER is not enough.
204 */
205 head_room = opt->opt_nflen + opt->opt_flen;
206 seg_len += head_room;
207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208
209 if (skb_headroom(skb) < head_room) {
210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d
YH
211 if (skb2 == NULL) {
212 IP6_INC_STATS(ip6_dst_idev(skb->dst),
213 IPSTATS_MIB_OUTDISCARDS);
214 kfree_skb(skb);
1da177e4
LT
215 return -ENOBUFS;
216 }
a11d206d
YH
217 kfree_skb(skb);
218 skb = skb2;
1da177e4
LT
219 if (sk)
220 skb_set_owner_w(skb, sk);
221 }
222 if (opt->opt_flen)
223 ipv6_push_frag_opts(skb, opt, &proto);
224 if (opt->opt_nflen)
225 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
226 }
227
e2d1bca7
ACM
228 skb_push(skb, sizeof(struct ipv6hdr));
229 skb_reset_network_header(skb);
0660e03f 230 hdr = ipv6_hdr(skb);
1da177e4
LT
231
232 /*
233 * Fill in the IPv6 header
234 */
235
1da177e4
LT
236 hlimit = -1;
237 if (np)
238 hlimit = np->hop_limit;
239 if (hlimit < 0)
6b75d090 240 hlimit = ip6_dst_hoplimit(dst);
1da177e4 241
41a1f8ea
YH
242 tclass = -1;
243 if (np)
244 tclass = np->tclass;
245 if (tclass < 0)
246 tclass = 0;
247
90bcaf7b 248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 249
1da177e4
LT
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
253
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
256
a2c2064f 257 skb->priority = sk->sk_priority;
4a19ec58 258 skb->mark = sk->sk_mark;
a2c2064f 259
1da177e4 260 mtu = dst_mtu(dst);
89114afd 261 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
a11d206d
YH
262 IP6_INC_STATS(ip6_dst_idev(skb->dst),
263 IPSTATS_MIB_OUTREQUESTS);
6e23ae2a 264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 265 dst_output);
1da177e4
LT
266 }
267
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
a11d206d 272 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
273 kfree_skb(skb);
274 return -EMSGSIZE;
275}
276
7159039a
YH
277EXPORT_SYMBOL(ip6_xmit);
278
1da177e4
LT
279/*
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
284 */
285
286int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
288 int proto, int len)
289{
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
293
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
296
297 totlen = len + sizeof(struct ipv6hdr);
298
55f79cc0
ACM
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 301 hdr = ipv6_hdr(skb);
1da177e4 302
ae08e1f0 303 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
304
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
308
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
311
312 return 0;
313}
314
315static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316{
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
319
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
0bd1b59b
AM
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
330 }
331 last = sk;
332 }
333 }
334
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
339 }
340 read_unlock(&ip6_ra_lock);
341 return 0;
342}
343
e21e0b5f
VN
344static int ip6_forward_proxy_check(struct sk_buff *skb)
345{
0660e03f 346 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
349
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
356
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
359
d56f90a7
ACM
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
e21e0b5f
VN
362 return 0;
363
d56f90a7 364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
365
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
374 * input function.
375 */
376 return 1;
377 default:
378 break;
379 }
380 }
381
74553b09
VN
382 /*
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
386 */
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
390 }
391
e21e0b5f
VN
392 return 0;
393}
394
1da177e4
LT
/*
 * Continuation used by ip6_forward() after the NF_INET_FORWARD hook:
 * simply hands the packet to dst_output() and returns its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
399
400int ip6_forward(struct sk_buff *skb)
401{
402 struct dst_entry *dst = skb->dst;
0660e03f 403 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 404 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 405 struct net *net = dev_net(dst->dev);
1ab1457c 406
1da177e4
LT
407 if (ipv6_devconf.forwarding == 0)
408 goto error;
409
410 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
a11d206d 411 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
412 goto drop;
413 }
414
35fc92a9 415 skb_forward_csum(skb);
1da177e4
LT
416
417 /*
418 * We DO NOT make any processing on
419 * RA packets, pushing them to user level AS IS
420 * without ane WARRANTY that application will be able
421 * to interpret them. The reason is that we
422 * cannot make anything clever here.
423 *
424 * We are not end-node, so that if packet contains
425 * AH/ESP, we cannot make anything.
426 * Defragmentation also would be mistake, RA packets
427 * cannot be fragmented, because there is no warranty
428 * that different fragments will go along one path. --ANK
429 */
430 if (opt->ra) {
d56f90a7 431 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
432 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
433 return 0;
434 }
435
436 /*
437 * check and decrement ttl
438 */
439 if (hdr->hop_limit <= 1) {
440 /* Force OUTPUT device used as source address */
441 skb->dev = dst->dev;
442 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
443 0, skb->dev);
a11d206d 444 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
445
446 kfree_skb(skb);
447 return -ETIMEDOUT;
448 }
449
fbea49e1
YH
450 /* XXX: idev->cnf.proxy_ndp? */
451 if (ipv6_devconf.proxy_ndp &&
8a3edd80 452 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
453 int proxied = ip6_forward_proxy_check(skb);
454 if (proxied > 0)
e21e0b5f 455 return ip6_input(skb);
74553b09 456 else if (proxied < 0) {
a11d206d 457 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
74553b09
VN
458 goto drop;
459 }
e21e0b5f
VN
460 }
461
1da177e4 462 if (!xfrm6_route_forward(skb)) {
a11d206d 463 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
464 goto drop;
465 }
466 dst = skb->dst;
467
468 /* IPv6 specs say nothing about it, but it is clear that we cannot
469 send redirects to source routed frames.
1e5dc146 470 We don't send redirects to frames decapsulated from IPsec.
1da177e4 471 */
1e5dc146
MN
472 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
473 !skb->sp) {
1da177e4
LT
474 struct in6_addr *target = NULL;
475 struct rt6_info *rt;
476 struct neighbour *n = dst->neighbour;
477
478 /*
479 * incoming and outgoing devices are the same
480 * send a redirect.
481 */
482
483 rt = (struct rt6_info *) dst;
484 if ((rt->rt6i_flags & RTF_GATEWAY))
485 target = (struct in6_addr*)&n->primary_key;
486 else
487 target = &hdr->daddr;
488
489 /* Limit redirects both by destination (here)
490 and by source (inside ndisc_send_redirect)
491 */
492 if (xrlim_allow(dst, 1*HZ))
493 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
494 } else {
495 int addrtype = ipv6_addr_type(&hdr->saddr);
496
1da177e4 497 /* This check is security critical. */
5bb1ab09
DS
498 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
499 goto error;
500 if (addrtype & IPV6_ADDR_LINKLOCAL) {
501 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
502 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
503 goto error;
504 }
1da177e4
LT
505 }
506
507 if (skb->len > dst_mtu(dst)) {
508 /* Again, force OUTPUT device used as source address */
509 skb->dev = dst->dev;
510 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
a11d206d
YH
511 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
512 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
a11d206d 518 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
519 goto drop;
520 }
521
0660e03f 522 hdr = ipv6_hdr(skb);
1da177e4
LT
523
524 /* Mangling hops number delayed to point after skb COW */
1ab1457c 525
1da177e4
LT
526 hdr->hop_limit--;
527
a11d206d 528 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
529 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530 ip6_forward_finish);
1da177e4
LT
531
532error:
a11d206d 533 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
534drop:
535 kfree_skb(skb);
536 return -EINVAL;
537}
538
539static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
540{
541 to->pkt_type = from->pkt_type;
542 to->priority = from->priority;
543 to->protocol = from->protocol;
1da177e4
LT
544 dst_release(to->dst);
545 to->dst = dst_clone(from->dst);
546 to->dev = from->dev;
82e91ffe 547 to->mark = from->mark;
1da177e4
LT
548
549#ifdef CONFIG_NET_SCHED
550 to->tc_index = from->tc_index;
551#endif
e7ac05f3 552 nf_copy(to, from);
ba9dda3a
JK
553#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555 to->nf_trace = from->nf_trace;
556#endif
984bc16c 557 skb_copy_secmark(to, from);
1da177e4
LT
558}
559
560int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
561{
562 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
563 struct ipv6_opt_hdr *exthdr =
564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 565 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 566 int found_rhdr = 0;
0660e03f 567 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
568
569 while (offset + 1 <= packet_len) {
570
571 switch (**nexthdr) {
572
573 case NEXTHDR_HOP:
27637df9 574 break;
1da177e4 575 case NEXTHDR_ROUTING:
27637df9
MN
576 found_rhdr = 1;
577 break;
1da177e4 578 case NEXTHDR_DEST:
59fbb3a6 579#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581 break;
582#endif
583 if (found_rhdr)
584 return offset;
1da177e4
LT
585 break;
586 default :
587 return offset;
588 }
27637df9
MN
589
590 offset += ipv6_optlen(exthdr);
591 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593 offset);
1da177e4
LT
594 }
595
596 return offset;
597}
598
599static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
600{
601 struct net_device *dev;
602 struct sk_buff *frag;
603 struct rt6_info *rt = (struct rt6_info*)skb->dst;
d91675f9 604 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
605 struct ipv6hdr *tmp_hdr;
606 struct frag_hdr *fh;
607 unsigned int mtu, hlen, left, len;
ae08e1f0 608 __be32 frag_id = 0;
1da177e4
LT
609 int ptr, offset = 0, err=0;
610 u8 *prevhdr, nexthdr = 0;
611
612 dev = rt->u.dst.dev;
613 hlen = ip6_find_1stfragopt(skb, &prevhdr);
614 nexthdr = *prevhdr;
615
628a5c56 616 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
617
618 /* We must not fragment if the socket is set to force MTU discovery
619 * or if the skb it not generated by a local socket. (This last
620 * check should be redundant, but it's free.)
621 */
b5c15fc0 622 if (!skb->local_df) {
b881ef76
JH
623 skb->dev = skb->dst->dev;
624 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
625 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
626 kfree_skb(skb);
627 return -EMSGSIZE;
628 }
629
d91675f9
YH
630 if (np && np->frag_size < mtu) {
631 if (np->frag_size)
632 mtu = np->frag_size;
633 }
634 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4
LT
635
636 if (skb_shinfo(skb)->frag_list) {
637 int first_len = skb_pagelen(skb);
29ffe1a5 638 int truesizes = 0;
1da177e4
LT
639
640 if (first_len - hlen > mtu ||
641 ((first_len - hlen) & 7) ||
642 skb_cloned(skb))
643 goto slow_path;
644
645 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
646 /* Correct geometry. */
647 if (frag->len > mtu ||
648 ((frag->len & 7) && frag->next) ||
649 skb_headroom(frag) < hlen)
650 goto slow_path;
651
1da177e4
LT
652 /* Partially cloned skb? */
653 if (skb_shared(frag))
654 goto slow_path;
2fdba6b0
HX
655
656 BUG_ON(frag->sk);
657 if (skb->sk) {
658 sock_hold(skb->sk);
659 frag->sk = skb->sk;
660 frag->destructor = sock_wfree;
29ffe1a5 661 truesizes += frag->truesize;
2fdba6b0 662 }
1da177e4
LT
663 }
664
665 err = 0;
666 offset = 0;
667 frag = skb_shinfo(skb)->frag_list;
668 skb_shinfo(skb)->frag_list = NULL;
669 /* BUILD HEADER */
670
9a217a1c 671 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 672 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 673 if (!tmp_hdr) {
a11d206d 674 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
675 return -ENOMEM;
676 }
677
1da177e4
LT
678 __skb_pull(skb, hlen);
679 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
680 __skb_push(skb, hlen);
681 skb_reset_network_header(skb);
d56f90a7 682 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4
LT
683
684 ipv6_select_ident(skb, fh);
685 fh->nexthdr = nexthdr;
686 fh->reserved = 0;
687 fh->frag_off = htons(IP6_MF);
688 frag_id = fh->identification;
689
690 first_len = skb_pagelen(skb);
691 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 692 skb->truesize -= truesizes;
1da177e4 693 skb->len = first_len;
0660e03f
ACM
694 ipv6_hdr(skb)->payload_len = htons(first_len -
695 sizeof(struct ipv6hdr));
a11d206d
YH
696
697 dst_hold(&rt->u.dst);
1da177e4
LT
698
699 for (;;) {
700 /* Prepare header of the next frame,
701 * before previous one went down. */
702 if (frag) {
703 frag->ip_summed = CHECKSUM_NONE;
badff6d0 704 skb_reset_transport_header(frag);
1da177e4 705 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
706 __skb_push(frag, hlen);
707 skb_reset_network_header(frag);
d56f90a7
ACM
708 memcpy(skb_network_header(frag), tmp_hdr,
709 hlen);
1da177e4
LT
710 offset += skb->len - hlen - sizeof(struct frag_hdr);
711 fh->nexthdr = nexthdr;
712 fh->reserved = 0;
713 fh->frag_off = htons(offset);
714 if (frag->next != NULL)
715 fh->frag_off |= htons(IP6_MF);
716 fh->identification = frag_id;
0660e03f
ACM
717 ipv6_hdr(frag)->payload_len =
718 htons(frag->len -
719 sizeof(struct ipv6hdr));
1da177e4
LT
720 ip6_copy_metadata(frag, skb);
721 }
1ab1457c 722
1da177e4 723 err = output(skb);
dafee490 724 if(!err)
a11d206d 725 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
dafee490 726
1da177e4
LT
727 if (err || !frag)
728 break;
729
730 skb = frag;
731 frag = skb->next;
732 skb->next = NULL;
733 }
734
a51482bd 735 kfree(tmp_hdr);
1da177e4
LT
736
737 if (err == 0) {
a11d206d
YH
738 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
739 dst_release(&rt->u.dst);
1da177e4
LT
740 return 0;
741 }
742
743 while (frag) {
744 skb = frag->next;
745 kfree_skb(frag);
746 frag = skb;
747 }
748
a11d206d
YH
749 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
750 dst_release(&rt->u.dst);
1da177e4
LT
751 return err;
752 }
753
754slow_path:
755 left = skb->len - hlen; /* Space per frame */
756 ptr = hlen; /* Where to start from */
757
758 /*
759 * Fragment the datagram.
760 */
761
762 *prevhdr = NEXTHDR_FRAGMENT;
763
764 /*
765 * Keep copying data until we run out.
766 */
767 while(left > 0) {
768 len = left;
769 /* IF: it doesn't fit, use 'mtu' - the data space left */
770 if (len > mtu)
771 len = mtu;
772 /* IF: we are not sending upto and including the packet end
773 then align the next start on an eight byte boundary */
774 if (len < left) {
775 len &= ~7;
776 }
777 /*
778 * Allocate buffer.
779 */
780
781 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 782 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
a11d206d
YH
783 IP6_INC_STATS(ip6_dst_idev(skb->dst),
784 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
785 err = -ENOMEM;
786 goto fail;
787 }
788
789 /*
790 * Set up data on packet
791 */
792
793 ip6_copy_metadata(frag, skb);
794 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
795 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 796 skb_reset_network_header(frag);
badff6d0 797 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
798 frag->transport_header = (frag->network_header + hlen +
799 sizeof(struct frag_hdr));
1da177e4
LT
800
801 /*
802 * Charge the memory for the fragment to any owner
803 * it might possess
804 */
805 if (skb->sk)
806 skb_set_owner_w(frag, skb->sk);
807
808 /*
809 * Copy the packet header into the new buffer.
810 */
d626f62b 811 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
812
813 /*
814 * Build fragment header.
815 */
816 fh->nexthdr = nexthdr;
817 fh->reserved = 0;
f36d6ab1 818 if (!frag_id) {
1da177e4
LT
819 ipv6_select_ident(skb, fh);
820 frag_id = fh->identification;
821 } else
822 fh->identification = frag_id;
823
824 /*
825 * Copy a block of the IP datagram.
826 */
8984e41d 827 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
828 BUG();
829 left -= len;
830
831 fh->frag_off = htons(offset);
832 if (left > 0)
833 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
834 ipv6_hdr(frag)->payload_len = htons(frag->len -
835 sizeof(struct ipv6hdr));
1da177e4
LT
836
837 ptr += len;
838 offset += len;
839
840 /*
841 * Put this fragment into the sending queue.
842 */
1da177e4
LT
843 err = output(frag);
844 if (err)
845 goto fail;
dafee490 846
a11d206d 847 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
1da177e4 848 }
a11d206d
YH
849 IP6_INC_STATS(ip6_dst_idev(skb->dst),
850 IPSTATS_MIB_FRAGOKS);
1da177e4 851 kfree_skb(skb);
1da177e4
LT
852 return err;
853
854fail:
a11d206d
YH
855 IP6_INC_STATS(ip6_dst_idev(skb->dst),
856 IPSTATS_MIB_FRAGFAILS);
1ab1457c 857 kfree_skb(skb);
1da177e4
LT
858 return err;
859}
860
cf6b1982
YH
861static inline int ip6_rt_check(struct rt6key *rt_key,
862 struct in6_addr *fl_addr,
863 struct in6_addr *addr_cache)
864{
865 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
866 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
867}
868
497c615a
HX
869static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
870 struct dst_entry *dst,
871 struct flowi *fl)
1da177e4 872{
497c615a
HX
873 struct ipv6_pinfo *np = inet6_sk(sk);
874 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 875
497c615a
HX
876 if (!dst)
877 goto out;
878
879 /* Yes, checking route validity in not connected
880 * case is not very simple. Take into account,
881 * that we do not support routing by source, TOS,
882 * and MSG_DONTROUTE --ANK (980726)
883 *
cf6b1982
YH
884 * 1. ip6_rt_check(): If route was host route,
885 * check that cached destination is current.
497c615a
HX
886 * If it is network route, we still may
887 * check its validity using saved pointer
888 * to the last used address: daddr_cache.
889 * We do not want to save whole address now,
890 * (because main consumer of this service
891 * is tcp, which has not this problem),
892 * so that the last trick works only on connected
893 * sockets.
894 * 2. oif also should be the same.
895 */
cf6b1982 896 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
897#ifdef CONFIG_IPV6_SUBTREES
898 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
899#endif
cf6b1982 900 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
901 dst_release(dst);
902 dst = NULL;
1da177e4
LT
903 }
904
497c615a
HX
905out:
906 return dst;
907}
908
909static int ip6_dst_lookup_tail(struct sock *sk,
910 struct dst_entry **dst, struct flowi *fl)
911{
912 int err;
3b1e0a65 913 struct net *net = sock_net(sk);
497c615a 914
1da177e4 915 if (*dst == NULL)
8a3edd80 916 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
917
918 if ((err = (*dst)->error))
919 goto out_err_release;
920
921 if (ipv6_addr_any(&fl->fl6_src)) {
5e5f3f0f 922 err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
7cbca67c
YH
923 &fl->fl6_dst,
924 sk ? inet6_sk(sk)->srcprefs : 0,
925 &fl->fl6_src);
44456d37 926 if (err)
1da177e4 927 goto out_err_release;
1da177e4
LT
928 }
929
95c385b4
NH
930#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
931 /*
932 * Here if the dst entry we've looked up
933 * has a neighbour entry that is in the INCOMPLETE
934 * state and the src address from the flow is
935 * marked as OPTIMISTIC, we release the found
936 * dst entry and replace it instead with the
937 * dst entry of the nexthop router
938 */
939 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
940 struct inet6_ifaddr *ifp;
941 struct flowi fl_gw;
942 int redirect;
943
8a3edd80 944 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
1cab3da6 945 (*dst)->dev, 1);
95c385b4
NH
946
947 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
948 if (ifp)
949 in6_ifa_put(ifp);
950
951 if (redirect) {
952 /*
953 * We need to get the dst entry for the
954 * default router instead
955 */
956 dst_release(*dst);
957 memcpy(&fl_gw, fl, sizeof(struct flowi));
958 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
8a3edd80 959 *dst = ip6_route_output(net, sk, &fl_gw);
95c385b4
NH
960 if ((err = (*dst)->error))
961 goto out_err_release;
962 }
963 }
964#endif
965
1da177e4
LT
966 return 0;
967
968out_err_release:
ca46f9c8
MC
969 if (err == -ENETUNREACH)
970 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
971 dst_release(*dst);
972 *dst = NULL;
973 return err;
974}
34a0b3cd 975
497c615a
HX
976/**
977 * ip6_dst_lookup - perform route lookup on flow
978 * @sk: socket which provides route info
979 * @dst: pointer to dst_entry * for result
980 * @fl: flow to lookup
981 *
982 * This function performs a route lookup on the given flow.
983 *
984 * It returns zero on success, or a standard errno code on error.
985 */
986int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
987{
988 *dst = NULL;
989 return ip6_dst_lookup_tail(sk, dst, fl);
990}
3cf3dc6c
ACM
991EXPORT_SYMBOL_GPL(ip6_dst_lookup);
992
497c615a
HX
993/**
994 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
995 * @sk: socket which provides the dst cache and route info
996 * @dst: pointer to dst_entry * for result
997 * @fl: flow to lookup
998 *
999 * This function performs a route lookup on the given flow with the
1000 * possibility of using the cached route in the socket if it is valid.
1001 * It will take the socket dst lock when operating on the dst cache.
1002 * As a result, this function can only be used in process context.
1003 *
1004 * It returns zero on success, or a standard errno code on error.
1005 */
1006int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1007{
1008 *dst = NULL;
1009 if (sk) {
1010 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1011 *dst = ip6_sk_dst_check(sk, *dst, fl);
1012 }
1013
1014 return ip6_dst_lookup_tail(sk, dst, fl);
1015}
1016EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1017
34a0b3cd 1018static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1019 int getfrag(void *from, char *to, int offset, int len,
1020 int odd, struct sk_buff *skb),
1021 void *from, int length, int hh_len, int fragheaderlen,
1022 int transhdrlen, int mtu,unsigned int flags)
1023
1024{
1025 struct sk_buff *skb;
1026 int err;
1027
1028 /* There is support for UDP large send offload by network
1029 * device, so create one single skb packet containing complete
1030 * udp datagram
1031 */
1032 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1033 skb = sock_alloc_send_skb(sk,
1034 hh_len + fragheaderlen + transhdrlen + 20,
1035 (flags & MSG_DONTWAIT), &err);
1036 if (skb == NULL)
1037 return -ENOMEM;
1038
1039 /* reserve space for Hardware header */
1040 skb_reserve(skb, hh_len);
1041
1042 /* create space for UDP/IP header */
1043 skb_put(skb,fragheaderlen + transhdrlen);
1044
1045 /* initialize network header pointer */
c1d2bbe1 1046 skb_reset_network_header(skb);
e89e9cf5
AR
1047
1048 /* initialize protocol header pointer */
b0e380b1 1049 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1050
84fa7933 1051 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1052 skb->csum = 0;
1053 sk->sk_sndmsg_off = 0;
1054 }
1055
1056 err = skb_append_datato_frags(sk,skb, getfrag, from,
1057 (length - transhdrlen));
1058 if (!err) {
1059 struct frag_hdr fhdr;
1060
1061 /* specify the length of each IP datagram fragment*/
1ab1457c 1062 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
7967168c 1063 sizeof(struct frag_hdr);
f83ef8c0 1064 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
e89e9cf5
AR
1065 ipv6_select_ident(skb, &fhdr);
1066 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1067 __skb_queue_tail(&sk->sk_write_queue, skb);
1068
1069 return 0;
1070 }
1071 /* There is not enough support do UPD LSO,
1072 * so follow normal path
1073 */
1074 kfree_skb(skb);
1075
1076 return err;
1077}
1da177e4 1078
41a1f8ea
YH
1079int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1080 int offset, int len, int odd, struct sk_buff *skb),
1081 void *from, int length, int transhdrlen,
1082 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1083 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1084{
1085 struct inet_sock *inet = inet_sk(sk);
1086 struct ipv6_pinfo *np = inet6_sk(sk);
1087 struct sk_buff *skb;
1088 unsigned int maxfraglen, fragheaderlen;
1089 int exthdrlen;
1090 int hh_len;
1091 int mtu;
1092 int copy;
1093 int err;
1094 int offset = 0;
1095 int csummode = CHECKSUM_NONE;
1096
1097 if (flags&MSG_PROBE)
1098 return 0;
1099 if (skb_queue_empty(&sk->sk_write_queue)) {
1100 /*
1101 * setup for corking
1102 */
1103 if (opt) {
1104 if (np->cork.opt == NULL) {
1105 np->cork.opt = kmalloc(opt->tot_len,
1106 sk->sk_allocation);
1107 if (unlikely(np->cork.opt == NULL))
1108 return -ENOBUFS;
1109 } else if (np->cork.opt->tot_len < opt->tot_len) {
1110 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1111 return -EINVAL;
1112 }
1113 memcpy(np->cork.opt, opt, opt->tot_len);
1114 inet->cork.flags |= IPCORK_OPT;
1115 /* need source address above miyazawa*/
1116 }
1117 dst_hold(&rt->u.dst);
c8cdaf99 1118 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1119 inet->cork.fl = *fl;
1120 np->cork.hop_limit = hlimit;
41a1f8ea 1121 np->cork.tclass = tclass;
628a5c56
JH
1122 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1123 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1124 if (np->frag_size < mtu) {
d91675f9
YH
1125 if (np->frag_size)
1126 mtu = np->frag_size;
1127 }
1128 inet->cork.fragsize = mtu;
1da177e4
LT
1129 if (dst_allfrag(rt->u.dst.path))
1130 inet->cork.flags |= IPCORK_ALLFRAG;
1131 inet->cork.length = 0;
1132 sk->sk_sndmsg_page = NULL;
1133 sk->sk_sndmsg_off = 0;
01488942 1134 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1135 rt->rt6i_nfheader_len;
1da177e4
LT
1136 length += exthdrlen;
1137 transhdrlen += exthdrlen;
1138 } else {
c8cdaf99 1139 rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1140 fl = &inet->cork.fl;
1141 if (inet->cork.flags & IPCORK_OPT)
1142 opt = np->cork.opt;
1143 transhdrlen = 0;
1144 exthdrlen = 0;
1145 mtu = inet->cork.fragsize;
1146 }
1147
1148 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1149
a1b05140 1150 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1151 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1152 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1153
1154 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1155 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1156 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1157 return -EMSGSIZE;
1158 }
1159 }
1160
1161 /*
1162 * Let's try using as much space as possible.
1163 * Use MTU if total length of the message fits into the MTU.
1164 * Otherwise, we need to reserve fragment header and
1165 * fragment alignment (= 8-15 octects, in total).
1166 *
1167 * Note that we may need to "move" the data from the tail of
1ab1457c 1168 * of the buffer to the new fragment when we split
1da177e4
LT
1169 * the message.
1170 *
1ab1457c 1171 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1172 * at once if non-fragmentable extension headers
1173 * are too large.
1ab1457c 1174 * --yoshfuji
1da177e4
LT
1175 */
1176
1177 inet->cork.length += length;
e89e9cf5
AR
1178 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1179 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1180
baa829d8
PM
1181 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1182 fragheaderlen, transhdrlen, mtu,
1183 flags);
1184 if (err)
e89e9cf5 1185 goto error;
e89e9cf5
AR
1186 return 0;
1187 }
1da177e4
LT
1188
1189 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1190 goto alloc_new_skb;
1191
1192 while (length > 0) {
1193 /* Check if the remaining data fits into current packet. */
1194 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1195 if (copy < length)
1196 copy = maxfraglen - skb->len;
1197
1198 if (copy <= 0) {
1199 char *data;
1200 unsigned int datalen;
1201 unsigned int fraglen;
1202 unsigned int fraggap;
1203 unsigned int alloclen;
1204 struct sk_buff *skb_prev;
1205alloc_new_skb:
1206 skb_prev = skb;
1207
1208 /* There's no room in the current skb */
1209 if (skb_prev)
1210 fraggap = skb_prev->len - maxfraglen;
1211 else
1212 fraggap = 0;
1213
1214 /*
1215 * If remaining data exceeds the mtu,
1216 * we know we need more fragment(s).
1217 */
1218 datalen = length + fraggap;
1219 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1220 datalen = maxfraglen - fragheaderlen;
1221
1222 fraglen = datalen + fragheaderlen;
1223 if ((flags & MSG_MORE) &&
1224 !(rt->u.dst.dev->features&NETIF_F_SG))
1225 alloclen = mtu;
1226 else
1227 alloclen = datalen + fragheaderlen;
1228
1229 /*
1230 * The last fragment gets additional space at tail.
1231 * Note: we overallocate on fragments with MSG_MODE
1232 * because we have no idea if we're the last one.
1233 */
1234 if (datalen == length + fraggap)
1235 alloclen += rt->u.dst.trailer_len;
1236
1237 /*
1238 * We just reserve space for fragment header.
1ab1457c 1239 * Note: this may be overallocation if the message
1da177e4
LT
1240 * (without MSG_MORE) fits into the MTU.
1241 */
1242 alloclen += sizeof(struct frag_hdr);
1243
1244 if (transhdrlen) {
1245 skb = sock_alloc_send_skb(sk,
1246 alloclen + hh_len,
1247 (flags & MSG_DONTWAIT), &err);
1248 } else {
1249 skb = NULL;
1250 if (atomic_read(&sk->sk_wmem_alloc) <=
1251 2 * sk->sk_sndbuf)
1252 skb = sock_wmalloc(sk,
1253 alloclen + hh_len, 1,
1254 sk->sk_allocation);
1255 if (unlikely(skb == NULL))
1256 err = -ENOBUFS;
1257 }
1258 if (skb == NULL)
1259 goto error;
1260 /*
1261 * Fill in the control structures
1262 */
1263 skb->ip_summed = csummode;
1264 skb->csum = 0;
1265 /* reserve for fragmentation */
1266 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1267
1268 /*
1269 * Find where to start putting bytes
1270 */
1271 data = skb_put(skb, fraglen);
c14d2450 1272 skb_set_network_header(skb, exthdrlen);
1da177e4 1273 data += fragheaderlen;
b0e380b1
ACM
1274 skb->transport_header = (skb->network_header +
1275 fragheaderlen);
1da177e4
LT
1276 if (fraggap) {
1277 skb->csum = skb_copy_and_csum_bits(
1278 skb_prev, maxfraglen,
1279 data + transhdrlen, fraggap, 0);
1280 skb_prev->csum = csum_sub(skb_prev->csum,
1281 skb->csum);
1282 data += fraggap;
e9fa4f7b 1283 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1284 }
1285 copy = datalen - transhdrlen - fraggap;
1286 if (copy < 0) {
1287 err = -EINVAL;
1288 kfree_skb(skb);
1289 goto error;
1290 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1291 err = -EFAULT;
1292 kfree_skb(skb);
1293 goto error;
1294 }
1295
1296 offset += copy;
1297 length -= datalen - fraggap;
1298 transhdrlen = 0;
1299 exthdrlen = 0;
1300 csummode = CHECKSUM_NONE;
1301
1302 /*
1303 * Put the packet on the pending queue
1304 */
1305 __skb_queue_tail(&sk->sk_write_queue, skb);
1306 continue;
1307 }
1308
1309 if (copy > length)
1310 copy = length;
1311
1312 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1313 unsigned int off;
1314
1315 off = skb->len;
1316 if (getfrag(from, skb_put(skb, copy),
1317 offset, copy, off, skb) < 0) {
1318 __skb_trim(skb, off);
1319 err = -EFAULT;
1320 goto error;
1321 }
1322 } else {
1323 int i = skb_shinfo(skb)->nr_frags;
1324 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1325 struct page *page = sk->sk_sndmsg_page;
1326 int off = sk->sk_sndmsg_off;
1327 unsigned int left;
1328
1329 if (page && (left = PAGE_SIZE - off) > 0) {
1330 if (copy >= left)
1331 copy = left;
1332 if (page != frag->page) {
1333 if (i == MAX_SKB_FRAGS) {
1334 err = -EMSGSIZE;
1335 goto error;
1336 }
1337 get_page(page);
1338 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1339 frag = &skb_shinfo(skb)->frags[i];
1340 }
1341 } else if(i < MAX_SKB_FRAGS) {
1342 if (copy > PAGE_SIZE)
1343 copy = PAGE_SIZE;
1344 page = alloc_pages(sk->sk_allocation, 0);
1345 if (page == NULL) {
1346 err = -ENOMEM;
1347 goto error;
1348 }
1349 sk->sk_sndmsg_page = page;
1350 sk->sk_sndmsg_off = 0;
1351
1352 skb_fill_page_desc(skb, i, page, 0, 0);
1353 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1354 } else {
1355 err = -EMSGSIZE;
1356 goto error;
1357 }
1358 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1359 err = -EFAULT;
1360 goto error;
1361 }
1362 sk->sk_sndmsg_off += copy;
1363 frag->size += copy;
1364 skb->len += copy;
1365 skb->data_len += copy;
f945fa7a
HX
1366 skb->truesize += copy;
1367 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1368 }
1369 offset += copy;
1370 length -= copy;
1371 }
1372 return 0;
1373error:
1374 inet->cork.length -= length;
a11d206d 1375 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1376 return err;
1377}
1378
bf138862
PE
1379static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1380{
1381 inet->cork.flags &= ~IPCORK_OPT;
1382 kfree(np->cork.opt);
1383 np->cork.opt = NULL;
c8cdaf99
YH
1384 if (inet->cork.dst) {
1385 dst_release(inet->cork.dst);
1386 inet->cork.dst = NULL;
bf138862
PE
1387 inet->cork.flags &= ~IPCORK_ALLFRAG;
1388 }
1389 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1390}
1391
1da177e4
LT
1392int ip6_push_pending_frames(struct sock *sk)
1393{
1394 struct sk_buff *skb, *tmp_skb;
1395 struct sk_buff **tail_skb;
1396 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1397 struct inet_sock *inet = inet_sk(sk);
1398 struct ipv6_pinfo *np = inet6_sk(sk);
1399 struct ipv6hdr *hdr;
1400 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1401 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1402 struct flowi *fl = &inet->cork.fl;
1403 unsigned char proto = fl->proto;
1404 int err = 0;
1405
1406 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1407 goto out;
1408 tail_skb = &(skb_shinfo(skb)->frag_list);
1409
1410 /* move skb->data to ip header from ext header */
d56f90a7 1411 if (skb->data < skb_network_header(skb))
bbe735e4 1412 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1413 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1414 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1415 *tail_skb = tmp_skb;
1416 tail_skb = &(tmp_skb->next);
1417 skb->len += tmp_skb->len;
1418 skb->data_len += tmp_skb->len;
1da177e4
LT
1419 skb->truesize += tmp_skb->truesize;
1420 __sock_put(tmp_skb->sk);
1421 tmp_skb->destructor = NULL;
1422 tmp_skb->sk = NULL;
1da177e4
LT
1423 }
1424
28a89453 1425 /* Allow local fragmentation. */
b5c15fc0 1426 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1427 skb->local_df = 1;
1428
1da177e4 1429 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1430 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1431 if (opt && opt->opt_flen)
1432 ipv6_push_frag_opts(skb, opt, &proto);
1433 if (opt && opt->opt_nflen)
1434 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1435
e2d1bca7
ACM
1436 skb_push(skb, sizeof(struct ipv6hdr));
1437 skb_reset_network_header(skb);
0660e03f 1438 hdr = ipv6_hdr(skb);
1ab1457c 1439
90bcaf7b 1440 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1441 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1442
1da177e4
LT
1443 hdr->hop_limit = np->cork.hop_limit;
1444 hdr->nexthdr = proto;
1445 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1446 ipv6_addr_copy(&hdr->daddr, final_dst);
1447
a2c2064f 1448 skb->priority = sk->sk_priority;
4a19ec58 1449 skb->mark = sk->sk_mark;
a2c2064f 1450
1da177e4 1451 skb->dst = dst_clone(&rt->u.dst);
a11d206d 1452 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
14878f75
DS
1453 if (proto == IPPROTO_ICMPV6) {
1454 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1455
1456 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1457 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1458 }
1459
ef76bc23 1460 err = ip6_local_out(skb);
1da177e4
LT
1461 if (err) {
1462 if (err > 0)
3320da89 1463 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1464 if (err)
1465 goto error;
1466 }
1467
1468out:
bf138862 1469 ip6_cork_release(inet, np);
1da177e4
LT
1470 return err;
1471error:
1472 goto out;
1473}
1474
1475void ip6_flush_pending_frames(struct sock *sk)
1476{
1da177e4
LT
1477 struct sk_buff *skb;
1478
1479 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
e1f52208
YH
1480 if (skb->dst)
1481 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1482 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1483 kfree_skb(skb);
1484 }
1485
bf138862 1486 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1487}