/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

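/*
 * Pick a fragment identification for a locally generated packet: a
 * single global counter, protected by a spinlock, that skips zero so
 * that slow-path callers can use 0 to mean "not chosen yet".
 */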
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

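/*
 * Fix up payload_len (set to 0 for jumbo-sized packets beyond
 * IPV6_MAXPLEN) and run the packet through the NF_IP6_LOCAL_OUT hook.
 * nf_hook() returns 1 when the packet may proceed, in which case
 * ip6_local_out() hands it to dst_output(); any other value means
 * netfilter queued, stole, or dropped it.
 */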
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

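/*
 * Final transmit step: use the cached hardware header if the route has
 * one, otherwise fall back to the neighbour output function (which may
 * have to resolve the link-layer address first).
 */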
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

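/*
 * Deliver a packet to the device.  For multicast destinations this
 * also loops a copy back to local listeners when the group is joined
 * on this interface (unless the socket disabled multicast loopback),
 * and drops hop_limit 0 packets after the loopback copy instead of
 * sending them on the wire.
 */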
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

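/*
 * The MTU to fragment against: the raw device MTU when the socket asked
 * for IPV6_PMTUDISC_PROBE (it performs its own path MTU discovery), the
 * path MTU from the route otherwise.
 */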
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

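	/* Hop limit precedence: the socket's setting first, then the
	 * route metric, then the device/default value.  Traffic class
	 * comes from the socket, defaulting to 0. */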
	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

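/*
 * Deliver a Router Alert packet to every raw socket that registered
 * the matching alert value (via the IPV6_ROUTER_ALERT socket option).
 * Every listener but the last gets a clone; the last one consumes the
 * original skb, and the return value tells the caller whether anyone
 * took the packet.
 */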
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

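/*
 * Decide what to do with a packet that arrived for an address we proxy
 * NDP for: 1 means hand it to the local input path (unicast neighbour
 * discovery), -1 means reject it (link-local destination), 0 means
 * forward as usual.
 */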
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to a proxied address must be passed to the
			 * input function so they can be answered.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

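/*
 * Forward one packet: check that forwarding is enabled and allowed by
 * xfrm policy, divert Router Alert packets to interested raw sockets,
 * enforce the hop limit, honour NDP proxying, generate redirects when
 * the packet leaves on the interface it arrived on, check the path
 * MTU, and finally decrement hop_limit (after skb_cow()) and hand the
 * packet to the NF_IP6_FORWARD hook.
 */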
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so if packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

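/*
 * Copy the per-packet metadata (priority, device, mark, netfilter and
 * security state, ...) from the original packet to a fragment.
 */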
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

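/*
 * Find the offset at which a Fragment header has to be inserted: skip
 * past the extension headers that belong in the unfragmentable part
 * (Hop-by-Hop, Routing, and Destination Options that precede a Routing
 * header or carry a Home Address option) and leave *nexthdr pointing
 * at the nexthdr byte that will have to be patched.
 */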
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);

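/*
 * Fragment a packet and feed each fragment to output().  Two paths: a
 * fast path that reuses an existing frag_list whose elements already
 * have the right size and alignment (each element becomes one fragment
 * once the headers are prepended), and a slow path that allocates a
 * fresh skb per fragment and copies the data into it.
 */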
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

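	/* Fast path: only if every element of the frag_list already fits
	 * the MTU, is a multiple of 8 bytes (except the last), has room
	 * for the headers, and nothing is cloned or shared. */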
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
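				/* offset is a multiple of 8 here, so the
				 * byte offset equals (offset / 8) << 3:
				 * exactly the wire encoding of the 13-bit
				 * fragment offset field, with the low bits
				 * free for the reserved/MF flags. */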
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_RESERVED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

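/*
 * Is the cached route stale for this flow?  A host route must still
 * point at the flow's destination; a network route is checked against
 * the last destination the socket actually used (addr_cache).
 */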
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

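/*
 * Common tail of the route lookups: resolve the route if the caller
 * did not supply one, pick a source address when the flow left it
 * unspecified, and (under CONFIG_IPV6_OPTIMISTIC_DAD) fall back to the
 * default router's dst entry while the chosen source address is still
 * optimistic and the nexthop neighbour is unresolved.
 */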
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry that
	 * is in the INCOMPLETE state, and the src address from the flow
	 * is marked as OPTIMISTIC, we release the found dst entry and
	 * replace it instead with the dst entry of the nexthop router.
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

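/*
 * UDP fragmentation offload: build (or extend) one oversized UDP
 * datagram on the write queue and mark it with gso_size so that a
 * UFO-capable device can segment it into MTU-sized fragments itself.
 */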
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

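/*
 * Append data to the pending (corked) queue for the socket.  The first
 * call sets up the cork state (options, route, MTU); later calls keep
 * appending.  Packets are built at most maxfraglen long so that
 * ip6_push_pending_frames() can turn the queue into ready-made
 * fragments without further copying.
 */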
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

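	/* e.g. mtu 1500, fragheaderlen 40: the fragmentable part is
	 * rounded down to a multiple of 8 (1460 -> 1456) and 8 bytes are
	 * reserved for a future fragment header, so maxfraglen =
	 * 1456 + 40 - 8 = 1488 bytes of IPv6 packet per fragment. */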
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) + frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

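/*
 * Throw away the per-socket cork state: the copied options, the held
 * route, and the cached flow.
 */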
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

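/*
 * Transmit everything queued by ip6_append_data(): chain the queued
 * skbs into one packet's frag_list, push the extension headers and the
 * IPv6 header, and hand the result to ip6_local_out().  ip6_fragment()
 * can then split it back along the frag_list boundaries.
 */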
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
			htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

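/*
 * Abort a corked send: drop everything still sitting on the write
 * queue and release the cork state.
 */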
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}