]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: ip6_push_pending_frames() should increment IPSTATS_MIB_OUTDISCARDS
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
ef76bc23
HX
60int __ip6_local_out(struct sk_buff *skb)
61{
62 int len;
63
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
68
adf30907 69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
ef76bc23
HX
70 dst_output);
71}
72
/*
 * Send a locally generated packet: run the LOCAL_OUT hook and, when the
 * hook reports 1, continue with dst_output().  Any other hook result is
 * returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
ad643a79 85static int ip6_output_finish(struct sk_buff *skb)
1da177e4 86{
adf30907 87 struct dst_entry *dst = skb_dst(skb);
1da177e4 88
3644f0ce
SH
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
1da177e4
LT
92 return dst->neighbour->output(skb);
93
483a47d2
DL
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
96 kfree_skb(skb);
97 return -EINVAL;
98
99}
100
101/* dev_loopback_xmit for use with netfilter. */
102static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103{
459a98ed 104 skb_reset_mac_header(newskb);
bbe735e4 105 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 108 WARN_ON(!skb_dst(newskb));
1da177e4
LT
109
110 netif_rx(newskb);
111 return 0;
112}
113
114
115static int ip6_output2(struct sk_buff *skb)
116{
adf30907 117 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
118 struct net_device *dev = dst->dev;
119
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
122
0660e03f 123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 124 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
adf30907 125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4
LT
126
127 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
bd91b8bf
BT
128 ((mroute6_socket(dev_net(dev)) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133
134 /* Do not check for IFF_ALLMULTI; multicast routing
135 is not supported in any case.
136 */
137 if (newskb)
6e23ae2a
PM
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139 NULL, newskb->dev,
1da177e4
LT
140 ip6_dev_loopback_xmit);
141
0660e03f 142 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
143 IP6_INC_STATS(dev_net(dev), idev,
144 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
145 kfree_skb(skb);
146 return 0;
147 }
148 }
149
edf391ff
NH
150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151 skb->len);
1da177e4
LT
152 }
153
6e23ae2a
PM
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155 ip6_output_finish);
1da177e4
LT
156}
157
628a5c56
JH
158static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159{
160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161
162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
adf30907 163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
628a5c56
JH
164}
165
1da177e4
LT
166int ip6_output(struct sk_buff *skb)
167{
adf30907 168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 169 if (unlikely(idev->cnf.disable_ipv6)) {
adf30907 170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
3bd653c8 171 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
172 kfree_skb(skb);
173 return 0;
174 }
175
628a5c56 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
adf30907 177 dst_allfrag(skb_dst(skb)))
1da177e4
LT
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181}
182
1da177e4
LT
183/*
184 * xmit an sk_buff (used by TCP)
185 */
186
187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189{
3bd653c8 190 struct net *net = sock_net(sk);
b30bd282 191 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4 192 struct in6_addr *first_hop = &fl->fl6_dst;
adf30907 193 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
194 struct ipv6hdr *hdr;
195 u8 proto = fl->proto;
196 int seg_len = skb->len;
e651f03a
GR
197 int hlimit = -1;
198 int tclass = 0;
1da177e4
LT
199 u32 mtu;
200
201 if (opt) {
c2636b4d 202 unsigned int head_room;
1da177e4
LT
203
204 /* First: exthdrs may take lots of space (~8K for now)
205 MAX_HEADER is not enough.
206 */
207 head_room = opt->opt_nflen + opt->opt_flen;
208 seg_len += head_room;
209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210
211 if (skb_headroom(skb) < head_room) {
212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 213 if (skb2 == NULL) {
adf30907 214 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
215 IPSTATS_MIB_OUTDISCARDS);
216 kfree_skb(skb);
1da177e4
LT
217 return -ENOBUFS;
218 }
a11d206d
YH
219 kfree_skb(skb);
220 skb = skb2;
1da177e4
LT
221 if (sk)
222 skb_set_owner_w(skb, sk);
223 }
224 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto);
226 if (opt->opt_nflen)
227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
228 }
229
e2d1bca7
ACM
230 skb_push(skb, sizeof(struct ipv6hdr));
231 skb_reset_network_header(skb);
0660e03f 232 hdr = ipv6_hdr(skb);
1da177e4 233
77e2f14f
WY
234 /* Allow local fragmentation. */
235 if (ipfragok)
236 skb->local_df = 1;
237
1da177e4
LT
238 /*
239 * Fill in the IPv6 header
240 */
e651f03a
GR
241 if (np) {
242 tclass = np->tclass;
1da177e4 243 hlimit = np->hop_limit;
e651f03a 244 }
1da177e4 245 if (hlimit < 0)
6b75d090 246 hlimit = ip6_dst_hoplimit(dst);
1da177e4 247
90bcaf7b 248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 249
1da177e4
LT
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
253
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
256
a2c2064f 257 skb->priority = sk->sk_priority;
4a19ec58 258 skb->mark = sk->sk_mark;
a2c2064f 259
1da177e4 260 mtu = dst_mtu(dst);
283d07ac 261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 263 IPSTATS_MIB_OUT, skb->len);
6e23ae2a 264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 265 dst_output);
1da177e4
LT
266 }
267
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
adf30907 272 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
273 kfree_skb(skb);
274 return -EMSGSIZE;
275}
276
7159039a
YH
277EXPORT_SYMBOL(ip6_xmit);
278
1da177e4
LT
279/*
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
284 */
285
286int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 287 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
288 int proto, int len)
289{
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
293
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
296
297 totlen = len + sizeof(struct ipv6hdr);
298
55f79cc0
ACM
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 301 hdr = ipv6_hdr(skb);
1da177e4 302
ae08e1f0 303 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
304
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
308
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
311
312 return 0;
313}
314
315static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316{
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
319
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
0bd1b59b
AM
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
330 }
331 last = sk;
332 }
333 }
334
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
339 }
340 read_unlock(&ip6_ra_lock);
341 return 0;
342}
343
e21e0b5f
VN
344static int ip6_forward_proxy_check(struct sk_buff *skb)
345{
0660e03f 346 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
349
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
356
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
359
d56f90a7
ACM
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
e21e0b5f
VN
362 return 0;
363
d56f90a7 364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
365
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
374 * input function.
375 */
376 return 1;
377 default:
378 break;
379 }
380 }
381
74553b09
VN
382 /*
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
386 */
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
390 }
391
e21e0b5f
VN
392 return 0;
393}
394
1da177e4
LT
/* Continuation of the NF_INET_FORWARD hook: send the packet via its dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
399
400int ip6_forward(struct sk_buff *skb)
401{
adf30907 402 struct dst_entry *dst = skb_dst(skb);
0660e03f 403 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 404 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 405 struct net *net = dev_net(dst->dev);
1ab1457c 406
53b7997f 407 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
408 goto error;
409
4497b076
BH
410 if (skb_warn_if_lro(skb))
411 goto drop;
412
1da177e4 413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
415 goto drop;
416 }
417
35fc92a9 418 skb_forward_csum(skb);
1da177e4
LT
419
420 /*
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
426 *
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
432 */
433 if (opt->ra) {
d56f90a7 434 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
437 }
438
439 /*
440 * check and decrement ttl
441 */
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
446 0, skb->dev);
483a47d2
DL
447 IP6_INC_STATS_BH(net,
448 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
449
450 kfree_skb(skb);
451 return -ETIMEDOUT;
452 }
453
fbea49e1 454 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 455 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 456 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
457 int proxied = ip6_forward_proxy_check(skb);
458 if (proxied > 0)
e21e0b5f 459 return ip6_input(skb);
74553b09 460 else if (proxied < 0) {
3bd653c8
DL
461 IP6_INC_STATS(net, ip6_dst_idev(dst),
462 IPSTATS_MIB_INDISCARDS);
74553b09
VN
463 goto drop;
464 }
e21e0b5f
VN
465 }
466
1da177e4 467 if (!xfrm6_route_forward(skb)) {
3bd653c8 468 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
469 goto drop;
470 }
adf30907 471 dst = skb_dst(skb);
1da177e4
LT
472
473 /* IPv6 specs say nothing about it, but it is clear that we cannot
474 send redirects to source routed frames.
1e5dc146 475 We don't send redirects to frames decapsulated from IPsec.
1da177e4 476 */
1e5dc146 477 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
def8b4fa 478 !skb_sec_path(skb)) {
1da177e4
LT
479 struct in6_addr *target = NULL;
480 struct rt6_info *rt;
481 struct neighbour *n = dst->neighbour;
482
483 /*
484 * incoming and outgoing devices are the same
485 * send a redirect.
486 */
487
488 rt = (struct rt6_info *) dst;
489 if ((rt->rt6i_flags & RTF_GATEWAY))
490 target = (struct in6_addr*)&n->primary_key;
491 else
492 target = &hdr->daddr;
493
494 /* Limit redirects both by destination (here)
495 and by source (inside ndisc_send_redirect)
496 */
497 if (xrlim_allow(dst, 1*HZ))
498 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
499 } else {
500 int addrtype = ipv6_addr_type(&hdr->saddr);
501
1da177e4 502 /* This check is security critical. */
f81b2e7d
YH
503 if (addrtype == IPV6_ADDR_ANY ||
504 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
505 goto error;
506 if (addrtype & IPV6_ADDR_LINKLOCAL) {
507 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
508 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
509 goto error;
510 }
1da177e4
LT
511 }
512
513 if (skb->len > dst_mtu(dst)) {
514 /* Again, force OUTPUT device used as source address */
515 skb->dev = dst->dev;
516 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
483a47d2
DL
517 IP6_INC_STATS_BH(net,
518 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
519 IP6_INC_STATS_BH(net,
520 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
521 kfree_skb(skb);
522 return -EMSGSIZE;
523 }
524
525 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 526 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
527 goto drop;
528 }
529
0660e03f 530 hdr = ipv6_hdr(skb);
1da177e4
LT
531
532 /* Mangling hops number delayed to point after skb COW */
1ab1457c 533
1da177e4
LT
534 hdr->hop_limit--;
535
483a47d2 536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
537 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
538 ip6_forward_finish);
1da177e4
LT
539
540error:
483a47d2 541 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
542drop:
543 kfree_skb(skb);
544 return -EINVAL;
545}
546
547static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
548{
549 to->pkt_type = from->pkt_type;
550 to->priority = from->priority;
551 to->protocol = from->protocol;
adf30907
ED
552 skb_dst_drop(to);
553 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 554 to->dev = from->dev;
82e91ffe 555 to->mark = from->mark;
1da177e4
LT
556
557#ifdef CONFIG_NET_SCHED
558 to->tc_index = from->tc_index;
559#endif
e7ac05f3 560 nf_copy(to, from);
ba9dda3a
JK
561#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
562 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
563 to->nf_trace = from->nf_trace;
564#endif
984bc16c 565 skb_copy_secmark(to, from);
1da177e4
LT
566}
567
568int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
569{
570 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
571 struct ipv6_opt_hdr *exthdr =
572 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 573 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 574 int found_rhdr = 0;
0660e03f 575 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
576
577 while (offset + 1 <= packet_len) {
578
579 switch (**nexthdr) {
580
581 case NEXTHDR_HOP:
27637df9 582 break;
1da177e4 583 case NEXTHDR_ROUTING:
27637df9
MN
584 found_rhdr = 1;
585 break;
1da177e4 586 case NEXTHDR_DEST:
59fbb3a6 587#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
588 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
589 break;
590#endif
591 if (found_rhdr)
592 return offset;
1da177e4
LT
593 break;
594 default :
595 return offset;
596 }
27637df9
MN
597
598 offset += ipv6_optlen(exthdr);
599 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
600 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
601 offset);
1da177e4
LT
602 }
603
604 return offset;
605}
606
607static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
608{
1da177e4 609 struct sk_buff *frag;
adf30907 610 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 611 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
612 struct ipv6hdr *tmp_hdr;
613 struct frag_hdr *fh;
614 unsigned int mtu, hlen, left, len;
ae08e1f0 615 __be32 frag_id = 0;
1da177e4
LT
616 int ptr, offset = 0, err=0;
617 u8 *prevhdr, nexthdr = 0;
adf30907 618 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 619
1da177e4
LT
620 hlen = ip6_find_1stfragopt(skb, &prevhdr);
621 nexthdr = *prevhdr;
622
628a5c56 623 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
624
625 /* We must not fragment if the socket is set to force MTU discovery
626 * or if the skb it not generated by a local socket. (This last
627 * check should be redundant, but it's free.)
628 */
b5c15fc0 629 if (!skb->local_df) {
adf30907 630 skb->dev = skb_dst(skb)->dev;
b881ef76 631 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
adf30907 632 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 633 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
634 kfree_skb(skb);
635 return -EMSGSIZE;
636 }
637
d91675f9
YH
638 if (np && np->frag_size < mtu) {
639 if (np->frag_size)
640 mtu = np->frag_size;
641 }
642 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 643
4d9092bb 644 if (skb_has_frags(skb)) {
1da177e4 645 int first_len = skb_pagelen(skb);
29ffe1a5 646 int truesizes = 0;
1da177e4
LT
647
648 if (first_len - hlen > mtu ||
649 ((first_len - hlen) & 7) ||
650 skb_cloned(skb))
651 goto slow_path;
652
4d9092bb 653 skb_walk_frags(skb, frag) {
1da177e4
LT
654 /* Correct geometry. */
655 if (frag->len > mtu ||
656 ((frag->len & 7) && frag->next) ||
657 skb_headroom(frag) < hlen)
658 goto slow_path;
659
1da177e4
LT
660 /* Partially cloned skb? */
661 if (skb_shared(frag))
662 goto slow_path;
2fdba6b0
HX
663
664 BUG_ON(frag->sk);
665 if (skb->sk) {
2fdba6b0
HX
666 frag->sk = skb->sk;
667 frag->destructor = sock_wfree;
29ffe1a5 668 truesizes += frag->truesize;
2fdba6b0 669 }
1da177e4
LT
670 }
671
672 err = 0;
673 offset = 0;
674 frag = skb_shinfo(skb)->frag_list;
4d9092bb 675 skb_frag_list_init(skb);
1da177e4
LT
676 /* BUILD HEADER */
677
9a217a1c 678 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 679 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 680 if (!tmp_hdr) {
adf30907 681 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 682 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
683 return -ENOMEM;
684 }
685
1da177e4
LT
686 __skb_pull(skb, hlen);
687 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
688 __skb_push(skb, hlen);
689 skb_reset_network_header(skb);
d56f90a7 690 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 691
7ea2f2c5 692 ipv6_select_ident(fh);
1da177e4
LT
693 fh->nexthdr = nexthdr;
694 fh->reserved = 0;
695 fh->frag_off = htons(IP6_MF);
696 frag_id = fh->identification;
697
698 first_len = skb_pagelen(skb);
699 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 700 skb->truesize -= truesizes;
1da177e4 701 skb->len = first_len;
0660e03f
ACM
702 ipv6_hdr(skb)->payload_len = htons(first_len -
703 sizeof(struct ipv6hdr));
a11d206d
YH
704
705 dst_hold(&rt->u.dst);
1da177e4
LT
706
707 for (;;) {
708 /* Prepare header of the next frame,
709 * before previous one went down. */
710 if (frag) {
711 frag->ip_summed = CHECKSUM_NONE;
badff6d0 712 skb_reset_transport_header(frag);
1da177e4 713 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
714 __skb_push(frag, hlen);
715 skb_reset_network_header(frag);
d56f90a7
ACM
716 memcpy(skb_network_header(frag), tmp_hdr,
717 hlen);
1da177e4
LT
718 offset += skb->len - hlen - sizeof(struct frag_hdr);
719 fh->nexthdr = nexthdr;
720 fh->reserved = 0;
721 fh->frag_off = htons(offset);
722 if (frag->next != NULL)
723 fh->frag_off |= htons(IP6_MF);
724 fh->identification = frag_id;
0660e03f
ACM
725 ipv6_hdr(frag)->payload_len =
726 htons(frag->len -
727 sizeof(struct ipv6hdr));
1da177e4
LT
728 ip6_copy_metadata(frag, skb);
729 }
1ab1457c 730
1da177e4 731 err = output(skb);
dafee490 732 if(!err)
3bd653c8
DL
733 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
734 IPSTATS_MIB_FRAGCREATES);
dafee490 735
1da177e4
LT
736 if (err || !frag)
737 break;
738
739 skb = frag;
740 frag = skb->next;
741 skb->next = NULL;
742 }
743
a51482bd 744 kfree(tmp_hdr);
1da177e4
LT
745
746 if (err == 0) {
3bd653c8
DL
747 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
748 IPSTATS_MIB_FRAGOKS);
a11d206d 749 dst_release(&rt->u.dst);
1da177e4
LT
750 return 0;
751 }
752
753 while (frag) {
754 skb = frag->next;
755 kfree_skb(frag);
756 frag = skb;
757 }
758
3bd653c8
DL
759 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
760 IPSTATS_MIB_FRAGFAILS);
a11d206d 761 dst_release(&rt->u.dst);
1da177e4
LT
762 return err;
763 }
764
765slow_path:
766 left = skb->len - hlen; /* Space per frame */
767 ptr = hlen; /* Where to start from */
768
769 /*
770 * Fragment the datagram.
771 */
772
773 *prevhdr = NEXTHDR_FRAGMENT;
774
775 /*
776 * Keep copying data until we run out.
777 */
778 while(left > 0) {
779 len = left;
780 /* IF: it doesn't fit, use 'mtu' - the data space left */
781 if (len > mtu)
782 len = mtu;
783 /* IF: we are not sending upto and including the packet end
784 then align the next start on an eight byte boundary */
785 if (len < left) {
786 len &= ~7;
787 }
788 /*
789 * Allocate buffer.
790 */
791
f5184d26 792 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 793 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 794 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 795 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
796 err = -ENOMEM;
797 goto fail;
798 }
799
800 /*
801 * Set up data on packet
802 */
803
804 ip6_copy_metadata(frag, skb);
805 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
806 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 807 skb_reset_network_header(frag);
badff6d0 808 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
809 frag->transport_header = (frag->network_header + hlen +
810 sizeof(struct frag_hdr));
1da177e4
LT
811
812 /*
813 * Charge the memory for the fragment to any owner
814 * it might possess
815 */
816 if (skb->sk)
817 skb_set_owner_w(frag, skb->sk);
818
819 /*
820 * Copy the packet header into the new buffer.
821 */
d626f62b 822 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
823
824 /*
825 * Build fragment header.
826 */
827 fh->nexthdr = nexthdr;
828 fh->reserved = 0;
f36d6ab1 829 if (!frag_id) {
7ea2f2c5 830 ipv6_select_ident(fh);
1da177e4
LT
831 frag_id = fh->identification;
832 } else
833 fh->identification = frag_id;
834
835 /*
836 * Copy a block of the IP datagram.
837 */
8984e41d 838 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
839 BUG();
840 left -= len;
841
842 fh->frag_off = htons(offset);
843 if (left > 0)
844 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
845 ipv6_hdr(frag)->payload_len = htons(frag->len -
846 sizeof(struct ipv6hdr));
1da177e4
LT
847
848 ptr += len;
849 offset += len;
850
851 /*
852 * Put this fragment into the sending queue.
853 */
1da177e4
LT
854 err = output(frag);
855 if (err)
856 goto fail;
dafee490 857
adf30907 858 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 859 IPSTATS_MIB_FRAGCREATES);
1da177e4 860 }
adf30907 861 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 862 IPSTATS_MIB_FRAGOKS);
1da177e4 863 kfree_skb(skb);
1da177e4
LT
864 return err;
865
866fail:
adf30907 867 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 868 IPSTATS_MIB_FRAGFAILS);
1ab1457c 869 kfree_skb(skb);
1da177e4
LT
870 return err;
871}
872
cf6b1982
YH
873static inline int ip6_rt_check(struct rt6key *rt_key,
874 struct in6_addr *fl_addr,
875 struct in6_addr *addr_cache)
876{
877 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
878 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
879}
880
497c615a
HX
881static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
882 struct dst_entry *dst,
883 struct flowi *fl)
1da177e4 884{
497c615a
HX
885 struct ipv6_pinfo *np = inet6_sk(sk);
886 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 887
497c615a
HX
888 if (!dst)
889 goto out;
890
891 /* Yes, checking route validity in not connected
892 * case is not very simple. Take into account,
893 * that we do not support routing by source, TOS,
894 * and MSG_DONTROUTE --ANK (980726)
895 *
cf6b1982
YH
896 * 1. ip6_rt_check(): If route was host route,
897 * check that cached destination is current.
497c615a
HX
898 * If it is network route, we still may
899 * check its validity using saved pointer
900 * to the last used address: daddr_cache.
901 * We do not want to save whole address now,
902 * (because main consumer of this service
903 * is tcp, which has not this problem),
904 * so that the last trick works only on connected
905 * sockets.
906 * 2. oif also should be the same.
907 */
cf6b1982 908 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
909#ifdef CONFIG_IPV6_SUBTREES
910 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
911#endif
cf6b1982 912 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
913 dst_release(dst);
914 dst = NULL;
1da177e4
LT
915 }
916
497c615a
HX
917out:
918 return dst;
919}
920
921static int ip6_dst_lookup_tail(struct sock *sk,
922 struct dst_entry **dst, struct flowi *fl)
923{
924 int err;
3b1e0a65 925 struct net *net = sock_net(sk);
497c615a 926
1da177e4 927 if (*dst == NULL)
8a3edd80 928 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
929
930 if ((err = (*dst)->error))
931 goto out_err_release;
932
933 if (ipv6_addr_any(&fl->fl6_src)) {
191cd582 934 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
7cbca67c
YH
935 &fl->fl6_dst,
936 sk ? inet6_sk(sk)->srcprefs : 0,
937 &fl->fl6_src);
44456d37 938 if (err)
1da177e4 939 goto out_err_release;
1da177e4
LT
940 }
941
95c385b4 942#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
943 /*
944 * Here if the dst entry we've looked up
945 * has a neighbour entry that is in the INCOMPLETE
946 * state and the src address from the flow is
947 * marked as OPTIMISTIC, we release the found
948 * dst entry and replace it instead with the
949 * dst entry of the nexthop router
950 */
951 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
952 struct inet6_ifaddr *ifp;
953 struct flowi fl_gw;
954 int redirect;
955
956 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
957 (*dst)->dev, 1);
958
959 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
960 if (ifp)
961 in6_ifa_put(ifp);
962
963 if (redirect) {
964 /*
965 * We need to get the dst entry for the
966 * default router instead
967 */
968 dst_release(*dst);
969 memcpy(&fl_gw, fl, sizeof(struct flowi));
970 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
971 *dst = ip6_route_output(net, sk, &fl_gw);
972 if ((err = (*dst)->error))
973 goto out_err_release;
95c385b4 974 }
e550dfb0 975 }
95c385b4
NH
976#endif
977
1da177e4
LT
978 return 0;
979
980out_err_release:
ca46f9c8 981 if (err == -ENETUNREACH)
483a47d2 982 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
983 dst_release(*dst);
984 *dst = NULL;
985 return err;
986}
34a0b3cd 987
497c615a
HX
988/**
989 * ip6_dst_lookup - perform route lookup on flow
990 * @sk: socket which provides route info
991 * @dst: pointer to dst_entry * for result
992 * @fl: flow to lookup
993 *
994 * This function performs a route lookup on the given flow.
995 *
996 * It returns zero on success, or a standard errno code on error.
997 */
998int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
999{
1000 *dst = NULL;
1001 return ip6_dst_lookup_tail(sk, dst, fl);
1002}
3cf3dc6c
ACM
1003EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1004
497c615a
HX
1005/**
1006 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1007 * @sk: socket which provides the dst cache and route info
1008 * @dst: pointer to dst_entry * for result
1009 * @fl: flow to lookup
1010 *
1011 * This function performs a route lookup on the given flow with the
1012 * possibility of using the cached route in the socket if it is valid.
1013 * It will take the socket dst lock when operating on the dst cache.
1014 * As a result, this function can only be used in process context.
1015 *
1016 * It returns zero on success, or a standard errno code on error.
1017 */
1018int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1019{
1020 *dst = NULL;
1021 if (sk) {
1022 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1023 *dst = ip6_sk_dst_check(sk, *dst, fl);
1024 }
1025
1026 return ip6_dst_lookup_tail(sk, dst, fl);
1027}
1028EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1029
34a0b3cd 1030static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1031 int getfrag(void *from, char *to, int offset, int len,
1032 int odd, struct sk_buff *skb),
1033 void *from, int length, int hh_len, int fragheaderlen,
1034 int transhdrlen, int mtu,unsigned int flags)
1035
1036{
1037 struct sk_buff *skb;
1038 int err;
1039
1040 /* There is support for UDP large send offload by network
1041 * device, so create one single skb packet containing complete
1042 * udp datagram
1043 */
1044 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1045 skb = sock_alloc_send_skb(sk,
1046 hh_len + fragheaderlen + transhdrlen + 20,
1047 (flags & MSG_DONTWAIT), &err);
1048 if (skb == NULL)
1049 return -ENOMEM;
1050
1051 /* reserve space for Hardware header */
1052 skb_reserve(skb, hh_len);
1053
1054 /* create space for UDP/IP header */
1055 skb_put(skb,fragheaderlen + transhdrlen);
1056
1057 /* initialize network header pointer */
c1d2bbe1 1058 skb_reset_network_header(skb);
e89e9cf5
AR
1059
1060 /* initialize protocol header pointer */
b0e380b1 1061 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1062
84fa7933 1063 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1064 skb->csum = 0;
1065 sk->sk_sndmsg_off = 0;
1066 }
1067
1068 err = skb_append_datato_frags(sk,skb, getfrag, from,
1069 (length - transhdrlen));
1070 if (!err) {
1071 struct frag_hdr fhdr;
1072
c31d5326
SS
1073 /* Specify the length of each IPv6 datagram fragment.
1074 * It has to be a multiple of 8.
1075 */
1076 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1077 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1078 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
7ea2f2c5 1079 ipv6_select_ident(&fhdr);
e89e9cf5
AR
1080 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1081 __skb_queue_tail(&sk->sk_write_queue, skb);
1082
1083 return 0;
1084 }
1085 /* There is not enough support do UPD LSO,
1086 * so follow normal path
1087 */
1088 kfree_skb(skb);
1089
1090 return err;
1091}
1da177e4 1092
0178b695
HX
1093static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1094 gfp_t gfp)
1095{
1096 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1097}
1098
1099static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1100 gfp_t gfp)
1101{
1102 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1103}
1104
41a1f8ea
YH
1105int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1106 int offset, int len, int odd, struct sk_buff *skb),
1107 void *from, int length, int transhdrlen,
1108 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1109 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1110{
1111 struct inet_sock *inet = inet_sk(sk);
1112 struct ipv6_pinfo *np = inet6_sk(sk);
1113 struct sk_buff *skb;
1114 unsigned int maxfraglen, fragheaderlen;
1115 int exthdrlen;
1116 int hh_len;
1117 int mtu;
1118 int copy;
1119 int err;
1120 int offset = 0;
1121 int csummode = CHECKSUM_NONE;
1122
1123 if (flags&MSG_PROBE)
1124 return 0;
1125 if (skb_queue_empty(&sk->sk_write_queue)) {
1126 /*
1127 * setup for corking
1128 */
1129 if (opt) {
0178b695 1130 if (WARN_ON(np->cork.opt))
1da177e4 1131 return -EINVAL;
0178b695
HX
1132
1133 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1134 if (unlikely(np->cork.opt == NULL))
1135 return -ENOBUFS;
1136
1137 np->cork.opt->tot_len = opt->tot_len;
1138 np->cork.opt->opt_flen = opt->opt_flen;
1139 np->cork.opt->opt_nflen = opt->opt_nflen;
1140
1141 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1142 sk->sk_allocation);
1143 if (opt->dst0opt && !np->cork.opt->dst0opt)
1144 return -ENOBUFS;
1145
1146 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1147 sk->sk_allocation);
1148 if (opt->dst1opt && !np->cork.opt->dst1opt)
1149 return -ENOBUFS;
1150
1151 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1152 sk->sk_allocation);
1153 if (opt->hopopt && !np->cork.opt->hopopt)
1154 return -ENOBUFS;
1155
1156 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1157 sk->sk_allocation);
1158 if (opt->srcrt && !np->cork.opt->srcrt)
1159 return -ENOBUFS;
1160
1da177e4
LT
1161 /* need source address above miyazawa*/
1162 }
1163 dst_hold(&rt->u.dst);
c8cdaf99 1164 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1165 inet->cork.fl = *fl;
1166 np->cork.hop_limit = hlimit;
41a1f8ea 1167 np->cork.tclass = tclass;
628a5c56
JH
1168 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1169 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1170 if (np->frag_size < mtu) {
d91675f9
YH
1171 if (np->frag_size)
1172 mtu = np->frag_size;
1173 }
1174 inet->cork.fragsize = mtu;
1da177e4
LT
1175 if (dst_allfrag(rt->u.dst.path))
1176 inet->cork.flags |= IPCORK_ALLFRAG;
1177 inet->cork.length = 0;
1178 sk->sk_sndmsg_page = NULL;
1179 sk->sk_sndmsg_off = 0;
01488942 1180 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1181 rt->rt6i_nfheader_len;
1da177e4
LT
1182 length += exthdrlen;
1183 transhdrlen += exthdrlen;
1184 } else {
c8cdaf99 1185 rt = (struct rt6_info *)inet->cork.dst;
1da177e4 1186 fl = &inet->cork.fl;
0178b695 1187 opt = np->cork.opt;
1da177e4
LT
1188 transhdrlen = 0;
1189 exthdrlen = 0;
1190 mtu = inet->cork.fragsize;
1191 }
1192
1193 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1194
a1b05140 1195 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1196 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1197 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1198
1199 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1200 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1201 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1202 return -EMSGSIZE;
1203 }
1204 }
1205
1206 /*
1207 * Let's try using as much space as possible.
1208 * Use MTU if total length of the message fits into the MTU.
1209 * Otherwise, we need to reserve fragment header and
1210 * fragment alignment (= 8-15 octects, in total).
1211 *
1212 * Note that we may need to "move" the data from the tail of
1ab1457c 1213 * of the buffer to the new fragment when we split
1da177e4
LT
1214 * the message.
1215 *
1ab1457c 1216 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1217 * at once if non-fragmentable extension headers
1218 * are too large.
1ab1457c 1219 * --yoshfuji
1da177e4
LT
1220 */
1221
1222 inet->cork.length += length;
e89e9cf5
AR
1223 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1224 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1225
baa829d8
PM
1226 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1227 fragheaderlen, transhdrlen, mtu,
1228 flags);
1229 if (err)
e89e9cf5 1230 goto error;
e89e9cf5
AR
1231 return 0;
1232 }
1da177e4
LT
1233
1234 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1235 goto alloc_new_skb;
1236
1237 while (length > 0) {
1238 /* Check if the remaining data fits into current packet. */
1239 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1240 if (copy < length)
1241 copy = maxfraglen - skb->len;
1242
1243 if (copy <= 0) {
1244 char *data;
1245 unsigned int datalen;
1246 unsigned int fraglen;
1247 unsigned int fraggap;
1248 unsigned int alloclen;
1249 struct sk_buff *skb_prev;
1250alloc_new_skb:
1251 skb_prev = skb;
1252
1253 /* There's no room in the current skb */
1254 if (skb_prev)
1255 fraggap = skb_prev->len - maxfraglen;
1256 else
1257 fraggap = 0;
1258
1259 /*
1260 * If remaining data exceeds the mtu,
1261 * we know we need more fragment(s).
1262 */
1263 datalen = length + fraggap;
1264 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1265 datalen = maxfraglen - fragheaderlen;
1266
1267 fraglen = datalen + fragheaderlen;
1268 if ((flags & MSG_MORE) &&
1269 !(rt->u.dst.dev->features&NETIF_F_SG))
1270 alloclen = mtu;
1271 else
1272 alloclen = datalen + fragheaderlen;
1273
1274 /*
1275 * The last fragment gets additional space at tail.
1276 * Note: we overallocate on fragments with MSG_MODE
1277 * because we have no idea if we're the last one.
1278 */
1279 if (datalen == length + fraggap)
1280 alloclen += rt->u.dst.trailer_len;
1281
1282 /*
1283 * We just reserve space for fragment header.
1ab1457c 1284 * Note: this may be overallocation if the message
1da177e4
LT
1285 * (without MSG_MORE) fits into the MTU.
1286 */
1287 alloclen += sizeof(struct frag_hdr);
1288
1289 if (transhdrlen) {
1290 skb = sock_alloc_send_skb(sk,
1291 alloclen + hh_len,
1292 (flags & MSG_DONTWAIT), &err);
1293 } else {
1294 skb = NULL;
1295 if (atomic_read(&sk->sk_wmem_alloc) <=
1296 2 * sk->sk_sndbuf)
1297 skb = sock_wmalloc(sk,
1298 alloclen + hh_len, 1,
1299 sk->sk_allocation);
1300 if (unlikely(skb == NULL))
1301 err = -ENOBUFS;
1302 }
1303 if (skb == NULL)
1304 goto error;
1305 /*
1306 * Fill in the control structures
1307 */
1308 skb->ip_summed = csummode;
1309 skb->csum = 0;
1310 /* reserve for fragmentation */
1311 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1312
1313 /*
1314 * Find where to start putting bytes
1315 */
1316 data = skb_put(skb, fraglen);
c14d2450 1317 skb_set_network_header(skb, exthdrlen);
1da177e4 1318 data += fragheaderlen;
b0e380b1
ACM
1319 skb->transport_header = (skb->network_header +
1320 fragheaderlen);
1da177e4
LT
1321 if (fraggap) {
1322 skb->csum = skb_copy_and_csum_bits(
1323 skb_prev, maxfraglen,
1324 data + transhdrlen, fraggap, 0);
1325 skb_prev->csum = csum_sub(skb_prev->csum,
1326 skb->csum);
1327 data += fraggap;
e9fa4f7b 1328 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1329 }
1330 copy = datalen - transhdrlen - fraggap;
1331 if (copy < 0) {
1332 err = -EINVAL;
1333 kfree_skb(skb);
1334 goto error;
1335 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1336 err = -EFAULT;
1337 kfree_skb(skb);
1338 goto error;
1339 }
1340
1341 offset += copy;
1342 length -= datalen - fraggap;
1343 transhdrlen = 0;
1344 exthdrlen = 0;
1345 csummode = CHECKSUM_NONE;
1346
1347 /*
1348 * Put the packet on the pending queue
1349 */
1350 __skb_queue_tail(&sk->sk_write_queue, skb);
1351 continue;
1352 }
1353
1354 if (copy > length)
1355 copy = length;
1356
1357 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1358 unsigned int off;
1359
1360 off = skb->len;
1361 if (getfrag(from, skb_put(skb, copy),
1362 offset, copy, off, skb) < 0) {
1363 __skb_trim(skb, off);
1364 err = -EFAULT;
1365 goto error;
1366 }
1367 } else {
1368 int i = skb_shinfo(skb)->nr_frags;
1369 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1370 struct page *page = sk->sk_sndmsg_page;
1371 int off = sk->sk_sndmsg_off;
1372 unsigned int left;
1373
1374 if (page && (left = PAGE_SIZE - off) > 0) {
1375 if (copy >= left)
1376 copy = left;
1377 if (page != frag->page) {
1378 if (i == MAX_SKB_FRAGS) {
1379 err = -EMSGSIZE;
1380 goto error;
1381 }
1382 get_page(page);
1383 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1384 frag = &skb_shinfo(skb)->frags[i];
1385 }
1386 } else if(i < MAX_SKB_FRAGS) {
1387 if (copy > PAGE_SIZE)
1388 copy = PAGE_SIZE;
1389 page = alloc_pages(sk->sk_allocation, 0);
1390 if (page == NULL) {
1391 err = -ENOMEM;
1392 goto error;
1393 }
1394 sk->sk_sndmsg_page = page;
1395 sk->sk_sndmsg_off = 0;
1396
1397 skb_fill_page_desc(skb, i, page, 0, 0);
1398 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1399 } else {
1400 err = -EMSGSIZE;
1401 goto error;
1402 }
1403 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1404 err = -EFAULT;
1405 goto error;
1406 }
1407 sk->sk_sndmsg_off += copy;
1408 frag->size += copy;
1409 skb->len += copy;
1410 skb->data_len += copy;
f945fa7a
HX
1411 skb->truesize += copy;
1412 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1413 }
1414 offset += copy;
1415 length -= copy;
1416 }
1417 return 0;
1418error:
1419 inet->cork.length -= length;
3bd653c8 1420 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1421 return err;
1422}
1423
bf138862
PE
1424static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1425{
0178b695
HX
1426 if (np->cork.opt) {
1427 kfree(np->cork.opt->dst0opt);
1428 kfree(np->cork.opt->dst1opt);
1429 kfree(np->cork.opt->hopopt);
1430 kfree(np->cork.opt->srcrt);
1431 kfree(np->cork.opt);
1432 np->cork.opt = NULL;
1433 }
1434
c8cdaf99
YH
1435 if (inet->cork.dst) {
1436 dst_release(inet->cork.dst);
1437 inet->cork.dst = NULL;
bf138862
PE
1438 inet->cork.flags &= ~IPCORK_ALLFRAG;
1439 }
1440 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1441}
1442
1da177e4
LT
1443int ip6_push_pending_frames(struct sock *sk)
1444{
1445 struct sk_buff *skb, *tmp_skb;
1446 struct sk_buff **tail_skb;
1447 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1448 struct inet_sock *inet = inet_sk(sk);
1449 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1450 struct net *net = sock_net(sk);
1da177e4
LT
1451 struct ipv6hdr *hdr;
1452 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1453 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1454 struct flowi *fl = &inet->cork.fl;
1455 unsigned char proto = fl->proto;
1456 int err = 0;
1457
1458 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1459 goto out;
1460 tail_skb = &(skb_shinfo(skb)->frag_list);
1461
1462 /* move skb->data to ip header from ext header */
d56f90a7 1463 if (skb->data < skb_network_header(skb))
bbe735e4 1464 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1465 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1466 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1467 *tail_skb = tmp_skb;
1468 tail_skb = &(tmp_skb->next);
1469 skb->len += tmp_skb->len;
1470 skb->data_len += tmp_skb->len;
1da177e4 1471 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1472 tmp_skb->destructor = NULL;
1473 tmp_skb->sk = NULL;
1da177e4
LT
1474 }
1475
28a89453 1476 /* Allow local fragmentation. */
b5c15fc0 1477 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1478 skb->local_df = 1;
1479
1da177e4 1480 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1481 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1482 if (opt && opt->opt_flen)
1483 ipv6_push_frag_opts(skb, opt, &proto);
1484 if (opt && opt->opt_nflen)
1485 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1486
e2d1bca7
ACM
1487 skb_push(skb, sizeof(struct ipv6hdr));
1488 skb_reset_network_header(skb);
0660e03f 1489 hdr = ipv6_hdr(skb);
1ab1457c 1490
90bcaf7b 1491 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1492 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1493
1da177e4
LT
1494 hdr->hop_limit = np->cork.hop_limit;
1495 hdr->nexthdr = proto;
1496 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1497 ipv6_addr_copy(&hdr->daddr, final_dst);
1498
a2c2064f 1499 skb->priority = sk->sk_priority;
4a19ec58 1500 skb->mark = sk->sk_mark;
a2c2064f 1501
adf30907 1502 skb_dst_set(skb, dst_clone(&rt->u.dst));
edf391ff 1503 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1504 if (proto == IPPROTO_ICMPV6) {
adf30907 1505 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1506
5a57d4c7 1507 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1508 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1509 }
1510
ef76bc23 1511 err = ip6_local_out(skb);
1da177e4
LT
1512 if (err) {
1513 if (err > 0)
3320da89 1514 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1515 if (err)
1516 goto error;
1517 }
1518
1519out:
bf138862 1520 ip6_cork_release(inet, np);
1da177e4
LT
1521 return err;
1522error:
06254914 1523 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1524 goto out;
1525}
1526
1527void ip6_flush_pending_frames(struct sock *sk)
1528{
1da177e4
LT
1529 struct sk_buff *skb;
1530
1531 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1532 if (skb_dst(skb))
1533 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1534 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1535 kfree_skb(skb);
1536 }
1537
bf138862 1538 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1539}