net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
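/*
 * __ip6_local_out / ip6_local_out - finalise a locally generated packet.
 * The payload length is filled in (clamped to zero if it would exceed
 * IPV6_MAXPLEN) and the packet is run through the NF_INET_LOCAL_OUT
 * hook; ip6_local_out() then calls dst_output() when the hook accepts
 * the packet without queueing it.
 */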
61 int __ip6_local_out(struct sk_buff *skb)
62 {
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
70 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
71 dst_output);
72 }
73
74 int ip6_local_out(struct sk_buff *skb)
75 {
76 int err;
77
78 err = __ip6_local_out(skb);
79 if (likely(err == 1))
80 err = dst_output(skb);
81
82 return err;
83 }
84 EXPORT_SYMBOL_GPL(ip6_local_out);
85
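/*
 * ip6_output_finish - hand a fully resolved packet to the link layer.
 * The cached hardware header (dst->hh) is used when present, otherwise
 * the neighbour's output function; if neither exists the packet is
 * counted as OUTNOROUTES and dropped.
 */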
86 static int ip6_output_finish(struct sk_buff *skb)
87 {
88 struct dst_entry *dst = skb_dst(skb);
89
90 if (dst->hh)
91 return neigh_hh_output(dst->hh, skb);
92 else if (dst->neighbour)
93 return dst->neighbour->output(skb);
94
95 IP6_INC_STATS_BH(dev_net(dst->dev),
96 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
97 kfree_skb(skb);
98 return -EINVAL;
99
100 }
101
102 /* dev_loopback_xmit for use with netfilter. */
103 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104 {
105 skb_reset_mac_header(newskb);
106 __skb_pull(newskb, skb_network_offset(newskb));
107 newskb->pkt_type = PACKET_LOOPBACK;
108 newskb->ip_summed = CHECKSUM_UNNECESSARY;
109 WARN_ON(!skb_dst(newskb));
110
111 netif_rx(newskb);
112 return 0;
113 }
114
115
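/*
 * ip6_output2 - post-routing transmit step.
 * For multicast destinations, a clone may be looped back to the local
 * stack (when the sending socket wants multicast loopback and either
 * multicast routing needs the packet or the host is a member of the
 * group), and such a packet with a hop limit of zero is then discarded
 * rather than transmitted.  The packet is finally passed through the
 * NF_INET_POST_ROUTING hook to ip6_output_finish().
 */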
116 static int ip6_output2(struct sk_buff *skb)
117 {
118 struct dst_entry *dst = skb_dst(skb);
119 struct net_device *dev = dst->dev;
120
121 skb->protocol = htons(ETH_P_IPV6);
122 skb->dev = dev;
123
124 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
126
127 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
128 ((mroute6_socket(dev_net(dev)) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) {
132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133
134 /* Do not check for IFF_ALLMULTI; multicast routing
135 is not supported in any case.
136 */
137 if (newskb)
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139 NULL, newskb->dev,
140 ip6_dev_loopback_xmit);
141
142 if (ipv6_hdr(skb)->hop_limit == 0) {
143 IP6_INC_STATS(dev_net(dev), idev,
144 IPSTATS_MIB_OUTDISCARDS);
145 kfree_skb(skb);
146 return 0;
147 }
148 }
149
150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151 skb->len);
152 }
153
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155 ip6_output_finish);
156 }
157
158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159 {
160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161
162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
164 }
165
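/*
 * ip6_output - output entry point once a route is attached to the skb.
 * Drops the packet if IPv6 is administratively disabled on the egress
 * device, fragments it when it exceeds the destination MTU (and is not
 * GSO) or when the route requires fragmentation of every packet
 * (dst_allfrag), and otherwise hands it to ip6_output2().
 */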
166 int ip6_output(struct sk_buff *skb)
167 {
168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169 if (unlikely(idev->cnf.disable_ipv6)) {
170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
171 IPSTATS_MIB_OUTDISCARDS);
172 kfree_skb(skb);
173 return 0;
174 }
175
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 dst_allfrag(skb_dst(skb)))
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
181 }
182
183 /*
184 * xmit an sk_buff (used by TCP)
185 */
186
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
189 {
190 struct net *net = sock_net(sk);
191 struct ipv6_pinfo *np = inet6_sk(sk);
192 struct in6_addr *first_hop = &fl->fl6_dst;
193 struct dst_entry *dst = skb_dst(skb);
194 struct ipv6hdr *hdr;
195 u8 proto = fl->proto;
196 int seg_len = skb->len;
197 int hlimit = -1;
198 int tclass = 0;
199 u32 mtu;
200
201 if (opt) {
202 unsigned int head_room;
203
204 /* First: exthdrs may take lots of space (~8K for now);
205 MAX_HEADER is not enough.
206 */
207 head_room = opt->opt_nflen + opt->opt_flen;
208 seg_len += head_room;
209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210
211 if (skb_headroom(skb) < head_room) {
212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
213 if (skb2 == NULL) {
214 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
215 IPSTATS_MIB_OUTDISCARDS);
216 kfree_skb(skb);
217 return -ENOBUFS;
218 }
219 kfree_skb(skb);
220 skb = skb2;
221 if (sk)
222 skb_set_owner_w(skb, sk);
223 }
224 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto);
226 if (opt->opt_nflen)
227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
228 }
229
230 skb_push(skb, sizeof(struct ipv6hdr));
231 skb_reset_network_header(skb);
232 hdr = ipv6_hdr(skb);
233
234 /* Allow local fragmentation. */
235 if (ipfragok)
236 skb->local_df = 1;
237
238 /*
239 * Fill in the IPv6 header
240 */
241 if (np) {
242 tclass = np->tclass;
243 hlimit = np->hop_limit;
244 }
245 if (hlimit < 0)
246 hlimit = ip6_dst_hoplimit(dst);
247
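	/*
	 * First 32 bits of the IPv6 header: version (6) in the top four
	 * bits, traffic class in the next eight and the flow label in the
	 * low 20.  fl->fl6_flowlabel is already stored in on-the-wire
	 * form, so only the version/tclass part goes through htonl().
	 */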
248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
249
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
253
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
256
257 skb->priority = sk->sk_priority;
258 skb->mark = sk->sk_mark;
259
260 mtu = dst_mtu(dst);
261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
263 IPSTATS_MIB_OUT, skb->len);
264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
265 dst_output);
266 }
267
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
272 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
273 kfree_skb(skb);
274 return -EMSGSIZE;
275 }
276
277 EXPORT_SYMBOL(ip6_xmit);
278
279 /*
280 * To avoid extra problems, ND packets are sent through this
281 * routine. It's code duplication, but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is performance-critical for us).
284 */
285
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 const struct in6_addr *saddr, const struct in6_addr *daddr,
288 int proto, int len)
289 {
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
293
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
296
297 totlen = len + sizeof(struct ipv6hdr);
298
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
301 hdr = ipv6_hdr(skb);
302
303 *(__be32*)hdr = htonl(0x60000000);
304
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
308
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
311
312 return 0;
313 }
314
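/*
 * ip6_call_ra_chain - deliver a Router Alert packet to interested raw
 * sockets.  Every socket on ip6_ra_chain whose selector matches (and
 * whose bound device, if any, matches the ingress device) receives a
 * clone; the last match consumes the original skb.  Returns 1 if the
 * packet was delivered to at least one socket, 0 otherwise.
 */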
315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 {
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
319
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
330 }
331 last = sk;
332 }
333 }
334
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
339 }
340 read_unlock(&ip6_ra_lock);
341 return 0;
342 }
343
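/*
 * ip6_forward_proxy_check - decide what to do with a packet whose
 * destination address we proxy (proxy NDP).  Returns 1 when it is a
 * neighbour discovery message that should go to the local input path,
 * -1 when the destination is link-local and the packet must be dropped
 * with a link failure, and 0 to let normal forwarding continue.
 */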
344 static int ip6_forward_proxy_check(struct sk_buff *skb)
345 {
346 struct ipv6hdr *hdr = ipv6_hdr(skb);
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
349
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
356
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
359
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
362 return 0;
363
364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
365
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* A unicast neighbor discovery message destined to the
373 * proxied address is passed to the local input
374 * function.
375 */
376 return 1;
377 default:
378 break;
379 }
380 }
381
382 /*
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
386 */
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
390 }
391
392 return 0;
393 }
394
395 static inline int ip6_forward_finish(struct sk_buff *skb)
396 {
397 return dst_output(skb);
398 }
399
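/*
 * ip6_forward - forward a packet towards a non-local destination.
 * Enforces the forwarding sysctl and the XFRM forward policy, hands
 * Router Alert packets to ip6_call_ra_chain(), checks and decrements
 * the hop limit, honours proxy NDP, sends redirects or ICMP errors
 * where required, and finally queues the packet through the
 * NF_INET_FORWARD hook.
 */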
400 int ip6_forward(struct sk_buff *skb)
401 {
402 struct dst_entry *dst = skb_dst(skb);
403 struct ipv6hdr *hdr = ipv6_hdr(skb);
404 struct inet6_skb_parm *opt = IP6CB(skb);
405 struct net *net = dev_net(dst->dev);
406 u32 mtu;
407
408 if (net->ipv6.devconf_all->forwarding == 0)
409 goto error;
410
411 if (skb_warn_if_lro(skb))
412 goto drop;
413
414 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
415 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
416 goto drop;
417 }
418
419 skb_forward_csum(skb);
420
421 /*
422 * We do NOT do any processing on
423 * RA packets; they are pushed to user level AS IS,
424 * without any warranty that the application will be able
425 * to interpret them. The reason is that we
426 * cannot do anything clever here.
427 *
428 * We are not the end node, so if the packet contains
429 * AH/ESP we cannot do anything with it.
430 * Defragmentation would also be a mistake; RA packets
431 * cannot be fragmented, because there is no guarantee
432 * that different fragments will go along one path. --ANK
433 */
434 if (opt->ra) {
435 u8 *ptr = skb_network_header(skb) + opt->ra;
436 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
437 return 0;
438 }
439
440 /*
441 * check and decrement the hop limit
442 */
443 if (hdr->hop_limit <= 1) {
444 /* Force OUTPUT device used as source address */
445 skb->dev = dst->dev;
446 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
447 IP6_INC_STATS_BH(net,
448 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
449
450 kfree_skb(skb);
451 return -ETIMEDOUT;
452 }
453
454 /* XXX: idev->cnf.proxy_ndp? */
455 if (net->ipv6.devconf_all->proxy_ndp &&
456 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
457 int proxied = ip6_forward_proxy_check(skb);
458 if (proxied > 0)
459 return ip6_input(skb);
460 else if (proxied < 0) {
461 IP6_INC_STATS(net, ip6_dst_idev(dst),
462 IPSTATS_MIB_INDISCARDS);
463 goto drop;
464 }
465 }
466
467 if (!xfrm6_route_forward(skb)) {
468 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
469 goto drop;
470 }
471 dst = skb_dst(skb);
472
473 /* IPv6 specs say nothing about it, but it is clear that we cannot
474 send redirects to source routed frames.
475 We don't send redirects to frames decapsulated from IPsec.
476 */
477 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
478 !skb_sec_path(skb)) {
479 struct in6_addr *target = NULL;
480 struct rt6_info *rt;
481 struct neighbour *n = dst->neighbour;
482
483 /*
484 * incoming and outgoing devices are the same;
485 * send a redirect.
486 */
487
488 rt = (struct rt6_info *) dst;
489 if ((rt->rt6i_flags & RTF_GATEWAY))
490 target = (struct in6_addr*)&n->primary_key;
491 else
492 target = &hdr->daddr;
493
494 /* Limit redirects both by destination (here)
495 and by source (inside ndisc_send_redirect)
496 */
497 if (xrlim_allow(dst, 1*HZ))
498 ndisc_send_redirect(skb, n, target);
499 } else {
500 int addrtype = ipv6_addr_type(&hdr->saddr);
501
502 /* This check is security critical. */
503 if (addrtype == IPV6_ADDR_ANY ||
504 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
505 goto error;
506 if (addrtype & IPV6_ADDR_LINKLOCAL) {
507 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
508 ICMPV6_NOT_NEIGHBOUR, 0);
509 goto error;
510 }
511 }
512
513 mtu = dst_mtu(dst);
514 if (mtu < IPV6_MIN_MTU)
515 mtu = IPV6_MIN_MTU;
516
517 if (skb->len > mtu) {
518 /* Again, force OUTPUT device used as source address */
519 skb->dev = dst->dev;
520 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
521 IP6_INC_STATS_BH(net,
522 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
523 IP6_INC_STATS_BH(net,
524 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
525 kfree_skb(skb);
526 return -EMSGSIZE;
527 }
528
529 if (skb_cow(skb, dst->dev->hard_header_len)) {
530 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
531 goto drop;
532 }
533
534 hdr = ipv6_hdr(skb);
535
536 /* Decrementing the hop limit is delayed until after the skb COW */
537
538 hdr->hop_limit--;
539
540 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
541 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
542 ip6_forward_finish);
543
544 error:
545 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
546 drop:
547 kfree_skb(skb);
548 return -EINVAL;
549 }
550
551 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
552 {
553 to->pkt_type = from->pkt_type;
554 to->priority = from->priority;
555 to->protocol = from->protocol;
556 skb_dst_drop(to);
557 skb_dst_set(to, dst_clone(skb_dst(from)));
558 to->dev = from->dev;
559 to->mark = from->mark;
560
561 #ifdef CONFIG_NET_SCHED
562 to->tc_index = from->tc_index;
563 #endif
564 nf_copy(to, from);
565 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
566 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
567 to->nf_trace = from->nf_trace;
568 #endif
569 skb_copy_secmark(to, from);
570 }
571
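/*
 * ip6_find_1stfragopt - locate where a fragment header must be placed.
 * Returns the length of the unfragmentable part of the extension header
 * chain and leaves *nexthdr pointing at the "next header" byte that
 * will be rewritten to NEXTHDR_FRAGMENT.  With MIPv6, a destination
 * options header carrying a Home Address option is kept in the
 * unfragmentable part.
 */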
572 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
573 {
574 u16 offset = sizeof(struct ipv6hdr);
575 struct ipv6_opt_hdr *exthdr =
576 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
577 unsigned int packet_len = skb->tail - skb->network_header;
578 int found_rhdr = 0;
579 *nexthdr = &ipv6_hdr(skb)->nexthdr;
580
581 while (offset + 1 <= packet_len) {
582
583 switch (**nexthdr) {
584
585 case NEXTHDR_HOP:
586 break;
587 case NEXTHDR_ROUTING:
588 found_rhdr = 1;
589 break;
590 case NEXTHDR_DEST:
591 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
592 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
593 break;
594 #endif
595 if (found_rhdr)
596 return offset;
597 break;
598 default :
599 return offset;
600 }
601
602 offset += ipv6_optlen(exthdr);
603 *nexthdr = &exthdr->nexthdr;
604 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
605 offset);
606 }
607
608 return offset;
609 }
610
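/*
 * ip6_fragment - split an oversized packet into fragments.
 * The fast path reuses an existing frag_list when every fragment
 * already has suitable length, alignment and headroom; otherwise the
 * slow path copies the payload into freshly allocated skbs.  In both
 * cases a fragment header is inserted after the unfragmentable part
 * found by ip6_find_1stfragopt().
 */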
611 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
612 {
613 struct sk_buff *frag;
614 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
615 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
616 struct ipv6hdr *tmp_hdr;
617 struct frag_hdr *fh;
618 unsigned int mtu, hlen, left, len;
619 __be32 frag_id = 0;
620 int ptr, offset = 0, err=0;
621 u8 *prevhdr, nexthdr = 0;
622 struct net *net = dev_net(skb_dst(skb)->dev);
623
624 hlen = ip6_find_1stfragopt(skb, &prevhdr);
625 nexthdr = *prevhdr;
626
627 mtu = ip6_skb_dst_mtu(skb);
628
629 /* We must not fragment if the socket is set to force MTU discovery
630 * or if the skb is not generated by a local socket.
631 */
632 if (!skb->local_df) {
633 skb->dev = skb_dst(skb)->dev;
634 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
635 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
636 IPSTATS_MIB_FRAGFAILS);
637 kfree_skb(skb);
638 return -EMSGSIZE;
639 }
640
641 if (np && np->frag_size < mtu) {
642 if (np->frag_size)
643 mtu = np->frag_size;
644 }
645 mtu -= hlen + sizeof(struct frag_hdr);
646
647 if (skb_has_frags(skb)) {
648 int first_len = skb_pagelen(skb);
649 int truesizes = 0;
650
651 if (first_len - hlen > mtu ||
652 ((first_len - hlen) & 7) ||
653 skb_cloned(skb))
654 goto slow_path;
655
656 skb_walk_frags(skb, frag) {
657 /* Correct geometry. */
658 if (frag->len > mtu ||
659 ((frag->len & 7) && frag->next) ||
660 skb_headroom(frag) < hlen)
661 goto slow_path;
662
663 /* Partially cloned skb? */
664 if (skb_shared(frag))
665 goto slow_path;
666
667 BUG_ON(frag->sk);
668 if (skb->sk) {
669 frag->sk = skb->sk;
670 frag->destructor = sock_wfree;
671 truesizes += frag->truesize;
672 }
673 }
674
675 err = 0;
676 offset = 0;
677 frag = skb_shinfo(skb)->frag_list;
678 skb_frag_list_init(skb);
679 /* BUILD HEADER */
680
681 *prevhdr = NEXTHDR_FRAGMENT;
682 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
683 if (!tmp_hdr) {
684 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
685 IPSTATS_MIB_FRAGFAILS);
686 return -ENOMEM;
687 }
688
689 __skb_pull(skb, hlen);
690 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
691 __skb_push(skb, hlen);
692 skb_reset_network_header(skb);
693 memcpy(skb_network_header(skb), tmp_hdr, hlen);
694
695 ipv6_select_ident(fh);
696 fh->nexthdr = nexthdr;
697 fh->reserved = 0;
698 fh->frag_off = htons(IP6_MF);
699 frag_id = fh->identification;
700
701 first_len = skb_pagelen(skb);
702 skb->data_len = first_len - skb_headlen(skb);
703 skb->truesize -= truesizes;
704 skb->len = first_len;
705 ipv6_hdr(skb)->payload_len = htons(first_len -
706 sizeof(struct ipv6hdr));
707
708 dst_hold(&rt->u.dst);
709
710 for (;;) {
711 /* Prepare the header of the next fragment
712 * before the previous one goes down. */
713 if (frag) {
714 frag->ip_summed = CHECKSUM_NONE;
715 skb_reset_transport_header(frag);
716 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
717 __skb_push(frag, hlen);
718 skb_reset_network_header(frag);
719 memcpy(skb_network_header(frag), tmp_hdr,
720 hlen);
721 offset += skb->len - hlen - sizeof(struct frag_hdr);
722 fh->nexthdr = nexthdr;
723 fh->reserved = 0;
724 fh->frag_off = htons(offset);
725 if (frag->next != NULL)
726 fh->frag_off |= htons(IP6_MF);
727 fh->identification = frag_id;
728 ipv6_hdr(frag)->payload_len =
729 htons(frag->len -
730 sizeof(struct ipv6hdr));
731 ip6_copy_metadata(frag, skb);
732 }
733
734 err = output(skb);
735 if(!err)
736 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
737 IPSTATS_MIB_FRAGCREATES);
738
739 if (err || !frag)
740 break;
741
742 skb = frag;
743 frag = skb->next;
744 skb->next = NULL;
745 }
746
747 kfree(tmp_hdr);
748
749 if (err == 0) {
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
751 IPSTATS_MIB_FRAGOKS);
752 dst_release(&rt->u.dst);
753 return 0;
754 }
755
756 while (frag) {
757 skb = frag->next;
758 kfree_skb(frag);
759 frag = skb;
760 }
761
762 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
763 IPSTATS_MIB_FRAGFAILS);
764 dst_release(&rt->u.dst);
765 return err;
766 }
767
768 slow_path:
769 left = skb->len - hlen; /* Space per frame */
770 ptr = hlen; /* Where to start from */
771
772 /*
773 * Fragment the datagram.
774 */
775
776 *prevhdr = NEXTHDR_FRAGMENT;
777
778 /*
779 * Keep copying data until we run out.
780 */
781 while(left > 0) {
782 len = left;
783 /* IF: it doesn't fit, use 'mtu' - the data space left */
784 if (len > mtu)
785 len = mtu;
786 /* IF: we are not sending up to and including the packet end,
787 then align the next start on an eight-byte boundary */
788 if (len < left) {
789 len &= ~7;
790 }
791 /*
792 * Allocate buffer.
793 */
794
795 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
796 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
797 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
798 IPSTATS_MIB_FRAGFAILS);
799 err = -ENOMEM;
800 goto fail;
801 }
802
803 /*
804 * Set up data on packet
805 */
806
807 ip6_copy_metadata(frag, skb);
808 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
809 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
810 skb_reset_network_header(frag);
811 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
812 frag->transport_header = (frag->network_header + hlen +
813 sizeof(struct frag_hdr));
814
815 /*
816 * Charge the memory for the fragment to any owner
817 * it might possess
818 */
819 if (skb->sk)
820 skb_set_owner_w(frag, skb->sk);
821
822 /*
823 * Copy the packet header into the new buffer.
824 */
825 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
826
827 /*
828 * Build fragment header.
829 */
830 fh->nexthdr = nexthdr;
831 fh->reserved = 0;
832 if (!frag_id) {
833 ipv6_select_ident(fh);
834 frag_id = fh->identification;
835 } else
836 fh->identification = frag_id;
837
838 /*
839 * Copy a block of the IP datagram.
840 */
841 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
842 BUG();
843 left -= len;
844
845 fh->frag_off = htons(offset);
846 if (left > 0)
847 fh->frag_off |= htons(IP6_MF);
848 ipv6_hdr(frag)->payload_len = htons(frag->len -
849 sizeof(struct ipv6hdr));
850
851 ptr += len;
852 offset += len;
853
854 /*
855 * Put this fragment into the sending queue.
856 */
857 err = output(frag);
858 if (err)
859 goto fail;
860
861 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
862 IPSTATS_MIB_FRAGCREATES);
863 }
864 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
865 IPSTATS_MIB_FRAGOKS);
866 kfree_skb(skb);
867 return err;
868
869 fail:
870 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
871 IPSTATS_MIB_FRAGFAILS);
872 kfree_skb(skb);
873 return err;
874 }
875
876 static inline int ip6_rt_check(struct rt6key *rt_key,
877 struct in6_addr *fl_addr,
878 struct in6_addr *addr_cache)
879 {
880 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
881 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
882 }
883
884 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
885 struct dst_entry *dst,
886 struct flowi *fl)
887 {
888 struct ipv6_pinfo *np = inet6_sk(sk);
889 struct rt6_info *rt = (struct rt6_info *)dst;
890
891 if (!dst)
892 goto out;
893
894 /* Yes, checking route validity in the not-connected
895 * case is not very simple. Take into account
896 * that we do not support routing by source, TOS,
897 * and MSG_DONTROUTE --ANK (980726)
898 *
899 * 1. ip6_rt_check(): If route was host route,
900 * check that cached destination is current.
901 * If it is network route, we still may
902 * check its validity using saved pointer
903 * to the last used address: daddr_cache.
904 * We do not want to save the whole address now
905 * (because the main consumer of this service
906 * is tcp, which does not have this problem),
907 * so this last trick works only on connected
908 * sockets.
909 * 2. oif also should be the same.
910 */
911 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
912 #ifdef CONFIG_IPV6_SUBTREES
913 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
914 #endif
915 (fl->oif && fl->oif != dst->dev->ifindex)) {
916 dst_release(dst);
917 dst = NULL;
918 }
919
920 out:
921 return dst;
922 }
923
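/*
 * ip6_dst_lookup_tail - common tail of the dst lookup helpers.
 * Performs the route lookup if no usable dst was supplied, fills in an
 * unspecified source address via ipv6_dev_get_saddr(), and (with
 * optimistic DAD) may redo the lookup towards the default router when
 * the chosen source address is still optimistic and the neighbour
 * entry is not yet valid.
 */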
924 static int ip6_dst_lookup_tail(struct sock *sk,
925 struct dst_entry **dst, struct flowi *fl)
926 {
927 int err;
928 struct net *net = sock_net(sk);
929
930 if (*dst == NULL)
931 *dst = ip6_route_output(net, sk, fl);
932
933 if ((err = (*dst)->error))
934 goto out_err_release;
935
936 if (ipv6_addr_any(&fl->fl6_src)) {
937 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
938 &fl->fl6_dst,
939 sk ? inet6_sk(sk)->srcprefs : 0,
940 &fl->fl6_src);
941 if (err)
942 goto out_err_release;
943 }
944
945 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
946 /*
947 * If the dst entry we've looked up
948 * has a neighbour entry that is in the INCOMPLETE
949 * state and the src address from the flow is
950 * marked as OPTIMISTIC, we release the found
951 * dst entry and replace it with the
952 * dst entry of the nexthop router.
953 */
954 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
955 struct inet6_ifaddr *ifp;
956 struct flowi fl_gw;
957 int redirect;
958
959 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
960 (*dst)->dev, 1);
961
962 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
963 if (ifp)
964 in6_ifa_put(ifp);
965
966 if (redirect) {
967 /*
968 * We need to get the dst entry for the
969 * default router instead
970 */
971 dst_release(*dst);
972 memcpy(&fl_gw, fl, sizeof(struct flowi));
973 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
974 *dst = ip6_route_output(net, sk, &fl_gw);
975 if ((err = (*dst)->error))
976 goto out_err_release;
977 }
978 }
979 #endif
980
981 return 0;
982
983 out_err_release:
984 if (err == -ENETUNREACH)
985 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
986 dst_release(*dst);
987 *dst = NULL;
988 return err;
989 }
990
991 /**
992 * ip6_dst_lookup - perform route lookup on flow
993 * @sk: socket which provides route info
994 * @dst: pointer to dst_entry * for result
995 * @fl: flow to lookup
996 *
997 * This function performs a route lookup on the given flow.
998 *
999 * It returns zero on success, or a standard errno code on error.
1000 */
1001 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1002 {
1003 *dst = NULL;
1004 return ip6_dst_lookup_tail(sk, dst, fl);
1005 }
1006 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1007
1008 /**
1009 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1010 * @sk: socket which provides the dst cache and route info
1011 * @dst: pointer to dst_entry * for result
1012 * @fl: flow to lookup
1013 *
1014 * This function performs a route lookup on the given flow with the
1015 * possibility of using the cached route in the socket if it is valid.
1016 * It will take the socket dst lock when operating on the dst cache.
1017 * As a result, this function can only be used in process context.
1018 *
1019 * It returns zero on success, or a standard errno code on error.
1020 */
1021 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1022 {
1023 *dst = NULL;
1024 if (sk) {
1025 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1026 *dst = ip6_sk_dst_check(sk, *dst, fl);
1027 }
1028
1029 return ip6_dst_lookup_tail(sk, dst, fl);
1030 }
1031 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1032
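/*
 * ip6_ufo_append_data - build one large skb for UDP fragmentation
 * offload.  The payload is appended as page fragments and gso_size is
 * set to the largest multiple of eight that fits the MTU after the
 * fragment header, leaving the actual fragmentation to the device or
 * the GSO layer.
 */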
1033 static inline int ip6_ufo_append_data(struct sock *sk,
1034 int getfrag(void *from, char *to, int offset, int len,
1035 int odd, struct sk_buff *skb),
1036 void *from, int length, int hh_len, int fragheaderlen,
1037 int transhdrlen, int mtu,unsigned int flags)
1038
1039 {
1040 struct sk_buff *skb;
1041 int err;
1042
1043 /* There is support for UDP large send offload by the network
1044 * device, so create one single skb containing the complete
1045 * udp datagram
1046 */
1047 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1048 skb = sock_alloc_send_skb(sk,
1049 hh_len + fragheaderlen + transhdrlen + 20,
1050 (flags & MSG_DONTWAIT), &err);
1051 if (skb == NULL)
1052 return -ENOMEM;
1053
1054 /* reserve space for Hardware header */
1055 skb_reserve(skb, hh_len);
1056
1057 /* create space for UDP/IP header */
1058 skb_put(skb,fragheaderlen + transhdrlen);
1059
1060 /* initialize network header pointer */
1061 skb_reset_network_header(skb);
1062
1063 /* initialize protocol header pointer */
1064 skb->transport_header = skb->network_header + fragheaderlen;
1065
1066 skb->ip_summed = CHECKSUM_PARTIAL;
1067 skb->csum = 0;
1068 sk->sk_sndmsg_off = 0;
1069 }
1070
1071 err = skb_append_datato_frags(sk,skb, getfrag, from,
1072 (length - transhdrlen));
1073 if (!err) {
1074 struct frag_hdr fhdr;
1075
1076 /* Specify the length of each IPv6 datagram fragment.
1077 * It has to be a multiple of 8.
1078 */
1079 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1080 sizeof(struct frag_hdr)) & ~7;
1081 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1082 ipv6_select_ident(&fhdr);
1083 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1084 __skb_queue_tail(&sk->sk_write_queue, skb);
1085
1086 return 0;
1087 }
1088 /* There is not enough support to do UDP LSO,
1089 * so follow the normal path
1090 */
1091 kfree_skb(skb);
1092
1093 return err;
1094 }
1095
1096 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1097 gfp_t gfp)
1098 {
1099 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1100 }
1101
1102 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1103 gfp_t gfp)
1104 {
1105 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1106 }
1107
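/*
 * ip6_append_data - queue data on the socket write queue for a corked
 * send.  The first call snapshots the route, options and MTU into the
 * cork; later calls reuse that state.  Data is packed into skbs no
 * larger than maxfraglen so that the final packet can later be
 * fragmented on clean eight-byte boundaries, and the UFO path is used
 * when the device supports it and a UDP datagram exceeds the MTU.
 */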
1108 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1109 int offset, int len, int odd, struct sk_buff *skb),
1110 void *from, int length, int transhdrlen,
1111 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1112 struct rt6_info *rt, unsigned int flags)
1113 {
1114 struct inet_sock *inet = inet_sk(sk);
1115 struct ipv6_pinfo *np = inet6_sk(sk);
1116 struct sk_buff *skb;
1117 unsigned int maxfraglen, fragheaderlen;
1118 int exthdrlen;
1119 int hh_len;
1120 int mtu;
1121 int copy;
1122 int err;
1123 int offset = 0;
1124 int csummode = CHECKSUM_NONE;
1125
1126 if (flags&MSG_PROBE)
1127 return 0;
1128 if (skb_queue_empty(&sk->sk_write_queue)) {
1129 /*
1130 * setup for corking
1131 */
1132 if (opt) {
1133 if (WARN_ON(np->cork.opt))
1134 return -EINVAL;
1135
1136 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1137 if (unlikely(np->cork.opt == NULL))
1138 return -ENOBUFS;
1139
1140 np->cork.opt->tot_len = opt->tot_len;
1141 np->cork.opt->opt_flen = opt->opt_flen;
1142 np->cork.opt->opt_nflen = opt->opt_nflen;
1143
1144 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1145 sk->sk_allocation);
1146 if (opt->dst0opt && !np->cork.opt->dst0opt)
1147 return -ENOBUFS;
1148
1149 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1150 sk->sk_allocation);
1151 if (opt->dst1opt && !np->cork.opt->dst1opt)
1152 return -ENOBUFS;
1153
1154 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1155 sk->sk_allocation);
1156 if (opt->hopopt && !np->cork.opt->hopopt)
1157 return -ENOBUFS;
1158
1159 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1160 sk->sk_allocation);
1161 if (opt->srcrt && !np->cork.opt->srcrt)
1162 return -ENOBUFS;
1163
1164 /* need source address above miyazawa */
1165 }
1166 dst_hold(&rt->u.dst);
1167 inet->cork.dst = &rt->u.dst;
1168 inet->cork.fl = *fl;
1169 np->cork.hop_limit = hlimit;
1170 np->cork.tclass = tclass;
1171 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1172 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1173 if (np->frag_size < mtu) {
1174 if (np->frag_size)
1175 mtu = np->frag_size;
1176 }
1177 inet->cork.fragsize = mtu;
1178 if (dst_allfrag(rt->u.dst.path))
1179 inet->cork.flags |= IPCORK_ALLFRAG;
1180 inet->cork.length = 0;
1181 sk->sk_sndmsg_page = NULL;
1182 sk->sk_sndmsg_off = 0;
1183 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1184 rt->rt6i_nfheader_len;
1185 length += exthdrlen;
1186 transhdrlen += exthdrlen;
1187 } else {
1188 rt = (struct rt6_info *)inet->cork.dst;
1189 fl = &inet->cork.fl;
1190 opt = np->cork.opt;
1191 transhdrlen = 0;
1192 exthdrlen = 0;
1193 mtu = inet->cork.fragsize;
1194 }
1195
1196 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1197
1198 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1199 (opt ? opt->opt_nflen : 0);
1200 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1201
1202 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1203 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1204 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1205 return -EMSGSIZE;
1206 }
1207 }
1208
1209 /*
1210 * Let's try using as much space as possible.
1211 * Use MTU if total length of the message fits into the MTU.
1212 * Otherwise, we need to reserve fragment header and
1213 * fragment alignment (= 8-15 octets, in total).
1214 *
1215 * Note that we may need to "move" the data from the tail
1216 * of the buffer to the new fragment when we split
1217 * the message.
1218 *
1219 * FIXME: It may be fragmented into multiple chunks
1220 * at once if non-fragmentable extension headers
1221 * are too large.
1222 * --yoshfuji
1223 */
1224
1225 inet->cork.length += length;
1226 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1227 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1228
1229 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1230 fragheaderlen, transhdrlen, mtu,
1231 flags);
1232 if (err)
1233 goto error;
1234 return 0;
1235 }
1236
1237 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1238 goto alloc_new_skb;
1239
1240 while (length > 0) {
1241 /* Check if the remaining data fits into current packet. */
1242 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1243 if (copy < length)
1244 copy = maxfraglen - skb->len;
1245
1246 if (copy <= 0) {
1247 char *data;
1248 unsigned int datalen;
1249 unsigned int fraglen;
1250 unsigned int fraggap;
1251 unsigned int alloclen;
1252 struct sk_buff *skb_prev;
1253 alloc_new_skb:
1254 skb_prev = skb;
1255
1256 /* There's no room in the current skb */
1257 if (skb_prev)
1258 fraggap = skb_prev->len - maxfraglen;
1259 else
1260 fraggap = 0;
1261
1262 /*
1263 * If remaining data exceeds the mtu,
1264 * we know we need more fragment(s).
1265 */
1266 datalen = length + fraggap;
1267 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1268 datalen = maxfraglen - fragheaderlen;
1269
1270 fraglen = datalen + fragheaderlen;
1271 if ((flags & MSG_MORE) &&
1272 !(rt->u.dst.dev->features&NETIF_F_SG))
1273 alloclen = mtu;
1274 else
1275 alloclen = datalen + fragheaderlen;
1276
1277 /*
1278 * The last fragment gets additional space at the tail.
1279 * Note: we overallocate on fragments with MSG_MORE
1280 * because we have no idea if we're the last one.
1281 */
1282 if (datalen == length + fraggap)
1283 alloclen += rt->u.dst.trailer_len;
1284
1285 /*
1286 * We just reserve space for fragment header.
1287 * Note: this may be overallocation if the message
1288 * (without MSG_MORE) fits into the MTU.
1289 */
1290 alloclen += sizeof(struct frag_hdr);
1291
1292 if (transhdrlen) {
1293 skb = sock_alloc_send_skb(sk,
1294 alloclen + hh_len,
1295 (flags & MSG_DONTWAIT), &err);
1296 } else {
1297 skb = NULL;
1298 if (atomic_read(&sk->sk_wmem_alloc) <=
1299 2 * sk->sk_sndbuf)
1300 skb = sock_wmalloc(sk,
1301 alloclen + hh_len, 1,
1302 sk->sk_allocation);
1303 if (unlikely(skb == NULL))
1304 err = -ENOBUFS;
1305 }
1306 if (skb == NULL)
1307 goto error;
1308 /*
1309 * Fill in the control structures
1310 */
1311 skb->ip_summed = csummode;
1312 skb->csum = 0;
1313 /* reserve for fragmentation */
1314 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1315
1316 /*
1317 * Find where to start putting bytes
1318 */
1319 data = skb_put(skb, fraglen);
1320 skb_set_network_header(skb, exthdrlen);
1321 data += fragheaderlen;
1322 skb->transport_header = (skb->network_header +
1323 fragheaderlen);
1324 if (fraggap) {
1325 skb->csum = skb_copy_and_csum_bits(
1326 skb_prev, maxfraglen,
1327 data + transhdrlen, fraggap, 0);
1328 skb_prev->csum = csum_sub(skb_prev->csum,
1329 skb->csum);
1330 data += fraggap;
1331 pskb_trim_unique(skb_prev, maxfraglen);
1332 }
1333 copy = datalen - transhdrlen - fraggap;
1334 if (copy < 0) {
1335 err = -EINVAL;
1336 kfree_skb(skb);
1337 goto error;
1338 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1339 err = -EFAULT;
1340 kfree_skb(skb);
1341 goto error;
1342 }
1343
1344 offset += copy;
1345 length -= datalen - fraggap;
1346 transhdrlen = 0;
1347 exthdrlen = 0;
1348 csummode = CHECKSUM_NONE;
1349
1350 /*
1351 * Put the packet on the pending queue
1352 */
1353 __skb_queue_tail(&sk->sk_write_queue, skb);
1354 continue;
1355 }
1356
1357 if (copy > length)
1358 copy = length;
1359
1360 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1361 unsigned int off;
1362
1363 off = skb->len;
1364 if (getfrag(from, skb_put(skb, copy),
1365 offset, copy, off, skb) < 0) {
1366 __skb_trim(skb, off);
1367 err = -EFAULT;
1368 goto error;
1369 }
1370 } else {
1371 int i = skb_shinfo(skb)->nr_frags;
1372 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1373 struct page *page = sk->sk_sndmsg_page;
1374 int off = sk->sk_sndmsg_off;
1375 unsigned int left;
1376
1377 if (page && (left = PAGE_SIZE - off) > 0) {
1378 if (copy >= left)
1379 copy = left;
1380 if (page != frag->page) {
1381 if (i == MAX_SKB_FRAGS) {
1382 err = -EMSGSIZE;
1383 goto error;
1384 }
1385 get_page(page);
1386 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1387 frag = &skb_shinfo(skb)->frags[i];
1388 }
1389 } else if(i < MAX_SKB_FRAGS) {
1390 if (copy > PAGE_SIZE)
1391 copy = PAGE_SIZE;
1392 page = alloc_pages(sk->sk_allocation, 0);
1393 if (page == NULL) {
1394 err = -ENOMEM;
1395 goto error;
1396 }
1397 sk->sk_sndmsg_page = page;
1398 sk->sk_sndmsg_off = 0;
1399
1400 skb_fill_page_desc(skb, i, page, 0, 0);
1401 frag = &skb_shinfo(skb)->frags[i];
1402 } else {
1403 err = -EMSGSIZE;
1404 goto error;
1405 }
1406 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1407 err = -EFAULT;
1408 goto error;
1409 }
1410 sk->sk_sndmsg_off += copy;
1411 frag->size += copy;
1412 skb->len += copy;
1413 skb->data_len += copy;
1414 skb->truesize += copy;
1415 atomic_add(copy, &sk->sk_wmem_alloc);
1416 }
1417 offset += copy;
1418 length -= copy;
1419 }
1420 return 0;
1421 error:
1422 inet->cork.length -= length;
1423 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1424 return err;
1425 }
1426
1427 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1428 {
1429 if (np->cork.opt) {
1430 kfree(np->cork.opt->dst0opt);
1431 kfree(np->cork.opt->dst1opt);
1432 kfree(np->cork.opt->hopopt);
1433 kfree(np->cork.opt->srcrt);
1434 kfree(np->cork.opt);
1435 np->cork.opt = NULL;
1436 }
1437
1438 if (inet->cork.dst) {
1439 dst_release(inet->cork.dst);
1440 inet->cork.dst = NULL;
1441 inet->cork.flags &= ~IPCORK_ALLFRAG;
1442 }
1443 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1444 }
1445
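/*
 * ip6_push_pending_frames - transmit everything queued by
 * ip6_append_data().  The queued skbs are glued together via the
 * frag_list of the first one, the IPv6 header is rebuilt from the cork
 * state (hop limit, traffic class, options, addresses), and the result
 * is sent with ip6_local_out() before the cork is released.
 */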
1446 int ip6_push_pending_frames(struct sock *sk)
1447 {
1448 struct sk_buff *skb, *tmp_skb;
1449 struct sk_buff **tail_skb;
1450 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1451 struct inet_sock *inet = inet_sk(sk);
1452 struct ipv6_pinfo *np = inet6_sk(sk);
1453 struct net *net = sock_net(sk);
1454 struct ipv6hdr *hdr;
1455 struct ipv6_txoptions *opt = np->cork.opt;
1456 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1457 struct flowi *fl = &inet->cork.fl;
1458 unsigned char proto = fl->proto;
1459 int err = 0;
1460
1461 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1462 goto out;
1463 tail_skb = &(skb_shinfo(skb)->frag_list);
1464
1465 /* move skb->data to ip header from ext header */
1466 if (skb->data < skb_network_header(skb))
1467 __skb_pull(skb, skb_network_offset(skb));
1468 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1469 __skb_pull(tmp_skb, skb_network_header_len(skb));
1470 *tail_skb = tmp_skb;
1471 tail_skb = &(tmp_skb->next);
1472 skb->len += tmp_skb->len;
1473 skb->data_len += tmp_skb->len;
1474 skb->truesize += tmp_skb->truesize;
1475 tmp_skb->destructor = NULL;
1476 tmp_skb->sk = NULL;
1477 }
1478
1479 /* Allow local fragmentation. */
1480 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1481 skb->local_df = 1;
1482
1483 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1484 __skb_pull(skb, skb_network_header_len(skb));
1485 if (opt && opt->opt_flen)
1486 ipv6_push_frag_opts(skb, opt, &proto);
1487 if (opt && opt->opt_nflen)
1488 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1489
1490 skb_push(skb, sizeof(struct ipv6hdr));
1491 skb_reset_network_header(skb);
1492 hdr = ipv6_hdr(skb);
1493
1494 *(__be32*)hdr = fl->fl6_flowlabel |
1495 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1496
1497 hdr->hop_limit = np->cork.hop_limit;
1498 hdr->nexthdr = proto;
1499 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1500 ipv6_addr_copy(&hdr->daddr, final_dst);
1501
1502 skb->priority = sk->sk_priority;
1503 skb->mark = sk->sk_mark;
1504
1505 skb_dst_set(skb, dst_clone(&rt->u.dst));
1506 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1507 if (proto == IPPROTO_ICMPV6) {
1508 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1509
1510 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1511 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1512 }
1513
1514 err = ip6_local_out(skb);
1515 if (err) {
1516 if (err > 0)
1517 err = net_xmit_errno(err);
1518 if (err)
1519 goto error;
1520 }
1521
1522 out:
1523 ip6_cork_release(inet, np);
1524 return err;
1525 error:
1526 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1527 goto out;
1528 }
1529
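/*
 * ip6_flush_pending_frames - drop whatever ip6_append_data() has left
 * on the socket write queue, accounting the discarded skbs as output
 * discards, and release the cork state.
 */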
1530 void ip6_flush_pending_frames(struct sock *sk)
1531 {
1532 struct sk_buff *skb;
1533
1534 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1535 if (skb_dst(skb))
1536 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1537 IPSTATS_MIB_OUTDISCARDS);
1538 kfree_skb(skb);
1539 }
1540
1541 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1542 }