1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
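/*
 * Fill in the IPv6 payload length from the current skb length (set to 0
 * when the payload exceeds IPV6_MAXPLEN, the jumbogram case) and pass
 * the packet through the NF_INET_LOCAL_OUT hook.  ip6_local_out()
 * invokes dst_output() itself when the hook returns 1 (accepted
 * synchronously).
 */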
61 int __ip6_local_out(struct sk_buff *skb)
62 {
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
72 }
73
74 int ip6_local_out(struct sk_buff *skb)
75 {
76 int err;
77
78 err = __ip6_local_out(skb);
79 if (likely(err == 1))
80 err = dst_output(skb);
81
82 return err;
83 }
84 EXPORT_SYMBOL_GPL(ip6_local_out);
85
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88 {
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
94
95 netif_rx_ni(newskb);
96 return 0;
97 }
98
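/*
 * Last step before handing the packet to the neighbour layer: for
 * multicast destinations, loop a copy back to local listeners (and to
 * the multicast routing socket) when required, update OutMcast
 * statistics, then transmit via the cached hardware header or the
 * neighbour output function.
 */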
99 static int ip6_finish_output2(struct sk_buff *skb)
100 {
101 struct dst_entry *dst = skb_dst(skb);
102 struct net_device *dev = dst->dev;
103
104 skb->protocol = htons(ETH_P_IPV6);
105 skb->dev = dev;
106
107 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
108 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
109
110 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
111 ((mroute6_socket(dev_net(dev), skb) &&
112 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
113 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
114 &ipv6_hdr(skb)->saddr))) {
115 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
116
117 /* Do not check for IFF_ALLMULTI; multicast routing
118 is not supported in any case.
119 */
120 if (newskb)
121 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
122 newskb, NULL, newskb->dev,
123 ip6_dev_loopback_xmit);
124
125 if (ipv6_hdr(skb)->hop_limit == 0) {
126 IP6_INC_STATS(dev_net(dev), idev,
127 IPSTATS_MIB_OUTDISCARDS);
128 kfree_skb(skb);
129 return 0;
130 }
131 }
132
133 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
134 skb->len);
135 }
136
137 if (dst->hh)
138 return neigh_hh_output(dst->hh, skb);
139 else if (dst->neighbour)
140 return dst->neighbour->output(skb);
141
142 IP6_INC_STATS_BH(dev_net(dst->dev),
143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 kfree_skb(skb);
145 return -EINVAL;
146 }
147
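/*
 * Fragment the packet if it exceeds the path MTU (and is not GSO) or if
 * the destination requires all packets to carry a fragment header
 * (dst_allfrag); otherwise send it straight to ip6_finish_output2().
 */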
148 static int ip6_finish_output(struct sk_buff *skb)
149 {
150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151 dst_allfrag(skb_dst(skb)))
152 return ip6_fragment(skb, ip6_finish_output2);
153 else
154 return ip6_finish_output2(skb);
155 }
156
157 int ip6_output(struct sk_buff *skb)
158 {
159 struct net_device *dev = skb_dst(skb)->dev;
160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
161 if (unlikely(idev->cnf.disable_ipv6)) {
162 IP6_INC_STATS(dev_net(dev), idev,
163 IPSTATS_MIB_OUTDISCARDS);
164 kfree_skb(skb);
165 return 0;
166 }
167
168 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
169 ip6_finish_output,
170 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
171 }
172
173 /*
174 * xmit an sk_buff (used by TCP, SCTP and DCCP)
175 */
176
177 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
178 struct ipv6_txoptions *opt)
179 {
180 struct net *net = sock_net(sk);
181 struct ipv6_pinfo *np = inet6_sk(sk);
182 struct in6_addr *first_hop = &fl->fl6_dst;
183 struct dst_entry *dst = skb_dst(skb);
184 struct ipv6hdr *hdr;
185 u8 proto = fl->proto;
186 int seg_len = skb->len;
187 int hlimit = -1;
188 int tclass = 0;
189 u32 mtu;
190
191 if (opt) {
192 unsigned int head_room;
193
194 /* First: exthdrs may take lots of space (~8K for now);
195 MAX_HEADER is not enough.
196 */
197 head_room = opt->opt_nflen + opt->opt_flen;
198 seg_len += head_room;
199 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
200
201 if (skb_headroom(skb) < head_room) {
202 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
203 if (skb2 == NULL) {
204 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
205 IPSTATS_MIB_OUTDISCARDS);
206 kfree_skb(skb);
207 return -ENOBUFS;
208 }
209 kfree_skb(skb);
210 skb = skb2;
211 skb_set_owner_w(skb, sk);
212 }
213 if (opt->opt_flen)
214 ipv6_push_frag_opts(skb, opt, &proto);
215 if (opt->opt_nflen)
216 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
217 }
218
219 skb_push(skb, sizeof(struct ipv6hdr));
220 skb_reset_network_header(skb);
221 hdr = ipv6_hdr(skb);
222
223 /*
224 * Fill in the IPv6 header
225 */
226 if (np) {
227 tclass = np->tclass;
228 hlimit = np->hop_limit;
229 }
230 if (hlimit < 0)
231 hlimit = ip6_dst_hoplimit(dst);
232
233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
234
235 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto;
237 hdr->hop_limit = hlimit;
238
239 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
240 ipv6_addr_copy(&hdr->daddr, first_hop);
241
242 skb->priority = sk->sk_priority;
243 skb->mark = sk->sk_mark;
244
245 mtu = dst_mtu(dst);
246 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
247 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248 IPSTATS_MIB_OUT, skb->len);
249 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250 dst->dev, dst_output);
251 }
252
253 if (net_ratelimit())
254 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
255 skb->dev = dst->dev;
256 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
257 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
258 kfree_skb(skb);
259 return -EMSGSIZE;
260 }
261
262 EXPORT_SYMBOL(ip6_xmit);
263
264 /*
265 * To avoid extra problems, ND packets are sent through this
266 * routine. It's code duplication, but I really want to avoid
267 * extra checks since ipv6_build_header is used by TCP (which
268 * is performance-critical for us).
269 */
270
271 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
272 const struct in6_addr *saddr, const struct in6_addr *daddr,
273 int proto, int len)
274 {
275 struct ipv6_pinfo *np = inet6_sk(sk);
276 struct ipv6hdr *hdr;
277
278 skb->protocol = htons(ETH_P_IPV6);
279 skb->dev = dev;
280
281 skb_reset_network_header(skb);
282 skb_put(skb, sizeof(struct ipv6hdr));
283 hdr = ipv6_hdr(skb);
284
285 *(__be32*)hdr = htonl(0x60000000);
286
287 hdr->payload_len = htons(len);
288 hdr->nexthdr = proto;
289 hdr->hop_limit = np->hop_limit;
290
291 ipv6_addr_copy(&hdr->saddr, saddr);
292 ipv6_addr_copy(&hdr->daddr, daddr);
293
294 return 0;
295 }
296
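/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered on ip6_ra_chain with a matching selector (and, if bound,
 * a matching device).  Returns 1 if the packet was consumed by at least
 * one socket, 0 otherwise.
 */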
297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298 {
299 struct ip6_ra_chain *ra;
300 struct sock *last = NULL;
301
302 read_lock(&ip6_ra_lock);
303 for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 struct sock *sk = ra->sk;
305 if (sk && ra->sel == sel &&
306 (!sk->sk_bound_dev_if ||
307 sk->sk_bound_dev_if == skb->dev->ifindex)) {
308 if (last) {
309 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
310 if (skb2)
311 rawv6_rcv(last, skb2);
312 }
313 last = sk;
314 }
315 }
316
317 if (last) {
318 rawv6_rcv(last, skb);
319 read_unlock(&ip6_ra_lock);
320 return 1;
321 }
322 read_unlock(&ip6_ra_lock);
323 return 0;
324 }
325
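/*
 * Decide what to do with a packet destined to an address we proxy:
 * return 1 to hand NDISC messages to local input, -1 to drop (e.g.
 * link-local destinations, which a proxying router must not forward),
 * and 0 to forward normally.
 */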
326 static int ip6_forward_proxy_check(struct sk_buff *skb)
327 {
328 struct ipv6hdr *hdr = ipv6_hdr(skb);
329 u8 nexthdr = hdr->nexthdr;
330 int offset;
331
332 if (ipv6_ext_hdr(nexthdr)) {
333 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
334 if (offset < 0)
335 return 0;
336 } else
337 offset = sizeof(struct ipv6hdr);
338
339 if (nexthdr == IPPROTO_ICMPV6) {
340 struct icmp6hdr *icmp6;
341
342 if (!pskb_may_pull(skb, (skb_network_header(skb) +
343 offset + 1 - skb->data)))
344 return 0;
345
346 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
347
348 switch (icmp6->icmp6_type) {
349 case NDISC_ROUTER_SOLICITATION:
350 case NDISC_ROUTER_ADVERTISEMENT:
351 case NDISC_NEIGHBOUR_SOLICITATION:
352 case NDISC_NEIGHBOUR_ADVERTISEMENT:
353 case NDISC_REDIRECT:
354 /* For a unicast neighbor discovery message
355 * destined to the proxied address, pass it to the
356 * input function.
357 */
358 return 1;
359 default:
360 break;
361 }
362 }
363
364 /*
365 * The proxying router can't forward traffic sent to a link-local
366 * address, so signal the sender and discard the packet. This
367 * behavior is clarified by the MIPv6 specification.
368 */
369 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
370 dst_link_failure(skb);
371 return -1;
372 }
373
374 return 0;
375 }
376
377 static inline int ip6_forward_finish(struct sk_buff *skb)
378 {
379 return dst_output(skb);
380 }
381
382 int ip6_forward(struct sk_buff *skb)
383 {
384 struct dst_entry *dst = skb_dst(skb);
385 struct ipv6hdr *hdr = ipv6_hdr(skb);
386 struct inet6_skb_parm *opt = IP6CB(skb);
387 struct net *net = dev_net(dst->dev);
388 u32 mtu;
389
390 if (net->ipv6.devconf_all->forwarding == 0)
391 goto error;
392
393 if (skb_warn_if_lro(skb))
394 goto drop;
395
396 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
397 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
398 goto drop;
399 }
400
401 if (skb->pkt_type != PACKET_HOST)
402 goto drop;
403
404 skb_forward_csum(skb);
405
406 /*
407 * We DO NOT do any processing on RA packets,
408 * pushing them to user level AS IS without any
409 * WARRANTY that the application will be able
410 * to interpret them. The reason is that we
411 * cannot do anything clever here.
412 *
413 * We are not the end node, so if the packet contains
414 * AH/ESP, we cannot do anything with it.
415 * Defragmentation would also be a mistake; RA packets
416 * cannot be fragmented, because there is no guarantee
417 * that different fragments will follow the same path. --ANK
418 */
419 if (opt->ra) {
420 u8 *ptr = skb_network_header(skb) + opt->ra;
421 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
422 return 0;
423 }
424
425 /*
426 * check and decrement ttl
427 */
428 if (hdr->hop_limit <= 1) {
429 /* Force OUTPUT device used as source address */
430 skb->dev = dst->dev;
431 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
432 IP6_INC_STATS_BH(net,
433 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
434
435 kfree_skb(skb);
436 return -ETIMEDOUT;
437 }
438
439 /* XXX: idev->cnf.proxy_ndp? */
440 if (net->ipv6.devconf_all->proxy_ndp &&
441 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
442 int proxied = ip6_forward_proxy_check(skb);
443 if (proxied > 0)
444 return ip6_input(skb);
445 else if (proxied < 0) {
446 IP6_INC_STATS(net, ip6_dst_idev(dst),
447 IPSTATS_MIB_INDISCARDS);
448 goto drop;
449 }
450 }
451
452 if (!xfrm6_route_forward(skb)) {
453 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
454 goto drop;
455 }
456 dst = skb_dst(skb);
457
458 /* IPv6 specs say nothing about it, but it is clear that we cannot
459 send redirects to source routed frames.
460 We don't send redirects to frames decapsulated from IPsec.
461 */
462 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
463 !skb_sec_path(skb)) {
464 struct in6_addr *target = NULL;
465 struct rt6_info *rt;
466 struct neighbour *n = dst->neighbour;
467
468 /*
469 * incoming and outgoing devices are the same;
470 * send a redirect.
471 */
472
473 rt = (struct rt6_info *) dst;
474 if ((rt->rt6i_flags & RTF_GATEWAY))
475 target = (struct in6_addr*)&n->primary_key;
476 else
477 target = &hdr->daddr;
478
479 if (!rt->rt6i_peer)
480 rt6_bind_peer(rt, 1);
481
482 /* Limit redirects both by destination (here)
483 and by source (inside ndisc_send_redirect)
484 */
485 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
486 ndisc_send_redirect(skb, n, target);
487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr);
489
490 /* This check is security critical. */
491 if (addrtype == IPV6_ADDR_ANY ||
492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
493 goto error;
494 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
496 ICMPV6_NOT_NEIGHBOUR, 0);
497 goto error;
498 }
499 }
500
501 mtu = dst_mtu(dst);
502 if (mtu < IPV6_MIN_MTU)
503 mtu = IPV6_MIN_MTU;
504
505 if (skb->len > mtu && !skb_is_gso(skb)) {
506 /* Again, force OUTPUT device used as source address */
507 skb->dev = dst->dev;
508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
509 IP6_INC_STATS_BH(net,
510 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(net,
512 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
518 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
519 goto drop;
520 }
521
522 hdr = ipv6_hdr(skb);
523
524 /* Mangling hops number delayed to point after skb COW */
525
526 hdr->hop_limit--;
527
528 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530 ip6_forward_finish);
531
532 error:
533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
534 drop:
535 kfree_skb(skb);
536 return -EINVAL;
537 }
538
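/*
 * Copy the per-packet metadata (packet type, priority, dst, mark,
 * traffic-control index, netfilter and security state) from the
 * original skb to a freshly allocated fragment.
 */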
539 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
540 {
541 to->pkt_type = from->pkt_type;
542 to->priority = from->priority;
543 to->protocol = from->protocol;
544 skb_dst_drop(to);
545 skb_dst_set(to, dst_clone(skb_dst(from)));
546 to->dev = from->dev;
547 to->mark = from->mark;
548
549 #ifdef CONFIG_NET_SCHED
550 to->tc_index = from->tc_index;
551 #endif
552 nf_copy(to, from);
553 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555 to->nf_trace = from->nf_trace;
556 #endif
557 skb_copy_secmark(to, from);
558 }
559
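/*
 * Walk the extension header chain and return the offset at which the
 * Fragment header must be inserted (after any Hop-by-Hop, Routing and
 * preceding Destination Options headers).  *nexthdr is left pointing at
 * the "next header" byte that the caller will patch to NEXTHDR_FRAGMENT.
 */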
560 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
561 {
562 u16 offset = sizeof(struct ipv6hdr);
563 struct ipv6_opt_hdr *exthdr =
564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
565 unsigned int packet_len = skb->tail - skb->network_header;
566 int found_rhdr = 0;
567 *nexthdr = &ipv6_hdr(skb)->nexthdr;
568
569 while (offset + 1 <= packet_len) {
570
571 switch (**nexthdr) {
572
573 case NEXTHDR_HOP:
574 break;
575 case NEXTHDR_ROUTING:
576 found_rhdr = 1;
577 break;
578 case NEXTHDR_DEST:
579 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581 break;
582 #endif
583 if (found_rhdr)
584 return offset;
585 break;
586 default:
587 return offset;
588 }
589
590 offset += ipv6_optlen(exthdr);
591 *nexthdr = &exthdr->nexthdr;
592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593 offset);
594 }
595
596 return offset;
597 }
598
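/*
 * Fragment an IPv6 packet that is too big for the path MTU.  The fast
 * path reuses an existing frag_list (one fragment per list member) when
 * the geometry allows it; otherwise the slow path copies the payload
 * into newly allocated, 8-byte-aligned fragments.  @output is called
 * for each resulting fragment.
 */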
599 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
600 {
601 struct sk_buff *frag;
602 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
603 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
604 struct ipv6hdr *tmp_hdr;
605 struct frag_hdr *fh;
606 unsigned int mtu, hlen, left, len;
607 __be32 frag_id = 0;
608 int ptr, offset = 0, err=0;
609 u8 *prevhdr, nexthdr = 0;
610 struct net *net = dev_net(skb_dst(skb)->dev);
611
612 hlen = ip6_find_1stfragopt(skb, &prevhdr);
613 nexthdr = *prevhdr;
614
615 mtu = ip6_skb_dst_mtu(skb);
616
617 /* We must not fragment if the socket is set to force MTU discovery
618 * or if the skb was not generated by a local socket.
619 */
620 if (!skb->local_df && skb->len > mtu) {
621 skb->dev = skb_dst(skb)->dev;
622 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
623 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
624 IPSTATS_MIB_FRAGFAILS);
625 kfree_skb(skb);
626 return -EMSGSIZE;
627 }
628
629 if (np && np->frag_size < mtu) {
630 if (np->frag_size)
631 mtu = np->frag_size;
632 }
633 mtu -= hlen + sizeof(struct frag_hdr);
634
635 if (skb_has_frag_list(skb)) {
636 int first_len = skb_pagelen(skb);
637 struct sk_buff *frag2;
638
639 if (first_len - hlen > mtu ||
640 ((first_len - hlen) & 7) ||
641 skb_cloned(skb))
642 goto slow_path;
643
644 skb_walk_frags(skb, frag) {
645 /* Correct geometry. */
646 if (frag->len > mtu ||
647 ((frag->len & 7) && frag->next) ||
648 skb_headroom(frag) < hlen)
649 goto slow_path_clean;
650
651 /* Partially cloned skb? */
652 if (skb_shared(frag))
653 goto slow_path_clean;
654
655 BUG_ON(frag->sk);
656 if (skb->sk) {
657 frag->sk = skb->sk;
658 frag->destructor = sock_wfree;
659 }
660 skb->truesize -= frag->truesize;
661 }
662
663 err = 0;
664 offset = 0;
665 frag = skb_shinfo(skb)->frag_list;
666 skb_frag_list_init(skb);
667 /* BUILD HEADER */
668
669 *prevhdr = NEXTHDR_FRAGMENT;
670 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
671 if (!tmp_hdr) {
672 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
673 IPSTATS_MIB_FRAGFAILS);
674 return -ENOMEM;
675 }
676
677 __skb_pull(skb, hlen);
678 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
679 __skb_push(skb, hlen);
680 skb_reset_network_header(skb);
681 memcpy(skb_network_header(skb), tmp_hdr, hlen);
682
683 ipv6_select_ident(fh);
684 fh->nexthdr = nexthdr;
685 fh->reserved = 0;
686 fh->frag_off = htons(IP6_MF);
687 frag_id = fh->identification;
688
689 first_len = skb_pagelen(skb);
690 skb->data_len = first_len - skb_headlen(skb);
691 skb->len = first_len;
692 ipv6_hdr(skb)->payload_len = htons(first_len -
693 sizeof(struct ipv6hdr));
694
695 dst_hold(&rt->dst);
696
697 for (;;) {
698 /* Prepare header of the next frame,
699 * before the previous one goes down. */
700 if (frag) {
701 frag->ip_summed = CHECKSUM_NONE;
702 skb_reset_transport_header(frag);
703 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
704 __skb_push(frag, hlen);
705 skb_reset_network_header(frag);
706 memcpy(skb_network_header(frag), tmp_hdr,
707 hlen);
708 offset += skb->len - hlen - sizeof(struct frag_hdr);
709 fh->nexthdr = nexthdr;
710 fh->reserved = 0;
711 fh->frag_off = htons(offset);
712 if (frag->next != NULL)
713 fh->frag_off |= htons(IP6_MF);
714 fh->identification = frag_id;
715 ipv6_hdr(frag)->payload_len =
716 htons(frag->len -
717 sizeof(struct ipv6hdr));
718 ip6_copy_metadata(frag, skb);
719 }
720
721 err = output(skb);
722 if (!err)
723 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
724 IPSTATS_MIB_FRAGCREATES);
725
726 if (err || !frag)
727 break;
728
729 skb = frag;
730 frag = skb->next;
731 skb->next = NULL;
732 }
733
734 kfree(tmp_hdr);
735
736 if (err == 0) {
737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
738 IPSTATS_MIB_FRAGOKS);
739 dst_release(&rt->dst);
740 return 0;
741 }
742
743 while (frag) {
744 skb = frag->next;
745 kfree_skb(frag);
746 frag = skb;
747 }
748
749 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
750 IPSTATS_MIB_FRAGFAILS);
751 dst_release(&rt->dst);
752 return err;
753
754 slow_path_clean:
755 skb_walk_frags(skb, frag2) {
756 if (frag2 == frag)
757 break;
758 frag2->sk = NULL;
759 frag2->destructor = NULL;
760 skb->truesize += frag2->truesize;
761 }
762 }
763
764 slow_path:
765 left = skb->len - hlen; /* Space per frame */
766 ptr = hlen; /* Where to start from */
767
768 /*
769 * Fragment the datagram.
770 */
771
772 *prevhdr = NEXTHDR_FRAGMENT;
773
774 /*
775 * Keep copying data until we run out.
776 */
777 while (left > 0) {
778 len = left;
779 /* IF: it doesn't fit, use 'mtu' - the data space left */
780 if (len > mtu)
781 len = mtu;
782 /* IF: we are not sending up to and including the packet end,
783 then align the next start on an eight-byte boundary */
784 if (len < left) {
785 len &= ~7;
786 }
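	/*
	 * Example (illustrative numbers): with a 1500 byte link MTU and
	 * a bare 40 byte IPv6 header, mtu was reduced above to
	 * 1500 - 40 - 8 = 1452, and the masking here trims a non-final
	 * fragment to 1452 & ~7 = 1448 payload bytes, keeping every
	 * fragment offset a multiple of 8.
	 */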
787 /*
788 * Allocate buffer.
789 */
790
791 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
792 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
793 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
794 IPSTATS_MIB_FRAGFAILS);
795 err = -ENOMEM;
796 goto fail;
797 }
798
799 /*
800 * Set up data on packet
801 */
802
803 ip6_copy_metadata(frag, skb);
804 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
805 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
806 skb_reset_network_header(frag);
807 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
808 frag->transport_header = (frag->network_header + hlen +
809 sizeof(struct frag_hdr));
810
811 /*
812 * Charge the memory for the fragment to any owner
813 * it might possess
814 */
815 if (skb->sk)
816 skb_set_owner_w(frag, skb->sk);
817
818 /*
819 * Copy the packet header into the new buffer.
820 */
821 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
822
823 /*
824 * Build fragment header.
825 */
826 fh->nexthdr = nexthdr;
827 fh->reserved = 0;
828 if (!frag_id) {
829 ipv6_select_ident(fh);
830 frag_id = fh->identification;
831 } else
832 fh->identification = frag_id;
833
834 /*
835 * Copy a block of the IP datagram.
836 */
837 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
838 BUG();
839 left -= len;
840
841 fh->frag_off = htons(offset);
842 if (left > 0)
843 fh->frag_off |= htons(IP6_MF);
844 ipv6_hdr(frag)->payload_len = htons(frag->len -
845 sizeof(struct ipv6hdr));
846
847 ptr += len;
848 offset += len;
849
850 /*
851 * Put this fragment into the sending queue.
852 */
853 err = output(frag);
854 if (err)
855 goto fail;
856
857 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
858 IPSTATS_MIB_FRAGCREATES);
859 }
860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861 IPSTATS_MIB_FRAGOKS);
862 kfree_skb(skb);
863 return err;
864
865 fail:
866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867 IPSTATS_MIB_FRAGFAILS);
868 kfree_skb(skb);
869 return err;
870 }
871
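/*
 * Helper for ip6_sk_dst_check(): returns true when the cached route no
 * longer matches the flow, i.e. it is neither an exact /128 route to
 * the flow destination nor validated by the saved daddr_cache /
 * saddr_cache pointer.
 */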
872 static inline int ip6_rt_check(struct rt6key *rt_key,
873 struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache)
875 {
876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
878 }
879
880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
882 struct flowi *fl)
883 {
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
886
887 if (!dst)
888 goto out;
889
890 /* Yes, checking route validity in the not-connected
891 * case is not very simple. Take into account
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
894 *
895 * 1. ip6_rt_check(): If the route was a host route,
896 * check that the cached destination is current.
897 * If it is a network route, we may still
898 * check its validity using a saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save the whole address now
901 * (because the main consumer of this service
902 * is TCP, which does not have this problem),
903 * so this last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
906 */
907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908 #ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
910 #endif
911 (fl->oif && fl->oif != dst->dev->ifindex)) {
912 dst_release(dst);
913 dst = NULL;
914 }
915
916 out:
917 return dst;
918 }
919
920 static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi *fl)
922 {
923 int err;
924 struct net *net = sock_net(sk);
925
926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl);
928
929 if ((err = (*dst)->error))
930 goto out_err_release;
931
932 if (ipv6_addr_any(&fl->fl6_src)) {
933 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
934 &fl->fl6_dst,
935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl->fl6_src);
937 if (err)
938 goto out_err_release;
939 }
940
941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
942 /*
943 * Here, if the dst entry we've looked up
944 * has a neighbour entry that is in the INCOMPLETE
945 * state and the src address from the flow is
946 * marked as OPTIMISTIC, we release the found
947 * dst entry and replace it with the
948 * dst entry of the nexthop router
949 */
950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp;
952 struct flowi fl_gw;
953 int redirect;
954
955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
956 (*dst)->dev, 1);
957
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959 if (ifp)
960 in6_ifa_put(ifp);
961
962 if (redirect) {
963 /*
964 * We need to get the dst entry for the
965 * default router instead
966 */
967 dst_release(*dst);
968 memcpy(&fl_gw, fl, sizeof(struct flowi));
969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw);
971 if ((err = (*dst)->error))
972 goto out_err_release;
973 }
974 }
975 #endif
976
977 return 0;
978
979 out_err_release:
980 if (err == -ENETUNREACH)
981 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
985 }
986
987 /**
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup
992 *
993 * This function performs a route lookup on the given flow.
994 *
995 * It returns zero on success, or a standard errno code on error.
996 */
997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
998 {
999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl);
1001 }
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
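/*
 * Illustrative sketch (not part of the original file): how a caller
 * might use ip6_dst_lookup().  The helper name and the UDP protocol
 * choice are hypothetical; the flowi fields used (proto, oif, fl6_dst)
 * are the same ones this file itself fills in.
 */
#if 0
static int example_route_lookup(struct sock *sk, const struct in6_addr *daddr)
{
	struct dst_entry *dst;
	struct flowi fl;
	int err;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_UDP;
	fl.oif = sk->sk_bound_dev_if;
	ipv6_addr_copy(&fl.fl6_dst, daddr);

	/* Picks a source address into fl.fl6_src if it was left unspecified. */
	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		return err;

	/* ... attach dst to an skb or cache it on the socket ... */
	dst_release(dst);
	return 0;
}
#endif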
1003
1004 /**
1005 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1006 * @sk: socket which provides route info
1007 * @fl: flow to lookup
1008 * @final_dst: final destination address for ipsec lookup
1009 * @can_sleep: we are in a sleepable context
1010 *
1011 * This function performs a route lookup on the given flow.
1012 *
1013 * It returns a valid dst pointer on success, or a pointer encoded
1014 * error code.
1015 */
1016 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl,
1017 const struct in6_addr *final_dst,
1018 bool can_sleep)
1019 {
1020 struct dst_entry *dst = NULL;
1021 int err;
1022
1023 err = ip6_dst_lookup_tail(sk, &dst, fl);
1024 if (err)
1025 return ERR_PTR(err);
1026 if (final_dst)
1027 ipv6_addr_copy(&fl->fl6_dst, final_dst);
1028 if (can_sleep)
1029 fl->flags |= FLOWI_FLAG_CAN_SLEEP;
1030
1031 err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
1032 if (err)
1033 return ERR_PTR(err);
1034 return dst;
1035 }
1036 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
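/*
 * Illustrative sketch (not part of the original file): unlike
 * ip6_dst_lookup(), ip6_dst_lookup_flow() returns the dst directly and
 * encodes failures with ERR_PTR(), so callers test it with IS_ERR().
 * The helper name below is hypothetical.
 */
#if 0
static struct dst_entry *example_flow_lookup(struct sock *sk, struct flowi *fl,
					     const struct in6_addr *final_dst)
{
	struct dst_entry *dst;

	dst = ip6_dst_lookup_flow(sk, fl, final_dst, false);
	if (IS_ERR(dst))
		return NULL;		/* or propagate PTR_ERR(dst) */
	return dst;			/* caller must dst_release() it */
}
#endif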
1037
1038 /**
1039 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1040 * @sk: socket which provides the dst cache and route info
1041 * @fl: flow to lookup
1042 * @final_dst: final destination address for ipsec lookup
1043 * @can_sleep: we are in a sleepable context
1044 *
1045 * This function performs a route lookup on the given flow with the
1046 * possibility of using the cached route in the socket if it is valid.
1047 * It will take the socket dst lock when operating on the dst cache.
1048 * As a result, this function can only be used in process context.
1049 *
1050 * It returns a valid dst pointer on success, or a pointer encoded
1051 * error code.
1052 */
1053 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl,
1054 const struct in6_addr *final_dst,
1055 bool can_sleep)
1056 {
1057 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1058 int err;
1059
1060 dst = ip6_sk_dst_check(sk, dst, fl);
1061
1062 err = ip6_dst_lookup_tail(sk, &dst, fl);
1063 if (err)
1064 return ERR_PTR(err);
1065 if (final_dst)
1066 ipv6_addr_copy(&fl->fl6_dst, final_dst);
1067 if (can_sleep)
1068 fl->flags |= FLOWI_FLAG_CAN_SLEEP;
1069
1070 err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0);
1071 if (err)
1072 return ERR_PTR(err);
1073 return dst;
1074 }
1075 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1076
1077 static inline int ip6_ufo_append_data(struct sock *sk,
1078 int getfrag(void *from, char *to, int offset, int len,
1079 int odd, struct sk_buff *skb),
1080 void *from, int length, int hh_len, int fragheaderlen,
1081 int transhdrlen, int mtu,unsigned int flags)
1082
1083 {
1084 struct sk_buff *skb;
1085 int err;
1086
1087 /* The network device supports UDP large send offload, so
1088 * create one single skb packet containing the complete
1089 * UDP datagram.
1090 */
1091 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1092 skb = sock_alloc_send_skb(sk,
1093 hh_len + fragheaderlen + transhdrlen + 20,
1094 (flags & MSG_DONTWAIT), &err);
1095 if (skb == NULL)
1096 return -ENOMEM;
1097
1098 /* reserve space for Hardware header */
1099 skb_reserve(skb, hh_len);
1100
1101 /* create space for UDP/IP header */
1102 skb_put(skb, fragheaderlen + transhdrlen);
1103
1104 /* initialize network header pointer */
1105 skb_reset_network_header(skb);
1106
1107 /* initialize protocol header pointer */
1108 skb->transport_header = skb->network_header + fragheaderlen;
1109
1110 skb->ip_summed = CHECKSUM_PARTIAL;
1111 skb->csum = 0;
1112 }
1113
1114 err = skb_append_datato_frags(sk, skb, getfrag, from,
1115 (length - transhdrlen));
1116 if (!err) {
1117 struct frag_hdr fhdr;
1118
1119 /* Specify the length of each IPv6 datagram fragment.
1120 * It has to be a multiple of 8.
1121 */
1122 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1123 sizeof(struct frag_hdr)) & ~7;
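		/*
		 * Worked example (illustrative numbers): mtu = 1500 and
		 * fragheaderlen = 40 (plain IPv6 header) give
		 * (1500 - 40 - 8) & ~7 = 1448, so the device emits
		 * fragments whose payload is always a multiple of 8.
		 */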
1124 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1125 ipv6_select_ident(&fhdr);
1126 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1127 __skb_queue_tail(&sk->sk_write_queue, skb);
1128
1129 return 0;
1130 }
1131 /* There is not enough support to do UDP LSO,
1132 * so follow the normal path.
1133 */
1134 kfree_skb(skb);
1135
1136 return err;
1137 }
1138
1139 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1140 gfp_t gfp)
1141 {
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1143 }
1144
1145 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1146 gfp_t gfp)
1147 {
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1149 }
1150
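/*
 * Append data to the pending (corked) queue for @sk.  The first call
 * sets up the cork state (options, route, MTU); subsequent calls keep
 * adding data, growing existing skbs or allocating new ones at fragment
 * boundaries, until ip6_push_pending_frames() builds the final IPv6
 * header(s) and transmits the queue.
 */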
1151 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1152 int offset, int len, int odd, struct sk_buff *skb),
1153 void *from, int length, int transhdrlen,
1154 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1155 struct rt6_info *rt, unsigned int flags, int dontfrag)
1156 {
1157 struct inet_sock *inet = inet_sk(sk);
1158 struct ipv6_pinfo *np = inet6_sk(sk);
1159 struct sk_buff *skb;
1160 unsigned int maxfraglen, fragheaderlen;
1161 int exthdrlen;
1162 int hh_len;
1163 int mtu;
1164 int copy;
1165 int err;
1166 int offset = 0;
1167 int csummode = CHECKSUM_NONE;
1168 __u8 tx_flags = 0;
1169
1170 if (flags&MSG_PROBE)
1171 return 0;
1172 if (skb_queue_empty(&sk->sk_write_queue)) {
1173 /*
1174 * setup for corking
1175 */
1176 if (opt) {
1177 if (WARN_ON(np->cork.opt))
1178 return -EINVAL;
1179
1180 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1181 if (unlikely(np->cork.opt == NULL))
1182 return -ENOBUFS;
1183
1184 np->cork.opt->tot_len = opt->tot_len;
1185 np->cork.opt->opt_flen = opt->opt_flen;
1186 np->cork.opt->opt_nflen = opt->opt_nflen;
1187
1188 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1189 sk->sk_allocation);
1190 if (opt->dst0opt && !np->cork.opt->dst0opt)
1191 return -ENOBUFS;
1192
1193 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1194 sk->sk_allocation);
1195 if (opt->dst1opt && !np->cork.opt->dst1opt)
1196 return -ENOBUFS;
1197
1198 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1199 sk->sk_allocation);
1200 if (opt->hopopt && !np->cork.opt->hopopt)
1201 return -ENOBUFS;
1202
1203 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1204 sk->sk_allocation);
1205 if (opt->srcrt && !np->cork.opt->srcrt)
1206 return -ENOBUFS;
1207
1208 /* need source address above --miyazawa */
1209 }
1210 dst_hold(&rt->dst);
1211 inet->cork.dst = &rt->dst;
1212 inet->cork.fl = *fl;
1213 np->cork.hop_limit = hlimit;
1214 np->cork.tclass = tclass;
1215 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1216 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1217 if (np->frag_size < mtu) {
1218 if (np->frag_size)
1219 mtu = np->frag_size;
1220 }
1221 inet->cork.fragsize = mtu;
1222 if (dst_allfrag(rt->dst.path))
1223 inet->cork.flags |= IPCORK_ALLFRAG;
1224 inet->cork.length = 0;
1225 sk->sk_sndmsg_page = NULL;
1226 sk->sk_sndmsg_off = 0;
1227 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1228 rt->rt6i_nfheader_len;
1229 length += exthdrlen;
1230 transhdrlen += exthdrlen;
1231 } else {
1232 rt = (struct rt6_info *)inet->cork.dst;
1233 fl = &inet->cork.fl;
1234 opt = np->cork.opt;
1235 transhdrlen = 0;
1236 exthdrlen = 0;
1237 mtu = inet->cork.fragsize;
1238 }
1239
1240 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1241
1242 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1243 (opt ? opt->opt_nflen : 0);
1244 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
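	/*
	 * Worked example (illustrative numbers): mtu = 1500 and
	 * fragheaderlen = 40 give ((1460 & ~7) + 40) - 8 = 1488, the
	 * largest packet length that still leaves room for a Fragment
	 * header when the message has to be split.
	 */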
1245
1246 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1247 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1248 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1249 return -EMSGSIZE;
1250 }
1251 }
1252
1253 /* For UDP, check if TX timestamp is enabled */
1254 if (sk->sk_type == SOCK_DGRAM) {
1255 err = sock_tx_timestamp(sk, &tx_flags);
1256 if (err)
1257 goto error;
1258 }
1259
1260 /*
1261 * Let's try using as much space as possible.
1262 * Use MTU if total length of the message fits into the MTU.
1263 * Otherwise, we need to reserve fragment header and
1264 * fragment alignment (= 8-15 octets, in total).
1265 *
1266 * Note that we may need to "move" the data from the tail
1267 * of the buffer to the new fragment when we split
1268 * the message.
1269 *
1270 * FIXME: It may be fragmented into multiple chunks
1271 * at once if non-fragmentable extension headers
1272 * are too large.
1273 * --yoshfuji
1274 */
1275
1276 inet->cork.length += length;
1277 if (length > mtu) {
1278 int proto = sk->sk_protocol;
1279 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
1280 ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
1281 return -EMSGSIZE;
1282 }
1283
1284 if (proto == IPPROTO_UDP &&
1285 (rt->dst.dev->features & NETIF_F_UFO)) {
1286
1287 err = ip6_ufo_append_data(sk, getfrag, from, length,
1288 hh_len, fragheaderlen,
1289 transhdrlen, mtu, flags);
1290 if (err)
1291 goto error;
1292 return 0;
1293 }
1294 }
1295
1296 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1297 goto alloc_new_skb;
1298
1299 while (length > 0) {
1300 /* Check if the remaining data fits into current packet. */
1301 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1302 if (copy < length)
1303 copy = maxfraglen - skb->len;
1304
1305 if (copy <= 0) {
1306 char *data;
1307 unsigned int datalen;
1308 unsigned int fraglen;
1309 unsigned int fraggap;
1310 unsigned int alloclen;
1311 struct sk_buff *skb_prev;
1312 alloc_new_skb:
1313 skb_prev = skb;
1314
1315 /* There's no room in the current skb */
1316 if (skb_prev)
1317 fraggap = skb_prev->len - maxfraglen;
1318 else
1319 fraggap = 0;
1320
1321 /*
1322 * If remaining data exceeds the mtu,
1323 * we know we need more fragment(s).
1324 */
1325 datalen = length + fraggap;
1326 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1327 datalen = maxfraglen - fragheaderlen;
1328
1329 fraglen = datalen + fragheaderlen;
1330 if ((flags & MSG_MORE) &&
1331 !(rt->dst.dev->features&NETIF_F_SG))
1332 alloclen = mtu;
1333 else
1334 alloclen = datalen + fragheaderlen;
1335
1336 /*
1337 * The last fragment gets additional space at tail.
1338 * Note: we overallocate on fragments with MSG_MORE
1339 * because we have no idea if we're the last one.
1340 */
1341 if (datalen == length + fraggap)
1342 alloclen += rt->dst.trailer_len;
1343
1344 /*
1345 * We just reserve space for fragment header.
1346 * Note: this may be overallocation if the message
1347 * (without MSG_MORE) fits into the MTU.
1348 */
1349 alloclen += sizeof(struct frag_hdr);
1350
1351 if (transhdrlen) {
1352 skb = sock_alloc_send_skb(sk,
1353 alloclen + hh_len,
1354 (flags & MSG_DONTWAIT), &err);
1355 } else {
1356 skb = NULL;
1357 if (atomic_read(&sk->sk_wmem_alloc) <=
1358 2 * sk->sk_sndbuf)
1359 skb = sock_wmalloc(sk,
1360 alloclen + hh_len, 1,
1361 sk->sk_allocation);
1362 if (unlikely(skb == NULL))
1363 err = -ENOBUFS;
1364 else {
1365 /* Only the initial fragment
1366 * is time stamped.
1367 */
1368 tx_flags = 0;
1369 }
1370 }
1371 if (skb == NULL)
1372 goto error;
1373 /*
1374 * Fill in the control structures
1375 */
1376 skb->ip_summed = csummode;
1377 skb->csum = 0;
1378 /* reserve for fragmentation */
1379 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1380
1381 if (sk->sk_type == SOCK_DGRAM)
1382 skb_shinfo(skb)->tx_flags = tx_flags;
1383
1384 /*
1385 * Find where to start putting bytes
1386 */
1387 data = skb_put(skb, fraglen);
1388 skb_set_network_header(skb, exthdrlen);
1389 data += fragheaderlen;
1390 skb->transport_header = (skb->network_header +
1391 fragheaderlen);
1392 if (fraggap) {
1393 skb->csum = skb_copy_and_csum_bits(
1394 skb_prev, maxfraglen,
1395 data + transhdrlen, fraggap, 0);
1396 skb_prev->csum = csum_sub(skb_prev->csum,
1397 skb->csum);
1398 data += fraggap;
1399 pskb_trim_unique(skb_prev, maxfraglen);
1400 }
1401 copy = datalen - transhdrlen - fraggap;
1402 if (copy < 0) {
1403 err = -EINVAL;
1404 kfree_skb(skb);
1405 goto error;
1406 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1407 err = -EFAULT;
1408 kfree_skb(skb);
1409 goto error;
1410 }
1411
1412 offset += copy;
1413 length -= datalen - fraggap;
1414 transhdrlen = 0;
1415 exthdrlen = 0;
1416 csummode = CHECKSUM_NONE;
1417
1418 /*
1419 * Put the packet on the pending queue
1420 */
1421 __skb_queue_tail(&sk->sk_write_queue, skb);
1422 continue;
1423 }
1424
1425 if (copy > length)
1426 copy = length;
1427
1428 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1429 unsigned int off;
1430
1431 off = skb->len;
1432 if (getfrag(from, skb_put(skb, copy),
1433 offset, copy, off, skb) < 0) {
1434 __skb_trim(skb, off);
1435 err = -EFAULT;
1436 goto error;
1437 }
1438 } else {
1439 int i = skb_shinfo(skb)->nr_frags;
1440 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1441 struct page *page = sk->sk_sndmsg_page;
1442 int off = sk->sk_sndmsg_off;
1443 unsigned int left;
1444
1445 if (page && (left = PAGE_SIZE - off) > 0) {
1446 if (copy >= left)
1447 copy = left;
1448 if (page != frag->page) {
1449 if (i == MAX_SKB_FRAGS) {
1450 err = -EMSGSIZE;
1451 goto error;
1452 }
1453 get_page(page);
1454 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1455 frag = &skb_shinfo(skb)->frags[i];
1456 }
1457 } else if(i < MAX_SKB_FRAGS) {
1458 if (copy > PAGE_SIZE)
1459 copy = PAGE_SIZE;
1460 page = alloc_pages(sk->sk_allocation, 0);
1461 if (page == NULL) {
1462 err = -ENOMEM;
1463 goto error;
1464 }
1465 sk->sk_sndmsg_page = page;
1466 sk->sk_sndmsg_off = 0;
1467
1468 skb_fill_page_desc(skb, i, page, 0, 0);
1469 frag = &skb_shinfo(skb)->frags[i];
1470 } else {
1471 err = -EMSGSIZE;
1472 goto error;
1473 }
1474 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1475 err = -EFAULT;
1476 goto error;
1477 }
1478 sk->sk_sndmsg_off += copy;
1479 frag->size += copy;
1480 skb->len += copy;
1481 skb->data_len += copy;
1482 skb->truesize += copy;
1483 atomic_add(copy, &sk->sk_wmem_alloc);
1484 }
1485 offset += copy;
1486 length -= copy;
1487 }
1488 return 0;
1489 error:
1490 inet->cork.length -= length;
1491 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1492 return err;
1493 }
1494
1495 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1496 {
1497 if (np->cork.opt) {
1498 kfree(np->cork.opt->dst0opt);
1499 kfree(np->cork.opt->dst1opt);
1500 kfree(np->cork.opt->hopopt);
1501 kfree(np->cork.opt->srcrt);
1502 kfree(np->cork.opt);
1503 np->cork.opt = NULL;
1504 }
1505
1506 if (inet->cork.dst) {
1507 dst_release(inet->cork.dst);
1508 inet->cork.dst = NULL;
1509 inet->cork.flags &= ~IPCORK_ALLFRAG;
1510 }
1511 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1512 }
1513
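/*
 * Take everything queued by ip6_append_data(), chain the skbs into one
 * packet via frag_list, prepend the final extension headers and IPv6
 * header from the cork state, and send it with ip6_local_out().
 */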
1514 int ip6_push_pending_frames(struct sock *sk)
1515 {
1516 struct sk_buff *skb, *tmp_skb;
1517 struct sk_buff **tail_skb;
1518 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1519 struct inet_sock *inet = inet_sk(sk);
1520 struct ipv6_pinfo *np = inet6_sk(sk);
1521 struct net *net = sock_net(sk);
1522 struct ipv6hdr *hdr;
1523 struct ipv6_txoptions *opt = np->cork.opt;
1524 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1525 struct flowi *fl = &inet->cork.fl;
1526 unsigned char proto = fl->proto;
1527 int err = 0;
1528
1529 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1530 goto out;
1531 tail_skb = &(skb_shinfo(skb)->frag_list);
1532
1533 /* move skb->data to ip header from ext header */
1534 if (skb->data < skb_network_header(skb))
1535 __skb_pull(skb, skb_network_offset(skb));
1536 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1537 __skb_pull(tmp_skb, skb_network_header_len(skb));
1538 *tail_skb = tmp_skb;
1539 tail_skb = &(tmp_skb->next);
1540 skb->len += tmp_skb->len;
1541 skb->data_len += tmp_skb->len;
1542 skb->truesize += tmp_skb->truesize;
1543 tmp_skb->destructor = NULL;
1544 tmp_skb->sk = NULL;
1545 }
1546
1547 /* Allow local fragmentation. */
1548 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1549 skb->local_df = 1;
1550
1551 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1552 __skb_pull(skb, skb_network_header_len(skb));
1553 if (opt && opt->opt_flen)
1554 ipv6_push_frag_opts(skb, opt, &proto);
1555 if (opt && opt->opt_nflen)
1556 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1557
1558 skb_push(skb, sizeof(struct ipv6hdr));
1559 skb_reset_network_header(skb);
1560 hdr = ipv6_hdr(skb);
1561
1562 *(__be32*)hdr = fl->fl6_flowlabel |
1563 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1564
1565 hdr->hop_limit = np->cork.hop_limit;
1566 hdr->nexthdr = proto;
1567 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1568 ipv6_addr_copy(&hdr->daddr, final_dst);
1569
1570 skb->priority = sk->sk_priority;
1571 skb->mark = sk->sk_mark;
1572
1573 skb_dst_set(skb, dst_clone(&rt->dst));
1574 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1575 if (proto == IPPROTO_ICMPV6) {
1576 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1577
1578 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1579 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1580 }
1581
1582 err = ip6_local_out(skb);
1583 if (err) {
1584 if (err > 0)
1585 err = net_xmit_errno(err);
1586 if (err)
1587 goto error;
1588 }
1589
1590 out:
1591 ip6_cork_release(inet, np);
1592 return err;
1593 error:
1594 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1595 goto out;
1596 }
1597
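/*
 * Drop everything still sitting on the cork queue (counting each skb as
 * an output discard) and release the cork state.
 */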
1598 void ip6_flush_pending_frames(struct sock *sk)
1599 {
1600 struct sk_buff *skb;
1601
1602 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1603 if (skb_dst(skb))
1604 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1605 IPSTATS_MIB_OUTDISCARDS);
1606 kfree_skb(skb);
1607 }
1608
1609 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1610 }