/*
 * IPv6 output functions
 * Linux INET6 implementation
 *
 * Authors:
 * Pedro Roque <roque@di.fc.ul.pt>
 *
 * Based on linux/net/ipv4/ip_output.c
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 * A.N.Kuznetsov : arithmetic in fragmentation.
 *                 extension headers are implemented.
 *                 route changes now work.
 *                 ip6_forward does not confuse sniffers.
 *                 etc.
 *
 * H. von Brand : Added missing #include <linux/string.h>
 * Imran Patel : frag id should be in NBO
 * Kazunori MIYAZAWA @USAGI
 *             : add ip6_append_data and related functions
 *               for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

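/*
 * Set the IPv6 payload length and run the netfilter LOCAL_OUT hook.
 * payload_len counts everything after the fixed 40-byte IPv6 header;
 * 0 is written when the payload exceeds IPV6_MAXPLEN (65535), the
 * convention used for jumbograms (RFC 2675).
 */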
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        /* nf_hook() returns 1 when the hooks accepted the packet;
         * anything else means it was stolen, queued or dropped.
         */
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}

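/*
 * Final output step: hand the packet to the neighbour layer. Multicast
 * packets that the local node should also receive (sk_mc_loop(), or a
 * multicast-routing socket that has not already forwarded them) are
 * cloned and looped back through ip6_dev_loopback_xmit() before the
 * original is transmitted.
 */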
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        rcu_read_lock();
        neigh = dst_get_neighbour_noref(dst);
        if (neigh) {
                int res = neigh_output(neigh, skb);

                rcu_read_unlock();
                return res;
        }
        rcu_read_unlock();
        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now);
                 * MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         * Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

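        /* First 32-bit word of the IPv6 header: version (6) in the top
         * four bits, traffic class in the next eight, flow label in the
         * low twenty. 0x60000000 supplies the version, tclass is shifted
         * into place, and fl6->flowlabel is already in network byte order.
         */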
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 * To avoid extra problems ND packets are sent through this
 * routine. It is code duplication, but we really want to avoid
 * extra checks, since ipv6_build_header is used by TCP (which
 * is performance critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        hdr->saddr = *saddr;
        hdr->daddr = *daddr;

        return 0;
}

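/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for that alert value: each matching socket but the last
 * receives a clone, the last one consumes the original skb, and the
 * function returns 1 so the caller stops forwarding it.
 */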
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined
                         * to the proxied address are passed to the input
                         * function for local processing.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         * We DO NOT make any processing on
         * RA packets, pushing them to user level AS IS
         * without any WARRANTY that the application will be able
         * to interpret them. The reason is that we
         * cannot make anything clever here.
         *
         * We are not end-node, so that if packet contains
         * AH/ESP, we cannot make anything.
         * Defragmentation would also be a mistake; RA packets
         * cannot be fragmented, because there is no guarantee
         * that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         * check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
         * send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;

                /*
                 * incoming and outgoing devices are the same
                 * send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                if (!rt->rt6i_peer)
                        rt6_bind_peer(rt, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu && !skb_is_gso(skb)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

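/*
 * Find the offset at which a Fragment header must be inserted: skip
 * the extension headers that must appear in every fragment (hop-by-hop,
 * the routing header and any destination options preceding it; with
 * MIPv6, a destination header carrying a Home Address option is also
 * kept with every fragment). *nexthdr is left pointing at the nexthdr
 * byte that will be rewritten to NEXTHDR_FRAGMENT.
 */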
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}

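/*
 * Choose the 32-bit fragment identification. When the route has an
 * inet_peer, a per-destination counter is used so IDs advance
 * independently per peer; otherwise fall back to a global atomic
 * counter, skipping the value 0 (ip6_fragment() uses a frag_id of 0
 * as its "not yet chosen" sentinel).
 */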
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
        static atomic_t ipv6_fragmentation_id;
        int old, new;

        if (rt && !(rt->dst.flags & DST_NOPEER)) {
                struct inet_peer *peer;

                if (!rt->rt6i_peer)
                        rt6_bind_peer(rt, 1);
                peer = rt->rt6i_peer;
                if (peer) {
                        fhdr->identification = htonl(inet_getid(peer, 0));
                        return;
                }
        }
        do {
                old = atomic_read(&ipv6_fragmentation_id);
                new = old + 1;
                if (!new)
                        new = 1;
        } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
        fhdr->identification = htonl(new);
}

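/*
 * Fragment an outgoing datagram. Two paths exist: a fast path that
 * reuses an existing frag_list when every element already has the
 * right geometry (each piece fits the MTU, all but the last are
 * multiples of 8 bytes, and enough headroom exists for the headers),
 * and a slow path that allocates a fresh skb per fragment and copies
 * the payload into it.
 */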
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one goes down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         * Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         * Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                 * then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 * Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 * Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 * Charge the memory for the fragment to any owner
                 * it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 * Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 * Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 * Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 * Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

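/*
 * Returns nonzero when a cached route can no longer be trusted for
 * this flow: the route is not a /128 host route to fl_addr, and the
 * socket's cached destination address (if any) differs from fl_addr.
 */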
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
               (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the not connected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE           --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rcu_read_lock();
        n = dst_get_neighbour_noref(*dst);
        if (n && !(n->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                rcu_read_unlock();
                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        } else {
                rcu_read_unlock();
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @can_sleep: we are in a sleepable context
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer-encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @can_sleep: we are in a sleepable context
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * It returns a valid dst pointer on success, or a pointer-encoded
 * error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                                    int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)

{
        struct sk_buff *skb;
        int err;

        /* The network device supports UDP large send offload,
         * so create one single skb packet containing the complete
         * UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

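/*
 * Recompute mtu and maxfraglen once corked data spills into a second
 * fragment: the first fragment must reserve the dst's header_len
 * (such as an IPsec header), while later fragments can be sized
 * against the path MTU directly.
 */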
static void ip6_append_data_mtu(int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = dst_mtu(rt->dst.path);
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

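/*
 * Append data to the pending (corked) queue on sk->sk_write_queue,
 * growing the tail skb or allocating new MTU-sized skbs as needed.
 * Nothing is transmitted here; ip6_push_pending_frames() later builds
 * the IPv6 header and sends the queue as one datagram.
 */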
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                    int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa */
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                if (rt->dst.flags & DST_XFRM_TUNNEL)
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
                else
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /* For UDP, check if TX timestamp is enabled */
        if (sk->sk_type == SOCK_DGRAM) {
                err = sock_tx_timestamp(sk, &tx_flags);
                if (err)
                        goto error;
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags, rt);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (skb == NULL || skb_prev == NULL)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                                else {
                                        /* Only the initial fragment
                                         * is time stamped.
                                         */
                                        tx_flags = 0;
                                }
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        if (sk->sk_type == SOCK_DGRAM)
                                skb_shinfo(skb)->tx_flags = tx_flags;

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;

                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != skb_frag_page(frag)) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        skb_frag_ref(skb, i);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from,
                                    skb_frag_address(frag) + skb_frag_size(frag),
                                    offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        skb_frag_size_add(frag, copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.base.dst) {
                dst_release(inet->cork.base.dst);
                inet->cork.base.dst = NULL;
                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

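/*
 * Flush the corked queue as a single datagram: the queued skbs are
 * chained onto the first one's frag_list, the extension headers and
 * IPv6 header are prepended, and the result is handed to
 * ip6_local_out(). The cork state is released in all cases.
 */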
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl6->flowlabel |
                         htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);