net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
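/*
 * __ip6_local_out - fill in the payload length and run the LOCAL_OUT hook
 *
 * Sets payload_len from skb->len (0 if it would exceed IPV6_MAXPLEN, i.e.
 * a jumbogram) and passes the packet to the NF_INET_LOCAL_OUT netfilter
 * hook with dst_output() as the continuation.
 */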
61 int __ip6_local_out(struct sk_buff *skb)
62 {
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
72 }
73
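/*
 * ip6_local_out - output a locally generated IPv6 packet
 *
 * Runs __ip6_local_out(); if the netfilter hook accepts the packet
 * (return value 1), hand it to dst_output() for transmission.
 */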
74 int ip6_local_out(struct sk_buff *skb)
75 {
76 int err;
77
78 err = __ip6_local_out(skb);
79 if (likely(err == 1))
80 err = dst_output(skb);
81
82 return err;
83 }
84 EXPORT_SYMBOL_GPL(ip6_local_out);
85
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88 {
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
94
95 netif_rx_ni(newskb);
96 return 0;
97 }
98
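/*
 * ip6_finish_output2 - last step of the output path
 *
 * For multicast destinations, loop a copy back to the local stack when
 * required and update the OUTMCAST counters.  Then resolve the neighbour
 * for the dst entry and hand the packet to neigh_output(); if no
 * neighbour is available the packet is dropped and OUTNOROUTES is bumped.
 */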
99 static int ip6_finish_output2(struct sk_buff *skb)
100 {
101 struct dst_entry *dst = skb_dst(skb);
102 struct net_device *dev = dst->dev;
103 struct neighbour *neigh;
104
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
107
108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
110
111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
112 ((mroute6_socket(dev_net(dev), skb) &&
113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
120 */
121 if (newskb)
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
124 ip6_dev_loopback_xmit);
125
126 if (ipv6_hdr(skb)->hop_limit == 0) {
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
129 kfree_skb(skb);
130 return 0;
131 }
132 }
133
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
136 }
137
138 rcu_read_lock();
139 neigh = dst_get_neighbour_noref(dst);
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
142
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
151 }
152
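/* Fragment the packet when it exceeds the path MTU (and is not GSO) or
 * when the route requires fragmenting everything (dst_allfrag), otherwise
 * go straight to ip6_finish_output2().
 */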
153 static int ip6_finish_output(struct sk_buff *skb)
154 {
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
160 }
161
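/*
 * ip6_output - netfilter-aware entry point for outgoing packets
 *
 * Drops the packet if IPv6 is administratively disabled on the output
 * device, otherwise runs the NF_INET_POST_ROUTING hook (skipped for
 * rerouted packets) and continues with ip6_finish_output().
 */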
162 int ip6_output(struct sk_buff *skb)
163 {
164 struct net_device *dev = skb_dst(skb)->dev;
165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166 if (unlikely(idev->cnf.disable_ipv6)) {
167 IP6_INC_STATS(dev_net(dev), idev,
168 IPSTATS_MIB_OUTDISCARDS);
169 kfree_skb(skb);
170 return 0;
171 }
172
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
176 }
177
178 /*
179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
180 */
181
182 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
183 struct ipv6_txoptions *opt, int tclass)
184 {
185 struct net *net = sock_net(sk);
186 struct ipv6_pinfo *np = inet6_sk(sk);
187 struct in6_addr *first_hop = &fl6->daddr;
188 struct dst_entry *dst = skb_dst(skb);
189 struct ipv6hdr *hdr;
190 u8 proto = fl6->flowi6_proto;
191 int seg_len = skb->len;
192 int hlimit = -1;
193 u32 mtu;
194
195 if (opt) {
196 unsigned int head_room;
197
198 /* First: extension headers may take lots of space (~8K for now);
199 MAX_HEADER is not enough.
200 */
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
204
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
207 if (skb2 == NULL) {
208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
211 return -ENOBUFS;
212 }
213 kfree_skb(skb);
214 skb = skb2;
215 skb_set_owner_w(skb, sk);
216 }
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
221 }
222
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
225 hdr = ipv6_hdr(skb);
226
227 /*
228 * Fill in the IPv6 header
229 */
230 if (np)
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
233 hlimit = ip6_dst_hoplimit(dst);
234
235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
236
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
240
241 hdr->saddr = fl6->saddr;
242 hdr->daddr = *first_hop;
243
244 skb->priority = sk->sk_priority;
245 skb->mark = sk->sk_mark;
246
247 mtu = dst_mtu(dst);
248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
250 IPSTATS_MIB_OUT, skb->len);
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
253 }
254
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
260 kfree_skb(skb);
261 return -EMSGSIZE;
262 }
263
264 EXPORT_SYMBOL(ip6_xmit);
265
266 /*
267 * To avoid extra problems, ND packets are sent through this
268 * routine. It's code duplication, but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is performance critical for us).
271 */
272
273 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
274 const struct in6_addr *saddr, const struct in6_addr *daddr,
275 int proto, int len)
276 {
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
279
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
282
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
285 hdr = ipv6_hdr(skb);
286
287 *(__be32*)hdr = htonl(0x60000000);
288
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
292
293 hdr->saddr = *saddr;
294 hdr->daddr = *daddr;
295
296 return 0;
297 }
298
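/*
 * ip6_call_ra_chain - deliver a Router Alert packet to interested sockets
 *
 * Walks the global ip6_ra_chain and clones the packet to every raw socket
 * registered for the given Router Alert value (and bound to the incoming
 * device, if bound at all).  Returns 1 if at least one socket consumed
 * the packet, 0 otherwise.
 */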
299 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
300 {
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
303
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
314 }
315 last = sk;
316 }
317 }
318
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
323 }
324 read_unlock(&ip6_ra_lock);
325 return 0;
326 }
327
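/*
 * ip6_forward_proxy_check - decide how to treat a packet for a proxied address
 *
 * Returns 1 if the packet is a neighbour discovery message that should be
 * handed to the local input path, -1 if it must be discarded (link-local
 * destination), and 0 if it can simply be forwarded.
 */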
328 static int ip6_forward_proxy_check(struct sk_buff *skb)
329 {
330 struct ipv6hdr *hdr = ipv6_hdr(skb);
331 u8 nexthdr = hdr->nexthdr;
332 __be16 frag_off;
333 int offset;
334
335 if (ipv6_ext_hdr(nexthdr)) {
336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
337 if (offset < 0)
338 return 0;
339 } else
340 offset = sizeof(struct ipv6hdr);
341
342 if (nexthdr == IPPROTO_ICMPV6) {
343 struct icmp6hdr *icmp6;
344
345 if (!pskb_may_pull(skb, (skb_network_header(skb) +
346 offset + 1 - skb->data)))
347 return 0;
348
349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
350
351 switch (icmp6->icmp6_type) {
352 case NDISC_ROUTER_SOLICITATION:
353 case NDISC_ROUTER_ADVERTISEMENT:
354 case NDISC_NEIGHBOUR_SOLICITATION:
355 case NDISC_NEIGHBOUR_ADVERTISEMENT:
356 case NDISC_REDIRECT:
357 /* For reactions involving a unicast neighbour discovery
358 * message destined to the proxied address, pass it to
359 * the input function.
360 */
361 return 1;
362 default:
363 break;
364 }
365 }
366
367 /*
368 * The proxying router can't forward traffic sent to a link-local
369 * address, so signal the sender and discard the packet. This
370 * behavior is clarified by the MIPv6 specification.
371 */
372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
373 dst_link_failure(skb);
374 return -1;
375 }
376
377 return 0;
378 }
379
380 static inline int ip6_forward_finish(struct sk_buff *skb)
381 {
382 return dst_output(skb);
383 }
384
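/*
 * ip6_forward - forward a packet that is not addressed to this host
 *
 * Performs the policy and sanity checks for forwarding: forwarding must
 * be enabled, the hop limit must be large enough, XFRM policy must allow
 * it, and the packet must fit the outgoing MTU (otherwise an ICMPv6
 * "packet too big" error is returned to the sender).  Handles Router
 * Alert delivery, NDP proxying and redirect generation, decrements the
 * hop limit and finally passes the packet through the NF_INET_FORWARD
 * hook to ip6_forward_finish().
 */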
385 int ip6_forward(struct sk_buff *skb)
386 {
387 struct dst_entry *dst = skb_dst(skb);
388 struct ipv6hdr *hdr = ipv6_hdr(skb);
389 struct inet6_skb_parm *opt = IP6CB(skb);
390 struct net *net = dev_net(dst->dev);
391 u32 mtu;
392
393 if (net->ipv6.devconf_all->forwarding == 0)
394 goto error;
395
396 if (skb_warn_if_lro(skb))
397 goto drop;
398
399 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
400 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
401 goto drop;
402 }
403
404 if (skb->pkt_type != PACKET_HOST)
405 goto drop;
406
407 skb_forward_csum(skb);
408
409 /*
410 * We do not do any processing on
411 * RA packets, pushing them to user level AS IS
412 * without any warranty that the application will be able
413 * to interpret them. The reason is that we
414 * cannot do anything clever here.
415 *
416 * We are not the end node, so if the packet contains
417 * AH/ESP we cannot do anything with it.
418 * Defragmentation would also be a mistake; RA packets
419 * cannot be fragmented, because there is no guarantee
420 * that different fragments will go along one path. --ANK
421 */
422 if (opt->ra) {
423 u8 *ptr = skb_network_header(skb) + opt->ra;
424 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
425 return 0;
426 }
427
428 /*
429 * check and decrement ttl
430 */
431 if (hdr->hop_limit <= 1) {
432 /* Force OUTPUT device used as source address */
433 skb->dev = dst->dev;
434 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
435 IP6_INC_STATS_BH(net,
436 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
437
438 kfree_skb(skb);
439 return -ETIMEDOUT;
440 }
441
442 /* XXX: idev->cnf.proxy_ndp? */
443 if (net->ipv6.devconf_all->proxy_ndp &&
444 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
445 int proxied = ip6_forward_proxy_check(skb);
446 if (proxied > 0)
447 return ip6_input(skb);
448 else if (proxied < 0) {
449 IP6_INC_STATS(net, ip6_dst_idev(dst),
450 IPSTATS_MIB_INDISCARDS);
451 goto drop;
452 }
453 }
454
455 if (!xfrm6_route_forward(skb)) {
456 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
457 goto drop;
458 }
459 dst = skb_dst(skb);
460
461 /* IPv6 specs say nothing about it, but it is clear that we cannot
462 send redirects to source routed frames.
463 We don't send redirects to frames decapsulated from IPsec.
464 */
465 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
466 struct in6_addr *target = NULL;
467 struct rt6_info *rt;
468
469 /*
470 * incoming and outgoing devices are the same
471 * send a redirect.
472 */
473
474 rt = (struct rt6_info *) dst;
475 if (rt->rt6i_flags & RTF_GATEWAY)
476 target = &rt->rt6i_gateway;
477 else
478 target = &hdr->daddr;
479
480 if (!rt->rt6i_peer)
481 rt6_bind_peer(rt, 1);
482
483 /* Limit redirects both by destination (here)
484 and by source (inside ndisc_send_redirect)
485 */
486 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
487 ndisc_send_redirect(skb, target);
488 } else {
489 int addrtype = ipv6_addr_type(&hdr->saddr);
490
491 /* This check is security critical. */
492 if (addrtype == IPV6_ADDR_ANY ||
493 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
494 goto error;
495 if (addrtype & IPV6_ADDR_LINKLOCAL) {
496 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
497 ICMPV6_NOT_NEIGHBOUR, 0);
498 goto error;
499 }
500 }
501
502 mtu = dst_mtu(dst);
503 if (mtu < IPV6_MIN_MTU)
504 mtu = IPV6_MIN_MTU;
505
506 if (skb->len > mtu && !skb_is_gso(skb)) {
507 /* Again, force OUTPUT device used as source address */
508 skb->dev = dst->dev;
509 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
510 IP6_INC_STATS_BH(net,
511 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
512 IP6_INC_STATS_BH(net,
513 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
514 kfree_skb(skb);
515 return -EMSGSIZE;
516 }
517
518 if (skb_cow(skb, dst->dev->hard_header_len)) {
519 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
520 goto drop;
521 }
522
523 hdr = ipv6_hdr(skb);
524
525 /* Mangling the hop limit is delayed until after the skb COW */
526
527 hdr->hop_limit--;
528
529 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
530 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
531 ip6_forward_finish);
532
533 error:
534 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
535 drop:
536 kfree_skb(skb);
537 return -EINVAL;
538 }
539
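/* Copy the per-packet metadata (type, priority, dst, marks, netfilter
 * and security state) from the original skb to a newly built fragment.
 */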
540 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
541 {
542 to->pkt_type = from->pkt_type;
543 to->priority = from->priority;
544 to->protocol = from->protocol;
545 skb_dst_drop(to);
546 skb_dst_set(to, dst_clone(skb_dst(from)));
547 to->dev = from->dev;
548 to->mark = from->mark;
549
550 #ifdef CONFIG_NET_SCHED
551 to->tc_index = from->tc_index;
552 #endif
553 nf_copy(to, from);
554 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
556 to->nf_trace = from->nf_trace;
557 #endif
558 skb_copy_secmark(to, from);
559 }
560
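/*
 * ip6_find_1stfragopt - locate where the Fragment header must be inserted
 *
 * Walks the extension header chain and returns the length of the
 * unfragmentable part of the packet, i.e. the offset at which the
 * Fragment header has to be inserted; *nexthdr is left pointing at the
 * "next header" field that will be rewritten to NEXTHDR_FRAGMENT.
 */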
561 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
562 {
563 u16 offset = sizeof(struct ipv6hdr);
564 struct ipv6_opt_hdr *exthdr =
565 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
566 unsigned int packet_len = skb->tail - skb->network_header;
567 int found_rhdr = 0;
568 *nexthdr = &ipv6_hdr(skb)->nexthdr;
569
570 while (offset + 1 <= packet_len) {
571
572 switch (**nexthdr) {
573
574 case NEXTHDR_HOP:
575 break;
576 case NEXTHDR_ROUTING:
577 found_rhdr = 1;
578 break;
579 case NEXTHDR_DEST:
580 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
581 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
582 break;
583 #endif
584 if (found_rhdr)
585 return offset;
586 break;
587 default:
588 return offset;
589 }
590
591 offset += ipv6_optlen(exthdr);
592 *nexthdr = &exthdr->nexthdr;
593 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
594 offset);
595 }
596
597 return offset;
598 }
599
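/*
 * ipv6_select_ident - choose the Identification value for a Fragment header
 *
 * Prefers a per-destination counter kept in the route's inet_peer entry;
 * if no peer is available (or DST_NOPEER is set) it falls back to a
 * global atomic counter, skipping the value 0.
 */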
600 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
601 {
602 static atomic_t ipv6_fragmentation_id;
603 int old, new;
604
605 if (rt && !(rt->dst.flags & DST_NOPEER)) {
606 struct inet_peer *peer;
607
608 if (!rt->rt6i_peer)
609 rt6_bind_peer(rt, 1);
610 peer = rt->rt6i_peer;
611 if (peer) {
612 fhdr->identification = htonl(inet_getid(peer, 0));
613 return;
614 }
615 }
616 do {
617 old = atomic_read(&ipv6_fragmentation_id);
618 new = old + 1;
619 if (!new)
620 new = 1;
621 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
622 fhdr->identification = htonl(new);
623 }
624
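/*
 * ip6_fragment - split an oversized packet into MTU-sized fragments
 *
 * When the skb already carries a well-formed frag list a fast path reuses
 * those buffers; otherwise the slow path allocates a new skb per fragment
 * and copies the data.  Each fragment gets its own Fragment extension
 * header and is handed to @output.  Returns 0 on success or a negative
 * errno, updating the FRAGOKS/FRAGFAILS/FRAGCREATES counters as it goes.
 */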
625 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
626 {
627 struct sk_buff *frag;
628 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
629 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
630 struct ipv6hdr *tmp_hdr;
631 struct frag_hdr *fh;
632 unsigned int mtu, hlen, left, len;
633 int hroom, troom;
634 __be32 frag_id = 0;
635 int ptr, offset = 0, err = 0;
636 u8 *prevhdr, nexthdr = 0;
637 struct net *net = dev_net(skb_dst(skb)->dev);
638
639 hlen = ip6_find_1stfragopt(skb, &prevhdr);
640 nexthdr = *prevhdr;
641
642 mtu = ip6_skb_dst_mtu(skb);
643
644 /* We must not fragment if the socket is set to force MTU discovery
645 * or if the skb is not generated by a local socket.
646 */
647 if (!skb->local_df && skb->len > mtu) {
648 skb->dev = skb_dst(skb)->dev;
649 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
650 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
651 IPSTATS_MIB_FRAGFAILS);
652 kfree_skb(skb);
653 return -EMSGSIZE;
654 }
655
656 if (np && np->frag_size < mtu) {
657 if (np->frag_size)
658 mtu = np->frag_size;
659 }
660 mtu -= hlen + sizeof(struct frag_hdr);
661
662 if (skb_has_frag_list(skb)) {
663 int first_len = skb_pagelen(skb);
664 struct sk_buff *frag2;
665
666 if (first_len - hlen > mtu ||
667 ((first_len - hlen) & 7) ||
668 skb_cloned(skb))
669 goto slow_path;
670
671 skb_walk_frags(skb, frag) {
672 /* Correct geometry. */
673 if (frag->len > mtu ||
674 ((frag->len & 7) && frag->next) ||
675 skb_headroom(frag) < hlen)
676 goto slow_path_clean;
677
678 /* Partially cloned skb? */
679 if (skb_shared(frag))
680 goto slow_path_clean;
681
682 BUG_ON(frag->sk);
683 if (skb->sk) {
684 frag->sk = skb->sk;
685 frag->destructor = sock_wfree;
686 }
687 skb->truesize -= frag->truesize;
688 }
689
690 err = 0;
691 offset = 0;
692 frag = skb_shinfo(skb)->frag_list;
693 skb_frag_list_init(skb);
694 /* BUILD HEADER */
695
696 *prevhdr = NEXTHDR_FRAGMENT;
697 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
698 if (!tmp_hdr) {
699 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
700 IPSTATS_MIB_FRAGFAILS);
701 return -ENOMEM;
702 }
703
704 __skb_pull(skb, hlen);
705 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
706 __skb_push(skb, hlen);
707 skb_reset_network_header(skb);
708 memcpy(skb_network_header(skb), tmp_hdr, hlen);
709
710 ipv6_select_ident(fh, rt);
711 fh->nexthdr = nexthdr;
712 fh->reserved = 0;
713 fh->frag_off = htons(IP6_MF);
714 frag_id = fh->identification;
715
716 first_len = skb_pagelen(skb);
717 skb->data_len = first_len - skb_headlen(skb);
718 skb->len = first_len;
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
721
722 dst_hold(&rt->dst);
723
724 for (;;) {
725 /* Prepare the header of the next frame
726 * before the previous one goes down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
729 skb_reset_transport_header(frag);
730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
745 ip6_copy_metadata(frag, skb);
746 }
747
748 err = output(skb);
749 if (!err)
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
751 IPSTATS_MIB_FRAGCREATES);
752
753 if (err || !frag)
754 break;
755
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
759 }
760
761 kfree(tmp_hdr);
762
763 if (err == 0) {
764 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
765 IPSTATS_MIB_FRAGOKS);
766 dst_release(&rt->dst);
767 return 0;
768 }
769
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
774 }
775
776 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
777 IPSTATS_MIB_FRAGFAILS);
778 dst_release(&rt->dst);
779 return err;
780
781 slow_path_clean:
782 skb_walk_frags(skb, frag2) {
783 if (frag2 == frag)
784 break;
785 frag2->sk = NULL;
786 frag2->destructor = NULL;
787 skb->truesize += frag2->truesize;
788 }
789 }
790
791 slow_path:
792 left = skb->len - hlen; /* Space per frame */
793 ptr = hlen; /* Where to start from */
794
795 /*
796 * Fragment the datagram.
797 */
798
799 *prevhdr = NEXTHDR_FRAGMENT;
800 hroom = LL_RESERVED_SPACE(rt->dst.dev);
801 troom = rt->dst.dev->needed_tailroom;
802
803 /*
804 * Keep copying data until we run out.
805 */
806 while (left > 0) {
807 len = left;
808 /* IF: it doesn't fit, use 'mtu' - the data space left */
809 if (len > mtu)
810 len = mtu;
811 /* IF: we are not sending up to and including the packet end
812 then align the next start on an eight byte boundary */
813 if (len < left) {
814 len &= ~7;
815 }
816 /*
817 * Allocate buffer.
818 */
819
820 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
821 hroom + troom, GFP_ATOMIC)) == NULL) {
822 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
823 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
824 IPSTATS_MIB_FRAGFAILS);
825 err = -ENOMEM;
826 goto fail;
827 }
828
829 /*
830 * Set up data on packet
831 */
832
833 ip6_copy_metadata(frag, skb);
834 skb_reserve(frag, hroom);
835 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
836 skb_reset_network_header(frag);
837 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
838 frag->transport_header = (frag->network_header + hlen +
839 sizeof(struct frag_hdr));
840
841 /*
842 * Charge the memory for the fragment to any owner
843 * it might possess
844 */
845 if (skb->sk)
846 skb_set_owner_w(frag, skb->sk);
847
848 /*
849 * Copy the packet header into the new buffer.
850 */
851 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
852
853 /*
854 * Build fragment header.
855 */
856 fh->nexthdr = nexthdr;
857 fh->reserved = 0;
858 if (!frag_id) {
859 ipv6_select_ident(fh, rt);
860 frag_id = fh->identification;
861 } else
862 fh->identification = frag_id;
863
864 /*
865 * Copy a block of the IP datagram.
866 */
867 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
868 BUG();
869 left -= len;
870
871 fh->frag_off = htons(offset);
872 if (left > 0)
873 fh->frag_off |= htons(IP6_MF);
874 ipv6_hdr(frag)->payload_len = htons(frag->len -
875 sizeof(struct ipv6hdr));
876
877 ptr += len;
878 offset += len;
879
880 /*
881 * Put this fragment into the sending queue.
882 */
883 err = output(frag);
884 if (err)
885 goto fail;
886
887 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
888 IPSTATS_MIB_FRAGCREATES);
889 }
890 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
891 IPSTATS_MIB_FRAGOKS);
892 kfree_skb(skb);
893 return err;
894
895 fail:
896 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
897 IPSTATS_MIB_FRAGFAILS);
898 kfree_skb(skb);
899 return err;
900 }
901
902 static inline int ip6_rt_check(const struct rt6key *rt_key,
903 const struct in6_addr *fl_addr,
904 const struct in6_addr *addr_cache)
905 {
906 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
907 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
908 }
909
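/*
 * ip6_sk_dst_check - validate a cached socket route against a flow
 *
 * Returns the dst if it is still usable for this flow, otherwise releases
 * it and returns NULL so the caller performs a fresh lookup.
 */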
910 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
911 struct dst_entry *dst,
912 const struct flowi6 *fl6)
913 {
914 struct ipv6_pinfo *np = inet6_sk(sk);
915 struct rt6_info *rt = (struct rt6_info *)dst;
916
917 if (!dst)
918 goto out;
919
920 /* Yes, checking route validity in the not-connected
921 * case is not very simple. Take into account
922 * that we do not support routing by source, TOS,
923 * or MSG_DONTROUTE --ANK (980726)
924 *
925 * 1. ip6_rt_check(): If the route was a host route,
926 * check that the cached destination is current.
927 * If it is a network route, we still may
928 * check its validity using a saved pointer
929 * to the last used address: daddr_cache.
930 * We do not want to save the whole address now
931 * (because the main consumer of this service
932 * is tcp, which does not have this problem),
933 * so the last trick works only on connected
934 * sockets.
935 * 2. oif also should be the same.
936 */
937 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
938 #ifdef CONFIG_IPV6_SUBTREES
939 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
940 #endif
941 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
942 dst_release(dst);
943 dst = NULL;
944 }
945
946 out:
947 return dst;
948 }
949
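/*
 * ip6_dst_lookup_tail - common tail of the dst lookup helpers
 *
 * Performs the routing lookup if *dst is not already set, fills in a
 * source address for the flow when none was given, and (with optimistic
 * DAD enabled) may redo the lookup towards the default router when the
 * chosen source address is still optimistic and the neighbour entry is
 * not yet valid.  On failure *dst is released and set to NULL.
 */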
950 static int ip6_dst_lookup_tail(struct sock *sk,
951 struct dst_entry **dst, struct flowi6 *fl6)
952 {
953 struct net *net = sock_net(sk);
954 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
955 struct neighbour *n;
956 #endif
957 int err;
958
959 if (*dst == NULL)
960 *dst = ip6_route_output(net, sk, fl6);
961
962 if ((err = (*dst)->error))
963 goto out_err_release;
964
965 if (ipv6_addr_any(&fl6->saddr)) {
966 struct rt6_info *rt = (struct rt6_info *) *dst;
967 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
968 sk ? inet6_sk(sk)->srcprefs : 0,
969 &fl6->saddr);
970 if (err)
971 goto out_err_release;
972 }
973
974 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
975 /*
976 * Here, if the dst entry we've looked up
977 * has a neighbour entry that is in the INCOMPLETE
978 * state and the src address from the flow is
979 * marked as OPTIMISTIC, we release the found
980 * dst entry and replace it with the
981 * dst entry of the nexthop router.
982 */
983 rcu_read_lock();
984 n = dst_get_neighbour_noref(*dst);
985 if (n && !(n->nud_state & NUD_VALID)) {
986 struct inet6_ifaddr *ifp;
987 struct flowi6 fl_gw6;
988 int redirect;
989
990 rcu_read_unlock();
991 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
992 (*dst)->dev, 1);
993
994 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
995 if (ifp)
996 in6_ifa_put(ifp);
997
998 if (redirect) {
999 /*
1000 * We need to get the dst entry for the
1001 * default router instead
1002 */
1003 dst_release(*dst);
1004 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1005 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1006 *dst = ip6_route_output(net, sk, &fl_gw6);
1007 if ((err = (*dst)->error))
1008 goto out_err_release;
1009 }
1010 } else {
1011 rcu_read_unlock();
1012 }
1013 #endif
1014
1015 return 0;
1016
1017 out_err_release:
1018 if (err == -ENETUNREACH)
1019 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1020 dst_release(*dst);
1021 *dst = NULL;
1022 return err;
1023 }
1024
1025 /**
1026 * ip6_dst_lookup - perform route lookup on flow
1027 * @sk: socket which provides route info
1028 * @dst: pointer to dst_entry * for result
1029 * @fl6: flow to lookup
1030 *
1031 * This function performs a route lookup on the given flow.
1032 *
1033 * It returns zero on success, or a standard errno code on error.
1034 */
1035 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
1036 {
1037 *dst = NULL;
1038 return ip6_dst_lookup_tail(sk, dst, fl6);
1039 }
1040 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1041
1042 /**
1043 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1044 * @sk: socket which provides route info
1045 * @fl6: flow to lookup
1046 * @final_dst: final destination address for ipsec lookup
1047 * @can_sleep: we are in a sleepable context
1048 *
1049 * This function performs a route lookup on the given flow.
1050 *
1051 * It returns a valid dst pointer on success, or a pointer encoded
1052 * error code.
1053 */
1054 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1055 const struct in6_addr *final_dst,
1056 bool can_sleep)
1057 {
1058 struct dst_entry *dst = NULL;
1059 int err;
1060
1061 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1062 if (err)
1063 return ERR_PTR(err);
1064 if (final_dst)
1065 fl6->daddr = *final_dst;
1066 if (can_sleep)
1067 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1068
1069 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1070 }
1071 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1072
1073 /**
1074 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1075 * @sk: socket which provides the dst cache and route info
1076 * @fl6: flow to lookup
1077 * @final_dst: final destination address for ipsec lookup
1078 * @can_sleep: we are in a sleepable context
1079 *
1080 * This function performs a route lookup on the given flow with the
1081 * possibility of using the cached route in the socket if it is valid.
1082 * It will take the socket dst lock when operating on the dst cache.
1083 * As a result, this function can only be used in process context.
1084 *
1085 * It returns a valid dst pointer on success, or a pointer encoded
1086 * error code.
1087 */
1088 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1089 const struct in6_addr *final_dst,
1090 bool can_sleep)
1091 {
1092 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1093 int err;
1094
1095 dst = ip6_sk_dst_check(sk, dst, fl6);
1096
1097 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1098 if (err)
1099 return ERR_PTR(err);
1100 if (final_dst)
1101 fl6->daddr = *final_dst;
1102 if (can_sleep)
1103 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1104
1105 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1106 }
1107 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1108
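/*
 * ip6_ufo_append_data - queue data as one large skb for UDP fragmentation offload
 *
 * Builds (or extends) a single skb on the socket write queue, stores the
 * payload in page fragments and records the GSO size and fragment id so
 * that the device (or the GSO layer) can segment the datagram later.
 */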
1109 static inline int ip6_ufo_append_data(struct sock *sk,
1110 int getfrag(void *from, char *to, int offset, int len,
1111 int odd, struct sk_buff *skb),
1112 void *from, int length, int hh_len, int fragheaderlen,
1113 int transhdrlen, int mtu, unsigned int flags,
1114 struct rt6_info *rt)
1115
1116 {
1117 struct sk_buff *skb;
1118 int err;
1119
1120 /* The network device supports UDP large send offload,
1121 * so create one single skb containing the complete
1122 * UDP datagram.
1123 */
1124 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1125 skb = sock_alloc_send_skb(sk,
1126 hh_len + fragheaderlen + transhdrlen + 20,
1127 (flags & MSG_DONTWAIT), &err);
1128 if (skb == NULL)
1129 return err;
1130
1131 /* reserve space for Hardware header */
1132 skb_reserve(skb, hh_len);
1133
1134 /* create space for UDP/IP header */
1135 skb_put(skb, fragheaderlen + transhdrlen);
1136
1137 /* initialize network header pointer */
1138 skb_reset_network_header(skb);
1139
1140 /* initialize protocol header pointer */
1141 skb->transport_header = skb->network_header + fragheaderlen;
1142
1143 skb->ip_summed = CHECKSUM_PARTIAL;
1144 skb->csum = 0;
1145 }
1146
1147 err = skb_append_datato_frags(sk, skb, getfrag, from,
1148 (length - transhdrlen));
1149 if (!err) {
1150 struct frag_hdr fhdr;
1151
1152 /* Specify the length of each IPv6 datagram fragment.
1153 * It has to be a multiple of 8.
1154 */
1155 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1156 sizeof(struct frag_hdr)) & ~7;
1157 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1158 ipv6_select_ident(&fhdr, rt);
1159 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1160 __skb_queue_tail(&sk->sk_write_queue, skb);
1161
1162 return 0;
1163 }
1164 /* There is not enough support to do UDP LSO,
1165 * so follow the normal path.
1166 */
1167 kfree_skb(skb);
1168
1169 return err;
1170 }
1171
1172 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1173 gfp_t gfp)
1174 {
1175 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1176 }
1177
1178 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1179 gfp_t gfp)
1180 {
1181 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1182 }
1183
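/*
 * ip6_append_data - append user data to the socket's pending (corked) queue
 *
 * On the first call it sets up the cork state (duplicates the tx options,
 * takes a reference on the route, records the MTU); subsequent calls
 * reuse that state.  Data is packed into skbs no larger than the MTU
 * (or handed to ip6_ufo_append_data() on UFO-capable devices), ready to
 * be sent by ip6_push_pending_frames().
 */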
1184 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1185 int offset, int len, int odd, struct sk_buff *skb),
1186 void *from, int length, int transhdrlen,
1187 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1188 struct rt6_info *rt, unsigned int flags, int dontfrag)
1189 {
1190 struct inet_sock *inet = inet_sk(sk);
1191 struct ipv6_pinfo *np = inet6_sk(sk);
1192 struct inet_cork *cork;
1193 struct sk_buff *skb;
1194 unsigned int maxfraglen, fragheaderlen;
1195 int exthdrlen;
1196 int dst_exthdrlen;
1197 int hh_len;
1198 int mtu;
1199 int copy;
1200 int err;
1201 int offset = 0;
1202 int csummode = CHECKSUM_NONE;
1203 __u8 tx_flags = 0;
1204
1205 if (flags&MSG_PROBE)
1206 return 0;
1207 cork = &inet->cork.base;
1208 if (skb_queue_empty(&sk->sk_write_queue)) {
1209 /*
1210 * setup for corking
1211 */
1212 if (opt) {
1213 if (WARN_ON(np->cork.opt))
1214 return -EINVAL;
1215
1216 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1217 if (unlikely(np->cork.opt == NULL))
1218 return -ENOBUFS;
1219
1220 np->cork.opt->tot_len = opt->tot_len;
1221 np->cork.opt->opt_flen = opt->opt_flen;
1222 np->cork.opt->opt_nflen = opt->opt_nflen;
1223
1224 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1225 sk->sk_allocation);
1226 if (opt->dst0opt && !np->cork.opt->dst0opt)
1227 return -ENOBUFS;
1228
1229 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1230 sk->sk_allocation);
1231 if (opt->dst1opt && !np->cork.opt->dst1opt)
1232 return -ENOBUFS;
1233
1234 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1235 sk->sk_allocation);
1236 if (opt->hopopt && !np->cork.opt->hopopt)
1237 return -ENOBUFS;
1238
1239 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1240 sk->sk_allocation);
1241 if (opt->srcrt && !np->cork.opt->srcrt)
1242 return -ENOBUFS;
1243
1244 /* need source address above miyazawa */
1245 }
1246 dst_hold(&rt->dst);
1247 cork->dst = &rt->dst;
1248 inet->cork.fl.u.ip6 = *fl6;
1249 np->cork.hop_limit = hlimit;
1250 np->cork.tclass = tclass;
1251 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1252 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1253 if (np->frag_size < mtu) {
1254 if (np->frag_size)
1255 mtu = np->frag_size;
1256 }
1257 cork->fragsize = mtu;
1258 if (dst_allfrag(rt->dst.path))
1259 cork->flags |= IPCORK_ALLFRAG;
1260 cork->length = 0;
1261 sk->sk_sndmsg_page = NULL;
1262 sk->sk_sndmsg_off = 0;
1263 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1264 length += exthdrlen;
1265 transhdrlen += exthdrlen;
1266 dst_exthdrlen = rt->dst.header_len;
1267 } else {
1268 rt = (struct rt6_info *)cork->dst;
1269 fl6 = &inet->cork.fl.u.ip6;
1270 opt = np->cork.opt;
1271 transhdrlen = 0;
1272 exthdrlen = 0;
1273 dst_exthdrlen = 0;
1274 mtu = cork->fragsize;
1275 }
1276
1277 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1278
1279 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1280 (opt ? opt->opt_nflen : 0);
1281 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1282
1283 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1284 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1285 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1286 return -EMSGSIZE;
1287 }
1288 }
1289
1290 /* For UDP, check if TX timestamp is enabled */
1291 if (sk->sk_type == SOCK_DGRAM) {
1292 err = sock_tx_timestamp(sk, &tx_flags);
1293 if (err)
1294 goto error;
1295 }
1296
1297 /*
1298 * Let's try using as much space as possible.
1299 * Use MTU if total length of the message fits into the MTU.
1300 * Otherwise, we need to reserve fragment header and
1301 * fragment alignment (= 8-15 octets, in total).
1302 *
1303 * Note that we may need to "move" the data from the tail
1304 * of the buffer to the new fragment when we split
1305 * the message.
1306 *
1307 * FIXME: It may be fragmented into multiple chunks
1308 * at once if non-fragmentable extension headers
1309 * are too large.
1310 * --yoshfuji
1311 */
1312
1313 cork->length += length;
1314 if (length > mtu) {
1315 int proto = sk->sk_protocol;
1316 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1317 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1318 return -EMSGSIZE;
1319 }
1320
1321 if (proto == IPPROTO_UDP &&
1322 (rt->dst.dev->features & NETIF_F_UFO)) {
1323
1324 err = ip6_ufo_append_data(sk, getfrag, from, length,
1325 hh_len, fragheaderlen,
1326 transhdrlen, mtu, flags, rt);
1327 if (err)
1328 goto error;
1329 return 0;
1330 }
1331 }
1332
1333 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1334 goto alloc_new_skb;
1335
1336 while (length > 0) {
1337 /* Check if the remaining data fits into current packet. */
1338 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1339 if (copy < length)
1340 copy = maxfraglen - skb->len;
1341
1342 if (copy <= 0) {
1343 char *data;
1344 unsigned int datalen;
1345 unsigned int fraglen;
1346 unsigned int fraggap;
1347 unsigned int alloclen;
1348 struct sk_buff *skb_prev;
1349 alloc_new_skb:
1350 skb_prev = skb;
1351
1352 /* There's no room in the current skb */
1353 if (skb_prev)
1354 fraggap = skb_prev->len - maxfraglen;
1355 else
1356 fraggap = 0;
1357
1358 /*
1359 * If remaining data exceeds the mtu,
1360 * we know we need more fragment(s).
1361 */
1362 datalen = length + fraggap;
1363 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1364 datalen = maxfraglen - fragheaderlen;
1365
1366 fraglen = datalen + fragheaderlen;
1367 if ((flags & MSG_MORE) &&
1368 !(rt->dst.dev->features&NETIF_F_SG))
1369 alloclen = mtu;
1370 else
1371 alloclen = datalen + fragheaderlen;
1372
1373 alloclen += dst_exthdrlen;
1374
1375 /*
1376 * The last fragment gets additional space at tail.
1377 * Note: we overallocate on fragments with MSG_MORE
1378 * because we have no idea if we're the last one.
1379 */
1380 if (datalen == length + fraggap)
1381 alloclen += rt->dst.trailer_len;
1382
1383 /*
1384 * We just reserve space for fragment header.
1385 * Note: this may be overallocation if the message
1386 * (without MSG_MORE) fits into the MTU.
1387 */
1388 alloclen += sizeof(struct frag_hdr);
1389
1390 if (transhdrlen) {
1391 skb = sock_alloc_send_skb(sk,
1392 alloclen + hh_len,
1393 (flags & MSG_DONTWAIT), &err);
1394 } else {
1395 skb = NULL;
1396 if (atomic_read(&sk->sk_wmem_alloc) <=
1397 2 * sk->sk_sndbuf)
1398 skb = sock_wmalloc(sk,
1399 alloclen + hh_len, 1,
1400 sk->sk_allocation);
1401 if (unlikely(skb == NULL))
1402 err = -ENOBUFS;
1403 else {
1404 /* Only the initial fragment
1405 * is time stamped.
1406 */
1407 tx_flags = 0;
1408 }
1409 }
1410 if (skb == NULL)
1411 goto error;
1412 /*
1413 * Fill in the control structures
1414 */
1415 skb->ip_summed = csummode;
1416 skb->csum = 0;
1417 /* reserve for fragmentation and ipsec header */
1418 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1419 dst_exthdrlen);
1420
1421 if (sk->sk_type == SOCK_DGRAM)
1422 skb_shinfo(skb)->tx_flags = tx_flags;
1423
1424 /*
1425 * Find where to start putting bytes
1426 */
1427 data = skb_put(skb, fraglen);
1428 skb_set_network_header(skb, exthdrlen);
1429 data += fragheaderlen;
1430 skb->transport_header = (skb->network_header +
1431 fragheaderlen);
1432 if (fraggap) {
1433 skb->csum = skb_copy_and_csum_bits(
1434 skb_prev, maxfraglen,
1435 data + transhdrlen, fraggap, 0);
1436 skb_prev->csum = csum_sub(skb_prev->csum,
1437 skb->csum);
1438 data += fraggap;
1439 pskb_trim_unique(skb_prev, maxfraglen);
1440 }
1441 copy = datalen - transhdrlen - fraggap;
1442
1443 if (copy < 0) {
1444 err = -EINVAL;
1445 kfree_skb(skb);
1446 goto error;
1447 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1448 err = -EFAULT;
1449 kfree_skb(skb);
1450 goto error;
1451 }
1452
1453 offset += copy;
1454 length -= datalen - fraggap;
1455 transhdrlen = 0;
1456 exthdrlen = 0;
1457 dst_exthdrlen = 0;
1458 csummode = CHECKSUM_NONE;
1459
1460 /*
1461 * Put the packet on the pending queue
1462 */
1463 __skb_queue_tail(&sk->sk_write_queue, skb);
1464 continue;
1465 }
1466
1467 if (copy > length)
1468 copy = length;
1469
1470 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1471 unsigned int off;
1472
1473 off = skb->len;
1474 if (getfrag(from, skb_put(skb, copy),
1475 offset, copy, off, skb) < 0) {
1476 __skb_trim(skb, off);
1477 err = -EFAULT;
1478 goto error;
1479 }
1480 } else {
1481 int i = skb_shinfo(skb)->nr_frags;
1482 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1483 struct page *page = sk->sk_sndmsg_page;
1484 int off = sk->sk_sndmsg_off;
1485 unsigned int left;
1486
1487 if (page && (left = PAGE_SIZE - off) > 0) {
1488 if (copy >= left)
1489 copy = left;
1490 if (page != skb_frag_page(frag)) {
1491 if (i == MAX_SKB_FRAGS) {
1492 err = -EMSGSIZE;
1493 goto error;
1494 }
1495 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1496 skb_frag_ref(skb, i);
1497 frag = &skb_shinfo(skb)->frags[i];
1498 }
1499 } else if (i < MAX_SKB_FRAGS) {
1500 if (copy > PAGE_SIZE)
1501 copy = PAGE_SIZE;
1502 page = alloc_pages(sk->sk_allocation, 0);
1503 if (page == NULL) {
1504 err = -ENOMEM;
1505 goto error;
1506 }
1507 sk->sk_sndmsg_page = page;
1508 sk->sk_sndmsg_off = 0;
1509
1510 skb_fill_page_desc(skb, i, page, 0, 0);
1511 frag = &skb_shinfo(skb)->frags[i];
1512 } else {
1513 err = -EMSGSIZE;
1514 goto error;
1515 }
1516 if (getfrag(from,
1517 skb_frag_address(frag) + skb_frag_size(frag),
1518 offset, copy, skb->len, skb) < 0) {
1519 err = -EFAULT;
1520 goto error;
1521 }
1522 sk->sk_sndmsg_off += copy;
1523 skb_frag_size_add(frag, copy);
1524 skb->len += copy;
1525 skb->data_len += copy;
1526 skb->truesize += copy;
1527 atomic_add(copy, &sk->sk_wmem_alloc);
1528 }
1529 offset += copy;
1530 length -= copy;
1531 }
1532 return 0;
1533 error:
1534 cork->length -= length;
1535 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1536 return err;
1537 }
1538
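/* Release the per-socket cork state: the duplicated options, the cached
 * route and the stored flow information.
 */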
1539 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1540 {
1541 if (np->cork.opt) {
1542 kfree(np->cork.opt->dst0opt);
1543 kfree(np->cork.opt->dst1opt);
1544 kfree(np->cork.opt->hopopt);
1545 kfree(np->cork.opt->srcrt);
1546 kfree(np->cork.opt);
1547 np->cork.opt = NULL;
1548 }
1549
1550 if (inet->cork.base.dst) {
1551 dst_release(inet->cork.base.dst);
1552 inet->cork.base.dst = NULL;
1553 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1554 }
1555 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1556 }
1557
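/*
 * ip6_push_pending_frames - transmit everything queued by ip6_append_data
 *
 * Collapses the skbs on the write queue into a single packet (with a
 * frag list), pushes the extension headers and the IPv6 header built
 * from the corked flow information, updates the MIB counters and sends
 * the result via ip6_local_out().  The cork state is released in all
 * cases.
 */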
1558 int ip6_push_pending_frames(struct sock *sk)
1559 {
1560 struct sk_buff *skb, *tmp_skb;
1561 struct sk_buff **tail_skb;
1562 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1563 struct inet_sock *inet = inet_sk(sk);
1564 struct ipv6_pinfo *np = inet6_sk(sk);
1565 struct net *net = sock_net(sk);
1566 struct ipv6hdr *hdr;
1567 struct ipv6_txoptions *opt = np->cork.opt;
1568 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1569 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1570 unsigned char proto = fl6->flowi6_proto;
1571 int err = 0;
1572
1573 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1574 goto out;
1575 tail_skb = &(skb_shinfo(skb)->frag_list);
1576
1577 /* move skb->data to ip header from ext header */
1578 if (skb->data < skb_network_header(skb))
1579 __skb_pull(skb, skb_network_offset(skb));
1580 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1581 __skb_pull(tmp_skb, skb_network_header_len(skb));
1582 *tail_skb = tmp_skb;
1583 tail_skb = &(tmp_skb->next);
1584 skb->len += tmp_skb->len;
1585 skb->data_len += tmp_skb->len;
1586 skb->truesize += tmp_skb->truesize;
1587 tmp_skb->destructor = NULL;
1588 tmp_skb->sk = NULL;
1589 }
1590
1591 /* Allow local fragmentation. */
1592 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1593 skb->local_df = 1;
1594
1595 *final_dst = fl6->daddr;
1596 __skb_pull(skb, skb_network_header_len(skb));
1597 if (opt && opt->opt_flen)
1598 ipv6_push_frag_opts(skb, opt, &proto);
1599 if (opt && opt->opt_nflen)
1600 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1601
1602 skb_push(skb, sizeof(struct ipv6hdr));
1603 skb_reset_network_header(skb);
1604 hdr = ipv6_hdr(skb);
1605
1606 *(__be32*)hdr = fl6->flowlabel |
1607 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1608
1609 hdr->hop_limit = np->cork.hop_limit;
1610 hdr->nexthdr = proto;
1611 hdr->saddr = fl6->saddr;
1612 hdr->daddr = *final_dst;
1613
1614 skb->priority = sk->sk_priority;
1615 skb->mark = sk->sk_mark;
1616
1617 skb_dst_set(skb, dst_clone(&rt->dst));
1618 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1619 if (proto == IPPROTO_ICMPV6) {
1620 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1621
1622 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1623 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1624 }
1625
1626 err = ip6_local_out(skb);
1627 if (err) {
1628 if (err > 0)
1629 err = net_xmit_errno(err);
1630 if (err)
1631 goto error;
1632 }
1633
1634 out:
1635 ip6_cork_release(inet, np);
1636 return err;
1637 error:
1638 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1639 goto out;
1640 }
1641
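/* Drop everything still sitting on the socket write queue and release
 * the cork state, counting the discarded packets as OUTDISCARDS.
 */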
1642 void ip6_flush_pending_frames(struct sock *sk)
1643 {
1644 struct sk_buff *skb;
1645
1646 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1647 if (skb_dst(skb))
1648 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1649 IPSTATS_MIB_OUTDISCARDS);
1650 kfree_skb(skb);
1651 }
1652
1653 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1654 }