/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

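/* Final output step: resolve the neighbour for the route's next hop and
 * hand the skb to the device layer.  Multicast packets are looped back
 * to local listeners (and to the multicast router socket) via a cloned
 * skb before transmission.
 */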
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

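/* Decide between direct transmission and fragmentation: packets larger
 * than the path MTU (and not GSO), routes that require fragmentation of
 * all packets (dst_allfrag), or packets capped by a netfilter
 * frag_max_size must go through ip6_fragment().
 */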
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

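/* Entry point for locally generated packets after routing: drop if IPv6
 * is administratively disabled on the output device, otherwise pass the
 * packet through the NF_INET_POST_ROUTING hook to ip6_finish_output().
 */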
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

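/* Deliver a Router Alert packet to every raw socket registered on the
 * matching ip6_ra_chain entry; returns 1 if at least one socket consumed
 * the skb, 0 if forwarding should continue.
 */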
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

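/* For a packet whose destination is subject to proxy NDP, decide how to
 * treat it: 1 means hand it to local input (unicast neighbour discovery
 * for the proxied address), 0 means forward it, -1 means drop it
 * (link-local destinations cannot be proxied).
 */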
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined to
			 * the proxied address are passed to the input
			 * function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

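/* MTU used when forwarding: a locked route metric wins; otherwise fall
 * back to the output device's IPv6 MTU, defaulting to IPV6_MIN_MTU.
 */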
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

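/* True if the packet exceeds the given MTU and may not be fragmented
 * locally; GSO packets are measured by their per-segment network length.
 */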
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu || skb->local_df)
		return false;

	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}

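/* Forwarding path for transit packets: validates forwarding policy and
 * XFRM, handles Router Alert and proxy NDP, decrements the hop limit,
 * emits redirects and Packet Too Big errors where required, and finally
 * passes the packet through the NF_INET_FORWARD hook.
 */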
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on RA packets; we push them to
	 *	user level AS IS without any warranty that the application
	 *	will be able to interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP we
	 *	cannot do anything. Defragmentation would also be a mistake:
	 *	RA packets must not be fragmented, because there is no
	 *	guarantee that different fragments will travel along the
	 *	same path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects for source-routed frames.
	   We also don't send redirects for frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

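/* Propagate per-packet metadata (dst, device, priority, marks, netfilter
 * and security state) from the original skb to a freshly built fragment.
 */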
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

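/* Split an oversized skb into fragments and send each one via @output.
 * The fast path re-uses an existing frag_list when its geometry already
 * matches the MTU; otherwise the slow path copies the payload into newly
 * allocated fragment skbs, each carrying a fragment header.
 */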
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

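/* Helper for ip6_sk_dst_check(): the cached route is considered stale
 * when it is not a host route matching the flow's address and the
 * socket's cached address does not match the flow either.
 */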
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the non-connected case is not
	 * very simple. Take into account that we do not support routing
	 * by source, TOS, and MSG_DONTROUTE	--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is current. If it is a network route,
	 *    we still may check its validity using the saved pointer to
	 *    the last used address: daddr_cache.
	 *    We do not want to save the whole address now (because the
	 *    main consumer of this service is tcp, which does not have
	 *    this problem), so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

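/* Common tail of the dst lookup helpers: perform the route lookup if no
 * dst was supplied, fill in a source address for the flow if it is still
 * unspecified, and (with optimistic DAD) fall back to the default router
 * when the chosen next hop's neighbour entry is not yet valid.
 */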
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router.
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

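/* UFO path for ip6_append_data(): build (or extend) a single large GSO
 * skb that the device will segment into MTU-sized fragments in hardware,
 * using a precomputed fragment id.
 */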
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)
{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(&sk->sk_write_queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

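/* Recompute mtu/maxfraglen while appending: the first fragment must
 * reserve the dst's header_len, while later fragments may treat that
 * header space as data space (unless the route is an XFRM tunnel).
 */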
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				bool pmtuprobe)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = min(*mtu, pmtuprobe ?
				   rt->dst.dev->mtu :
				   dst_mtu(rt->dst.path));
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

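/* Queue data on the socket's write queue as a chain of MTU-sized skbs,
 * corking options and flow information on the first call.  The actual
 * packet is built and sent later by ip6_push_pending_frames().
 */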
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->tot_len : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
				 mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc >=
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

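/* Undo the corking state set up by ip6_append_data(): free the duplicated
 * options, drop the cached dst and clear the cork flow information.
 */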
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

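/* Coalesce the skbs queued by ip6_append_data() into one packet (tail
 * skbs become the frag_list of the first), prepend the extension headers
 * and IPv6 header, and send the result via ip6_local_out().
 */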
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

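/* Drop everything still sitting on the socket's write queue (counting
 * each routed skb as an output discard) and release the cork state.
 */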
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);