net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
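/* Final transmit step: loop multicast packets back to local listeners
 * when required, then resolve the nexthop to a neighbour entry and
 * hand the skb to the neighbour output path.
 */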
59 static int ip6_finish_output2(struct sk_buff *skb)
60 {
61 struct dst_entry *dst = skb_dst(skb);
62 struct net_device *dev = dst->dev;
63 struct neighbour *neigh;
64 struct in6_addr *nexthop;
65 int ret;
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74 ((mroute6_socket(dev_net(dev), skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 newskb, NULL, newskb->dev,
86 dev_loopback_xmit);
87
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(dev_net(dev), idev,
90 IPSTATS_MIB_OUTDISCARDS);
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97 skb->len);
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
105 }
106
107 rcu_read_lock_bh();
108 nexthop = rt6_nexthop((struct rt6_info *)dst);
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
118
119 IP6_INC_STATS(dev_net(dst->dev),
120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121 kfree_skb(skb);
122 return -EINVAL;
123 }
124
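/* Decide between fragmenting and transmitting directly: fragment if
 * the packet exceeds the path MTU and is not GSO, if the route demands
 * fragmentation of every packet (dst_allfrag), or if conntrack defrag
 * recorded a smaller original fragment size (frag_max_size).
 */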
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128 dst_allfrag(skb_dst(skb)) ||
129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
130 return ip6_fragment(skb, ip6_finish_output2);
131 else
132 return ip6_finish_output2(skb);
133 }
134
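/* dst_output() entry point for locally generated packets: drop the
 * packet if IPv6 is administratively disabled on the device, otherwise
 * run the NF_INET_POST_ROUTING hook (skipped for packets flagged
 * IP6SKB_REROUTED) before ip6_finish_output().
 */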
135 int ip6_output(struct sock *sk, struct sk_buff *skb)
136 {
137 struct net_device *dev = skb_dst(skb)->dev;
138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
139 if (unlikely(idev->cnf.disable_ipv6)) {
140 IP6_INC_STATS(dev_net(dev), idev,
141 IPSTATS_MIB_OUTDISCARDS);
142 kfree_skb(skb);
143 return 0;
144 }
145
146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
149 }
150
151 /*
152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
153 */
154
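/* The caller already holds a routed dst on the skb; this pushes any
 * extension headers and the IPv6 header, then hands the packet to
 * NF_INET_LOCAL_OUT. It never fragments: an oversized packet that may
 * not ignore the MTU is dropped and -EMSGSIZE reported to the socket.
 */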
155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
156 struct ipv6_txoptions *opt, int tclass)
157 {
158 struct net *net = sock_net(sk);
159 struct ipv6_pinfo *np = inet6_sk(sk);
160 struct in6_addr *first_hop = &fl6->daddr;
161 struct dst_entry *dst = skb_dst(skb);
162 struct ipv6hdr *hdr;
163 u8 proto = fl6->flowi6_proto;
164 int seg_len = skb->len;
165 int hlimit = -1;
166 u32 mtu;
167
168 if (opt) {
169 unsigned int head_room;
170
171 /* First: extension headers may take lots of space (~8K for now);
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
180 if (skb2 == NULL) {
181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
184 return -ENOBUFS;
185 }
186 consume_skb(skb);
187 skb = skb2;
188 skb_set_owner_w(skb, sk);
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
198 hdr = ipv6_hdr(skb);
199
200 /*
201 * Fill in the IPv6 header
202 */
203 if (np)
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
206 hlimit = ip6_dst_hoplimit(dst);
207
208 ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
209
210 hdr->payload_len = htons(seg_len);
211 hdr->nexthdr = proto;
212 hdr->hop_limit = hlimit;
213
214 hdr->saddr = fl6->saddr;
215 hdr->daddr = *first_hop;
216
217 skb->protocol = htons(ETH_P_IPV6);
218 skb->priority = sk->sk_priority;
219 skb->mark = sk->sk_mark;
220
221 mtu = dst_mtu(dst);
222 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
223 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
224 IPSTATS_MIB_OUT, skb->len);
225 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
226 dst->dev, dst_output);
227 }
228
229 skb->dev = dst->dev;
230 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
231 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
232 kfree_skb(skb);
233 return -EMSGSIZE;
234 }
235
236 EXPORT_SYMBOL(ip6_xmit);
237
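/* Deliver a Router Alert packet to every raw socket that registered
 * for this alert value with the IPV6_ROUTER_ALERT socket option. All
 * matching sockets but the last receive a clone; the last one consumes
 * the original skb. Returns 1 if any socket took the packet.
 */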
238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239 {
240 struct ip6_ra_chain *ra;
241 struct sock *last = NULL;
242
243 read_lock(&ip6_ra_lock);
244 for (ra = ip6_ra_chain; ra; ra = ra->next) {
245 struct sock *sk = ra->sk;
246 if (sk && ra->sel == sel &&
247 (!sk->sk_bound_dev_if ||
248 sk->sk_bound_dev_if == skb->dev->ifindex)) {
249 if (last) {
250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251 if (skb2)
252 rawv6_rcv(last, skb2);
253 }
254 last = sk;
255 }
256 }
257
258 if (last) {
259 rawv6_rcv(last, skb);
260 read_unlock(&ip6_ra_lock);
261 return 1;
262 }
263 read_unlock(&ip6_ra_lock);
264 return 0;
265 }
266
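/* Called for packets whose destination matches a proxy neighbour
 * entry: returns 1 if this is a unicast neighbour discovery message
 * that should be handed to local input, -1 if it must be discarded
 * (link-local destination), and 0 to let it be forwarded normally.
 */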
267 static int ip6_forward_proxy_check(struct sk_buff *skb)
268 {
269 struct ipv6hdr *hdr = ipv6_hdr(skb);
270 u8 nexthdr = hdr->nexthdr;
271 __be16 frag_off;
272 int offset;
273
274 if (ipv6_ext_hdr(nexthdr)) {
275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
276 if (offset < 0)
277 return 0;
278 } else
279 offset = sizeof(struct ipv6hdr);
280
281 if (nexthdr == IPPROTO_ICMPV6) {
282 struct icmp6hdr *icmp6;
283
284 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285 offset + 1 - skb->data)))
286 return 0;
287
288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
289
290 switch (icmp6->icmp6_type) {
291 case NDISC_ROUTER_SOLICITATION:
292 case NDISC_ROUTER_ADVERTISEMENT:
293 case NDISC_NEIGHBOUR_SOLICITATION:
294 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295 case NDISC_REDIRECT:
296 /* For reactions involving unicast neighbor discovery
297 * messages destined to the proxied address, pass them to
298 * the input function.
299 */
300 return 1;
301 default:
302 break;
303 }
304 }
305
306 /*
307 * The proxying router can't forward traffic sent to a link-local
308 * address, so signal the sender and discard the packet. This
309 * behavior is clarified by the MIPv6 specification.
310 */
311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312 dst_link_failure(skb);
313 return -1;
314 }
315
316 return 0;
317 }
318
319 static inline int ip6_forward_finish(struct sk_buff *skb)
320 {
321 return dst_output(skb);
322 }
323
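/* MTU to enforce when forwarding: a locked RTAX_MTU metric wins,
 * otherwise the interface MTU (idev->cnf.mtu6) is used, falling back
 * to IPV6_MIN_MTU when no inet6 device is attached.
 */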
324 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
325 {
326 unsigned int mtu;
327 struct inet6_dev *idev;
328
329 if (dst_metric_locked(dst, RTAX_MTU)) {
330 mtu = dst_metric_raw(dst, RTAX_MTU);
331 if (mtu)
332 return mtu;
333 }
334
335 mtu = IPV6_MIN_MTU;
336 rcu_read_lock();
337 idev = __in6_dev_get(dst->dev);
338 if (idev)
339 mtu = idev->cnf.mtu6;
340 rcu_read_unlock();
341
342 return mtu;
343 }
344
345 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
346 {
347 if (skb->len <= mtu)
348 return false;
349
350 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
351 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
352 return true;
353
354 if (skb->ignore_df)
355 return false;
356
357 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
358 return false;
359
360 return true;
361 }
362
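/* The forwarding path: check that forwarding is enabled and the frame
 * was addressed to us at L2, hand Router Alert packets to interested
 * sockets, enforce the hop limit (ICMPv6 time exceeded), honour proxy
 * NDP, check xfrm policy, possibly send a redirect when the packet
 * leaves on the interface it arrived on, enforce the path MTU (ICMPv6
 * packet too big), then decrement hop_limit and run NF_INET_FORWARD.
 */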
363 int ip6_forward(struct sk_buff *skb)
364 {
365 struct dst_entry *dst = skb_dst(skb);
366 struct ipv6hdr *hdr = ipv6_hdr(skb);
367 struct inet6_skb_parm *opt = IP6CB(skb);
368 struct net *net = dev_net(dst->dev);
369 u32 mtu;
370
371 if (net->ipv6.devconf_all->forwarding == 0)
372 goto error;
373
374 if (skb->pkt_type != PACKET_HOST)
375 goto drop;
376
377 if (skb_warn_if_lro(skb))
378 goto drop;
379
380 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
381 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
382 IPSTATS_MIB_INDISCARDS);
383 goto drop;
384 }
385
386 skb_forward_csum(skb);
387
388 /*
389 * We do NOT process RA packets; we push them
390 * to user level AS IS, without any warranty that the
391 * application will be able to interpret them. The
392 * reason is that we cannot do anything clever here.
393 *
394 * We are not an end node, so if the packet contains
395 * AH/ESP, we cannot do anything with it.
396 * Defragmentation would also be a mistake; RA packets
397 * cannot be fragmented, because there is no warranty
398 * that different fragments will travel along one
399 * path. --ANK
400 */
401 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
402 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
403 return 0;
404 }
405
406 /*
407 * check and decrement hop limit
408 */
409 if (hdr->hop_limit <= 1) {
410 /* Force the OUTPUT device to be used for the source address */
411 skb->dev = dst->dev;
412 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
413 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
414 IPSTATS_MIB_INHDRERRORS);
415
416 kfree_skb(skb);
417 return -ETIMEDOUT;
418 }
419
420 /* XXX: idev->cnf.proxy_ndp? */
421 if (net->ipv6.devconf_all->proxy_ndp &&
422 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
423 int proxied = ip6_forward_proxy_check(skb);
424 if (proxied > 0)
425 return ip6_input(skb);
426 else if (proxied < 0) {
427 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
428 IPSTATS_MIB_INDISCARDS);
429 goto drop;
430 }
431 }
432
433 if (!xfrm6_route_forward(skb)) {
434 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
435 IPSTATS_MIB_INDISCARDS);
436 goto drop;
437 }
438 dst = skb_dst(skb);
439
440 /* IPv6 specs say nothing about it, but it is clear that we cannot
441 send redirects to source-routed frames.
442 We don't send redirects to frames decapsulated from IPsec.
443 */
444 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
445 struct in6_addr *target = NULL;
446 struct inet_peer *peer;
447 struct rt6_info *rt;
448
449 /*
450 * incoming and outgoing devices are the same;
451 * send a redirect.
452 */
453
454 rt = (struct rt6_info *) dst;
455 if (rt->rt6i_flags & RTF_GATEWAY)
456 target = &rt->rt6i_gateway;
457 else
458 target = &hdr->daddr;
459
460 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
461
462 /* Limit redirects both by destination (here)
463 and by source (inside ndisc_send_redirect)
464 */
465 if (inet_peer_xrlim_allow(peer, 1*HZ))
466 ndisc_send_redirect(skb, target);
467 if (peer)
468 inet_putpeer(peer);
469 } else {
470 int addrtype = ipv6_addr_type(&hdr->saddr);
471
472 /* This check is security critical. */
473 if (addrtype == IPV6_ADDR_ANY ||
474 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
475 goto error;
476 if (addrtype & IPV6_ADDR_LINKLOCAL) {
477 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
478 ICMPV6_NOT_NEIGHBOUR, 0);
479 goto error;
480 }
481 }
482
483 mtu = ip6_dst_mtu_forward(dst);
484 if (mtu < IPV6_MIN_MTU)
485 mtu = IPV6_MIN_MTU;
486
487 if (ip6_pkt_too_big(skb, mtu)) {
488 /* Again, force the OUTPUT device to be used for the source address */
489 skb->dev = dst->dev;
490 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
491 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
492 IPSTATS_MIB_INTOOBIGERRORS);
493 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
494 IPSTATS_MIB_FRAGFAILS);
495 kfree_skb(skb);
496 return -EMSGSIZE;
497 }
498
499 if (skb_cow(skb, dst->dev->hard_header_len)) {
500 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
501 IPSTATS_MIB_OUTDISCARDS);
502 goto drop;
503 }
504
505 hdr = ipv6_hdr(skb);
506
507 /* Decrementing the hop limit is delayed until after the skb COW */
508
509 hdr->hop_limit--;
510
511 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
512 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
513 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
514 ip6_forward_finish);
515
516 error:
517 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
518 drop:
519 kfree_skb(skb);
520 return -EINVAL;
521 }
522
523 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
524 {
525 to->pkt_type = from->pkt_type;
526 to->priority = from->priority;
527 to->protocol = from->protocol;
528 skb_dst_drop(to);
529 skb_dst_set(to, dst_clone(skb_dst(from)));
530 to->dev = from->dev;
531 to->mark = from->mark;
532
533 #ifdef CONFIG_NET_SCHED
534 to->tc_index = from->tc_index;
535 #endif
536 nf_copy(to, from);
537 skb_copy_secmark(to, from);
538 }
539
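/* The fragment identification is drawn from a counter selected by
 * jhashing the route's source and destination addresses with a
 * boot-time random key, which makes IDs hard for off-path attackers
 * to predict.
 */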
540 static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
541 {
542 static u32 ip6_idents_hashrnd __read_mostly;
543 u32 hash, id;
544
545 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
546
547 hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
548 hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
549
550 id = ip_idents_reserve(hash, 1);
551 fhdr->identification = htonl(id);
552 }
553
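/* Two strategies: the fast path reuses the skbs already chained on
 * frag_list as the fragments, provided each fits the MTU, is 8-byte
 * aligned and has headroom for the headers; otherwise the slow path
 * allocates a fresh skb per fragment and copies the data. Roughly,
 * with a 1500-byte MTU and a plain 40-byte IPv6 header, a fragment
 * can carry 1500 - 40 - 8 = 1452 bytes of payload, rounded down to
 * 1448 (a multiple of 8) for every fragment but the last.
 */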
554 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
555 {
556 struct sk_buff *frag;
557 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
558 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
559 struct ipv6hdr *tmp_hdr;
560 struct frag_hdr *fh;
561 unsigned int mtu, hlen, left, len;
562 int hroom, troom;
563 __be32 frag_id = 0;
564 int ptr, offset = 0, err = 0;
565 u8 *prevhdr, nexthdr = 0;
566 struct net *net = dev_net(skb_dst(skb)->dev);
567
568 hlen = ip6_find_1stfragopt(skb, &prevhdr);
569 nexthdr = *prevhdr;
570
571 mtu = ip6_skb_dst_mtu(skb);
572
573 /* We must not fragment if the socket is set to force MTU discovery
574 * or if the skb is not generated by a local socket.
575 */
576 if (unlikely(!skb->ignore_df && skb->len > mtu) ||
577 (IP6CB(skb)->frag_max_size &&
578 IP6CB(skb)->frag_max_size > mtu)) {
579 if (skb->sk && dst_allfrag(skb_dst(skb)))
580 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
581
582 skb->dev = skb_dst(skb)->dev;
583 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
584 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
585 IPSTATS_MIB_FRAGFAILS);
586 kfree_skb(skb);
587 return -EMSGSIZE;
588 }
589
590 if (np && np->frag_size < mtu) {
591 if (np->frag_size)
592 mtu = np->frag_size;
593 }
594 mtu -= hlen + sizeof(struct frag_hdr);
595
596 if (skb_has_frag_list(skb)) {
597 int first_len = skb_pagelen(skb);
598 struct sk_buff *frag2;
599
600 if (first_len - hlen > mtu ||
601 ((first_len - hlen) & 7) ||
602 skb_cloned(skb))
603 goto slow_path;
604
605 skb_walk_frags(skb, frag) {
606 /* Correct geometry. */
607 if (frag->len > mtu ||
608 ((frag->len & 7) && frag->next) ||
609 skb_headroom(frag) < hlen)
610 goto slow_path_clean;
611
612 /* Partially cloned skb? */
613 if (skb_shared(frag))
614 goto slow_path_clean;
615
616 BUG_ON(frag->sk);
617 if (skb->sk) {
618 frag->sk = skb->sk;
619 frag->destructor = sock_wfree;
620 }
621 skb->truesize -= frag->truesize;
622 }
623
624 err = 0;
625 offset = 0;
626 frag = skb_shinfo(skb)->frag_list;
627 skb_frag_list_init(skb);
628 /* BUILD HEADER */
629
630 *prevhdr = NEXTHDR_FRAGMENT;
631 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
632 if (!tmp_hdr) {
633 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
634 IPSTATS_MIB_FRAGFAILS);
635 return -ENOMEM;
636 }
637
638 __skb_pull(skb, hlen);
639 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
640 __skb_push(skb, hlen);
641 skb_reset_network_header(skb);
642 memcpy(skb_network_header(skb), tmp_hdr, hlen);
643
644 ipv6_select_ident(fh, rt);
645 fh->nexthdr = nexthdr;
646 fh->reserved = 0;
647 fh->frag_off = htons(IP6_MF);
648 frag_id = fh->identification;
649
650 first_len = skb_pagelen(skb);
651 skb->data_len = first_len - skb_headlen(skb);
652 skb->len = first_len;
653 ipv6_hdr(skb)->payload_len = htons(first_len -
654 sizeof(struct ipv6hdr));
655
656 dst_hold(&rt->dst);
657
658 for (;;) {
659 /* Prepare the header of the next frame
660 * before the previous one goes down. */
661 if (frag) {
662 frag->ip_summed = CHECKSUM_NONE;
663 skb_reset_transport_header(frag);
664 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
665 __skb_push(frag, hlen);
666 skb_reset_network_header(frag);
667 memcpy(skb_network_header(frag), tmp_hdr,
668 hlen);
669 offset += skb->len - hlen - sizeof(struct frag_hdr);
670 fh->nexthdr = nexthdr;
671 fh->reserved = 0;
672 fh->frag_off = htons(offset);
673 if (frag->next != NULL)
674 fh->frag_off |= htons(IP6_MF);
675 fh->identification = frag_id;
676 ipv6_hdr(frag)->payload_len =
677 htons(frag->len -
678 sizeof(struct ipv6hdr));
679 ip6_copy_metadata(frag, skb);
680 }
681
682 err = output(skb);
683 if (!err)
684 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
685 IPSTATS_MIB_FRAGCREATES);
686
687 if (err || !frag)
688 break;
689
690 skb = frag;
691 frag = skb->next;
692 skb->next = NULL;
693 }
694
695 kfree(tmp_hdr);
696
697 if (err == 0) {
698 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
699 IPSTATS_MIB_FRAGOKS);
700 ip6_rt_put(rt);
701 return 0;
702 }
703
704 while (frag) {
705 skb = frag->next;
706 kfree_skb(frag);
707 frag = skb;
708 }
709
710 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
711 IPSTATS_MIB_FRAGFAILS);
712 ip6_rt_put(rt);
713 return err;
714
715 slow_path_clean:
716 skb_walk_frags(skb, frag2) {
717 if (frag2 == frag)
718 break;
719 frag2->sk = NULL;
720 frag2->destructor = NULL;
721 skb->truesize += frag2->truesize;
722 }
723 }
724
725 slow_path:
726 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
727 skb_checksum_help(skb))
728 goto fail;
729
730 left = skb->len - hlen; /* Space per frame */
731 ptr = hlen; /* Where to start from */
732
733 /*
734 * Fragment the datagram.
735 */
736
737 *prevhdr = NEXTHDR_FRAGMENT;
738 hroom = LL_RESERVED_SPACE(rt->dst.dev);
739 troom = rt->dst.dev->needed_tailroom;
740
741 /*
742 * Keep copying data until we run out.
743 */
744 while (left > 0) {
745 len = left;
746 /* IF: it doesn't fit, use 'mtu' - the data space left */
747 if (len > mtu)
748 len = mtu;
749 /* IF: we are not sending up to and including the packet end
750 then align the next start on an eight-byte boundary */
751 if (len < left) {
752 len &= ~7;
753 }
754 /*
755 * Allocate buffer.
756 */
757
758 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
759 hroom + troom, GFP_ATOMIC)) == NULL) {
760 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
761 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
762 IPSTATS_MIB_FRAGFAILS);
763 err = -ENOMEM;
764 goto fail;
765 }
766
767 /*
768 * Set up data on packet
769 */
770
771 ip6_copy_metadata(frag, skb);
772 skb_reserve(frag, hroom);
773 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
774 skb_reset_network_header(frag);
775 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
776 frag->transport_header = (frag->network_header + hlen +
777 sizeof(struct frag_hdr));
778
779 /*
780 * Charge the memory for the fragment to any owner
781 * it might possess
782 */
783 if (skb->sk)
784 skb_set_owner_w(frag, skb->sk);
785
786 /*
787 * Copy the packet header into the new buffer.
788 */
789 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
790
791 /*
792 * Build fragment header.
793 */
794 fh->nexthdr = nexthdr;
795 fh->reserved = 0;
796 if (!frag_id) {
797 ipv6_select_ident(fh, rt);
798 frag_id = fh->identification;
799 } else
800 fh->identification = frag_id;
801
802 /*
803 * Copy a block of the IP datagram.
804 */
805 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
806 BUG();
807 left -= len;
808
809 fh->frag_off = htons(offset);
810 if (left > 0)
811 fh->frag_off |= htons(IP6_MF);
812 ipv6_hdr(frag)->payload_len = htons(frag->len -
813 sizeof(struct ipv6hdr));
814
815 ptr += len;
816 offset += len;
817
818 /*
819 * Put this fragment into the sending queue.
820 */
821 err = output(frag);
822 if (err)
823 goto fail;
824
825 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
826 IPSTATS_MIB_FRAGCREATES);
827 }
828 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
829 IPSTATS_MIB_FRAGOKS);
830 consume_skb(skb);
831 return err;
832
833 fail:
834 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
835 IPSTATS_MIB_FRAGFAILS);
836 kfree_skb(skb);
837 return err;
838 }
839
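/* Helper for ip6_sk_dst_check() below: returns non-zero when neither
 * the route key (as a /128 host entry) nor the socket's cached peer
 * address confirms that the cached route still matches fl_addr.
 */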
840 static inline int ip6_rt_check(const struct rt6key *rt_key,
841 const struct in6_addr *fl_addr,
842 const struct in6_addr *addr_cache)
843 {
844 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
845 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
846 }
847
848 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
849 struct dst_entry *dst,
850 const struct flowi6 *fl6)
851 {
852 struct ipv6_pinfo *np = inet6_sk(sk);
853 struct rt6_info *rt;
854
855 if (!dst)
856 goto out;
857
858 if (dst->ops->family != AF_INET6) {
859 dst_release(dst);
860 return NULL;
861 }
862
863 rt = (struct rt6_info *)dst;
864 /* Yes, checking route validity in the unconnected
865 * case is not very simple. Take into account
866 * that we do not support routing by source, TOS,
867 * or MSG_DONTROUTE --ANK (980726)
868 *
869 * 1. ip6_rt_check(): If the route was a host route,
870 * check that the cached destination is current.
871 * If it is a network route, we still may
872 * check its validity using a saved pointer
873 * to the last used address: daddr_cache.
874 * We do not want to save the whole address now
875 * (because the main consumer of this service
876 * is TCP, which does not have this problem),
877 * so the last trick works only on connected
878 * sockets.
879 * 2. oif should also be the same.
880 */
881 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
882 #ifdef CONFIG_IPV6_SUBTREES
883 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
884 #endif
885 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
886 dst_release(dst);
887 dst = NULL;
888 }
889
890 out:
891 return dst;
892 }
893
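/* Common tail of the lookup helpers below: resolve the route if the
 * caller did not supply one and pick a source address when the flow
 * leaves it unspecified. With optimistic DAD, if the nexthop neighbour
 * is still unresolved and the chosen source address is marked
 * IFA_F_OPTIMISTIC, the lookup is redone towards the default router.
 */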
894 static int ip6_dst_lookup_tail(struct sock *sk,
895 struct dst_entry **dst, struct flowi6 *fl6)
896 {
897 struct net *net = sock_net(sk);
898 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
899 struct neighbour *n;
900 struct rt6_info *rt;
901 #endif
902 int err;
903
904 if (*dst == NULL)
905 *dst = ip6_route_output(net, sk, fl6);
906
907 if ((err = (*dst)->error))
908 goto out_err_release;
909
910 if (ipv6_addr_any(&fl6->saddr)) {
911 struct rt6_info *rt = (struct rt6_info *) *dst;
912 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
913 sk ? inet6_sk(sk)->srcprefs : 0,
914 &fl6->saddr);
915 if (err)
916 goto out_err_release;
917 }
918
919 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
920 /*
921 * Here, if the dst entry we've looked up
922 * has a neighbour entry that is in the INCOMPLETE
923 * state and the src address from the flow is
924 * marked as OPTIMISTIC, we release the found
925 * dst entry and replace it with the dst entry
926 * of the nexthop router.
927 */
928 rt = (struct rt6_info *) *dst;
929 rcu_read_lock_bh();
930 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
931 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
932 rcu_read_unlock_bh();
933
934 if (err) {
935 struct inet6_ifaddr *ifp;
936 struct flowi6 fl_gw6;
937 int redirect;
938
939 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
940 (*dst)->dev, 1);
941
942 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
943 if (ifp)
944 in6_ifa_put(ifp);
945
946 if (redirect) {
947 /*
948 * We need to get the dst entry for the
949 * default router instead
950 */
951 dst_release(*dst);
952 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
953 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
954 *dst = ip6_route_output(net, sk, &fl_gw6);
955 if ((err = (*dst)->error))
956 goto out_err_release;
957 }
958 }
959 #endif
960
961 return 0;
962
963 out_err_release:
964 if (err == -ENETUNREACH)
965 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
966 dst_release(*dst);
967 *dst = NULL;
968 return err;
969 }
970
971 /**
972 * ip6_dst_lookup - perform route lookup on flow
973 * @sk: socket which provides route info
974 * @dst: pointer to dst_entry * for result
975 * @fl6: flow to lookup
976 *
977 * This function performs a route lookup on the given flow.
978 *
979 * It returns zero on success, or a standard errno code on error.
980 */
981 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
982 {
983 *dst = NULL;
984 return ip6_dst_lookup_tail(sk, dst, fl6);
985 }
986 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
987
988 /**
989 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
990 * @sk: socket which provides route info
991 * @fl6: flow to lookup
992 * @final_dst: final destination address for ipsec lookup
993 *
994 * This function performs a route lookup on the given flow.
995 *
996 * It returns a valid dst pointer on success, or a pointer encoded
997 * error code.
998 */
999 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1000 const struct in6_addr *final_dst)
1001 {
1002 struct dst_entry *dst = NULL;
1003 int err;
1004
1005 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1006 if (err)
1007 return ERR_PTR(err);
1008 if (final_dst)
1009 fl6->daddr = *final_dst;
1010
1011 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1012 }
1013 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1014
1015 /**
1016 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1017 * @sk: socket which provides the dst cache and route info
1018 * @fl6: flow to lookup
1019 * @final_dst: final destination address for ipsec lookup
1020 *
1021 * This function performs a route lookup on the given flow with the
1022 * possibility of using the cached route in the socket if it is valid.
1023 * It will take the socket dst lock when operating on the dst cache.
1024 * As a result, this function can only be used in process context.
1025 *
1026 * It returns a valid dst pointer on success, or a pointer encoded
1027 * error code.
1028 */
1029 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1030 const struct in6_addr *final_dst)
1031 {
1032 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1033 int err;
1034
1035 dst = ip6_sk_dst_check(sk, dst, fl6);
1036
1037 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1038 if (err)
1039 return ERR_PTR(err);
1040 if (final_dst)
1041 fl6->daddr = *final_dst;
1042
1043 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1044 }
1045 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1046
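/* UFO path: instead of fragmenting in software, build one oversized
 * skb, mark it CHECKSUM_PARTIAL with SKB_GSO_UDP, and set gso_size to
 * the per-fragment payload (MTU minus headers, rounded down to a
 * multiple of 8) so the GSO layer or the NIC emits the fragments.
 */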
1047 static inline int ip6_ufo_append_data(struct sock *sk,
1048 int getfrag(void *from, char *to, int offset, int len,
1049 int odd, struct sk_buff *skb),
1050 void *from, int length, int hh_len, int fragheaderlen,
1051 int transhdrlen, int mtu, unsigned int flags,
1052 struct rt6_info *rt)
1053
1054 {
1055 struct sk_buff *skb;
1056 struct frag_hdr fhdr;
1057 int err;
1058
1059 /* The network device supports UDP large send offload,
1060 * so create one single skb containing the complete
1061 * UDP datagram.
1062 */
1063 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1064 skb = sock_alloc_send_skb(sk,
1065 hh_len + fragheaderlen + transhdrlen + 20,
1066 (flags & MSG_DONTWAIT), &err);
1067 if (skb == NULL)
1068 return err;
1069
1070 /* reserve space for Hardware header */
1071 skb_reserve(skb, hh_len);
1072
1073 /* create space for UDP/IP header */
1074 skb_put(skb, fragheaderlen + transhdrlen);
1075
1076 /* initialize network header pointer */
1077 skb_reset_network_header(skb);
1078
1079 /* initialize protocol header pointer */
1080 skb->transport_header = skb->network_header + fragheaderlen;
1081
1082 skb->protocol = htons(ETH_P_IPV6);
1083 skb->csum = 0;
1084
1085 __skb_queue_tail(&sk->sk_write_queue, skb);
1086 } else if (skb_is_gso(skb)) {
1087 goto append;
1088 }
1089
1090 skb->ip_summed = CHECKSUM_PARTIAL;
1091 /* Specify the length of each IPv6 datagram fragment.
1092 * It has to be a multiple of 8.
1093 */
1094 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1095 sizeof(struct frag_hdr)) & ~7;
1096 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1097 ipv6_select_ident(&fhdr, rt);
1098 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1099
1100 append:
1101 return skb_append_datato_frags(sk, skb, getfrag, from,
1102 (length - transhdrlen));
1103 }
1104
1105 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1106 gfp_t gfp)
1107 {
1108 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1109 }
1110
1111 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1112 gfp_t gfp)
1113 {
1114 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1115 }
1116
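/* Recompute the per-fragment budget for a corked message: the first
 * fragment reserves dst->header_len, later fragments treat that space
 * as data. Skipped for DST_XFRM_TUNNEL routes, which keep the mtu
 * chosen at cork setup.
 */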
1117 static void ip6_append_data_mtu(unsigned int *mtu,
1118 int *maxfraglen,
1119 unsigned int fragheaderlen,
1120 struct sk_buff *skb,
1121 struct rt6_info *rt,
1122 unsigned int orig_mtu)
1123 {
1124 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1125 if (skb == NULL) {
1126 /* first fragment, reserve header_len */
1127 *mtu = orig_mtu - rt->dst.header_len;
1128
1129 } else {
1130 /*
1131 * this fragment is not the first; the header
1132 * space is regarded as data space.
1133 */
1134 *mtu = orig_mtu;
1135 }
1136 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1137 + fragheaderlen - sizeof(struct frag_hdr);
1138 }
1139 }
1140
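/* Datagram sockets build packets incrementally with this function and
 * transmit them with ip6_push_pending_frames(). Roughly, as in
 * udp_v6_sendmsg():
 *
 *	err = ip6_append_data(sk, getfrag, msg, len,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl6, rt, msg->msg_flags, dontfrag);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corked)
 *		err = ip6_push_pending_frames(sk);
 */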
1141 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1142 int offset, int len, int odd, struct sk_buff *skb),
1143 void *from, int length, int transhdrlen,
1144 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1145 struct rt6_info *rt, unsigned int flags, int dontfrag)
1146 {
1147 struct inet_sock *inet = inet_sk(sk);
1148 struct ipv6_pinfo *np = inet6_sk(sk);
1149 struct inet_cork *cork;
1150 struct sk_buff *skb, *skb_prev = NULL;
1151 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1152 int exthdrlen;
1153 int dst_exthdrlen;
1154 int hh_len;
1155 int copy;
1156 int err;
1157 int offset = 0;
1158 __u8 tx_flags = 0;
1159
1160 if (flags & MSG_PROBE)
1161 return 0;
1162 cork = &inet->cork.base;
1163 if (skb_queue_empty(&sk->sk_write_queue)) {
1164 /*
1165 * setup for corking
1166 */
1167 if (opt) {
1168 if (WARN_ON(np->cork.opt))
1169 return -EINVAL;
1170
1171 np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1172 if (unlikely(np->cork.opt == NULL))
1173 return -ENOBUFS;
1174
1175 np->cork.opt->tot_len = opt->tot_len;
1176 np->cork.opt->opt_flen = opt->opt_flen;
1177 np->cork.opt->opt_nflen = opt->opt_nflen;
1178
1179 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1180 sk->sk_allocation);
1181 if (opt->dst0opt && !np->cork.opt->dst0opt)
1182 return -ENOBUFS;
1183
1184 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1185 sk->sk_allocation);
1186 if (opt->dst1opt && !np->cork.opt->dst1opt)
1187 return -ENOBUFS;
1188
1189 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1190 sk->sk_allocation);
1191 if (opt->hopopt && !np->cork.opt->hopopt)
1192 return -ENOBUFS;
1193
1194 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1195 sk->sk_allocation);
1196 if (opt->srcrt && !np->cork.opt->srcrt)
1197 return -ENOBUFS;
1198
1199 /* need source address above --miyazawa */
1200 }
1201 dst_hold(&rt->dst);
1202 cork->dst = &rt->dst;
1203 inet->cork.fl.u.ip6 = *fl6;
1204 np->cork.hop_limit = hlimit;
1205 np->cork.tclass = tclass;
1206 if (rt->dst.flags & DST_XFRM_TUNNEL)
1207 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1208 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1209 else
1210 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1211 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1212 if (np->frag_size < mtu) {
1213 if (np->frag_size)
1214 mtu = np->frag_size;
1215 }
1216 cork->fragsize = mtu;
1217 if (dst_allfrag(rt->dst.path))
1218 cork->flags |= IPCORK_ALLFRAG;
1219 cork->length = 0;
1220 exthdrlen = (opt ? opt->opt_flen : 0);
1221 length += exthdrlen;
1222 transhdrlen += exthdrlen;
1223 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1224 } else {
1225 rt = (struct rt6_info *)cork->dst;
1226 fl6 = &inet->cork.fl.u.ip6;
1227 opt = np->cork.opt;
1228 transhdrlen = 0;
1229 exthdrlen = 0;
1230 dst_exthdrlen = 0;
1231 mtu = cork->fragsize;
1232 }
1233 orig_mtu = mtu;
1234
1235 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1236
1237 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1238 (opt ? opt->opt_nflen : 0);
1239 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1240 sizeof(struct frag_hdr);
1241
1242 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1243 unsigned int maxnonfragsize, headersize;
1244
1245 headersize = sizeof(struct ipv6hdr) +
1246 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1247 (dst_allfrag(&rt->dst) ?
1248 sizeof(struct frag_hdr) : 0) +
1249 rt->rt6i_nfheader_len;
1250
1251 if (ip6_sk_ignore_df(sk))
1252 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1253 else
1254 maxnonfragsize = mtu;
1255
1256 /* dontfrag active */
1257 if ((cork->length + length > mtu - headersize) && dontfrag &&
1258 (sk->sk_protocol == IPPROTO_UDP ||
1259 sk->sk_protocol == IPPROTO_RAW)) {
1260 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1261 sizeof(struct ipv6hdr));
1262 goto emsgsize;
1263 }
1264
1265 if (cork->length + length > maxnonfragsize - headersize) {
1266 emsgsize:
1267 ipv6_local_error(sk, EMSGSIZE, fl6,
1268 mtu - headersize +
1269 sizeof(struct ipv6hdr));
1270 return -EMSGSIZE;
1271 }
1272 }
1273
1274 /* For UDP, check if TX timestamp is enabled */
1275 if (sk->sk_type == SOCK_DGRAM)
1276 sock_tx_timestamp(sk, &tx_flags);
1277
1278 /*
1279 * Let's try using as much space as possible.
1280 * Use MTU if total length of the message fits into the MTU.
1281 * Otherwise, we need to reserve fragment header and
1282 * fragment alignment (= 8-15 octets, in total).
1283 *
1284 * Note that we may need to "move" the data from the tail
1285 * of the buffer to the new fragment when we split
1286 * the message.
1287 *
1288 * FIXME: It may be fragmented into multiple chunks
1289 * at once if non-fragmentable extension headers
1290 * are too large.
1291 * --yoshfuji
1292 */
1293
1294 skb = skb_peek_tail(&sk->sk_write_queue);
1295 cork->length += length;
1296 if (((length > mtu) ||
1297 (skb && skb_is_gso(skb))) &&
1298 (sk->sk_protocol == IPPROTO_UDP) &&
1299 (rt->dst.dev->features & NETIF_F_UFO)) {
1300 err = ip6_ufo_append_data(sk, getfrag, from, length,
1301 hh_len, fragheaderlen,
1302 transhdrlen, mtu, flags, rt);
1303 if (err)
1304 goto error;
1305 return 0;
1306 }
1307
1308 if (!skb)
1309 goto alloc_new_skb;
1310
1311 while (length > 0) {
1312 /* Check if the remaining data fits into current packet. */
1313 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1314 if (copy < length)
1315 copy = maxfraglen - skb->len;
1316
1317 if (copy <= 0) {
1318 char *data;
1319 unsigned int datalen;
1320 unsigned int fraglen;
1321 unsigned int fraggap;
1322 unsigned int alloclen;
1323 alloc_new_skb:
1324 /* There's no room in the current skb */
1325 if (skb)
1326 fraggap = skb->len - maxfraglen;
1327 else
1328 fraggap = 0;
1329 /* update mtu and maxfraglen if necessary */
1330 if (skb == NULL || skb_prev == NULL)
1331 ip6_append_data_mtu(&mtu, &maxfraglen,
1332 fragheaderlen, skb, rt,
1333 orig_mtu);
1334
1335 skb_prev = skb;
1336
1337 /*
1338 * If remaining data exceeds the mtu,
1339 * we know we need more fragment(s).
1340 */
1341 datalen = length + fraggap;
1342
1343 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1344 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1345 if ((flags & MSG_MORE) &&
1346 !(rt->dst.dev->features & NETIF_F_SG))
1347 alloclen = mtu;
1348 else
1349 alloclen = datalen + fragheaderlen;
1350
1351 alloclen += dst_exthdrlen;
1352
1353 if (datalen != length + fraggap) {
1354 /*
1355 * this is not the last fragment; the trailer
1356 * space is regarded as data space.
1357 */
1358 datalen += rt->dst.trailer_len;
1359 }
1360
1361 alloclen += rt->dst.trailer_len;
1362 fraglen = datalen + fragheaderlen;
1363
1364 /*
1365 * We just reserve space for fragment header.
1366 * Note: this may be an overallocation if the message
1367 * (without MSG_MORE) fits into the MTU.
1368 */
1369 alloclen += sizeof(struct frag_hdr);
1370
1371 if (transhdrlen) {
1372 skb = sock_alloc_send_skb(sk,
1373 alloclen + hh_len,
1374 (flags & MSG_DONTWAIT), &err);
1375 } else {
1376 skb = NULL;
1377 if (atomic_read(&sk->sk_wmem_alloc) <=
1378 2 * sk->sk_sndbuf)
1379 skb = sock_wmalloc(sk,
1380 alloclen + hh_len, 1,
1381 sk->sk_allocation);
1382 if (unlikely(skb == NULL))
1383 err = -ENOBUFS;
1384 else {
1385 /* Only the initial fragment
1386 * is time stamped.
1387 */
1388 tx_flags = 0;
1389 }
1390 }
1391 if (skb == NULL)
1392 goto error;
1393 /*
1394 * Fill in the control structures
1395 */
1396 skb->protocol = htons(ETH_P_IPV6);
1397 skb->ip_summed = CHECKSUM_NONE;
1398 skb->csum = 0;
1399 /* reserve for fragmentation and ipsec header */
1400 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1401 dst_exthdrlen);
1402
1403 if (sk->sk_type == SOCK_DGRAM)
1404 skb_shinfo(skb)->tx_flags = tx_flags;
1405
1406 /*
1407 * Find where to start putting bytes
1408 */
1409 data = skb_put(skb, fraglen);
1410 skb_set_network_header(skb, exthdrlen);
1411 data += fragheaderlen;
1412 skb->transport_header = (skb->network_header +
1413 fragheaderlen);
1414 if (fraggap) {
1415 skb->csum = skb_copy_and_csum_bits(
1416 skb_prev, maxfraglen,
1417 data + transhdrlen, fraggap, 0);
1418 skb_prev->csum = csum_sub(skb_prev->csum,
1419 skb->csum);
1420 data += fraggap;
1421 pskb_trim_unique(skb_prev, maxfraglen);
1422 }
1423 copy = datalen - transhdrlen - fraggap;
1424
1425 if (copy < 0) {
1426 err = -EINVAL;
1427 kfree_skb(skb);
1428 goto error;
1429 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1430 err = -EFAULT;
1431 kfree_skb(skb);
1432 goto error;
1433 }
1434
1435 offset += copy;
1436 length -= datalen - fraggap;
1437 transhdrlen = 0;
1438 exthdrlen = 0;
1439 dst_exthdrlen = 0;
1440
1441 /*
1442 * Put the packet on the pending queue
1443 */
1444 __skb_queue_tail(&sk->sk_write_queue, skb);
1445 continue;
1446 }
1447
1448 if (copy > length)
1449 copy = length;
1450
1451 if (!(rt->dst.dev->features & NETIF_F_SG)) {
1452 unsigned int off;
1453
1454 off = skb->len;
1455 if (getfrag(from, skb_put(skb, copy),
1456 offset, copy, off, skb) < 0) {
1457 __skb_trim(skb, off);
1458 err = -EFAULT;
1459 goto error;
1460 }
1461 } else {
1462 int i = skb_shinfo(skb)->nr_frags;
1463 struct page_frag *pfrag = sk_page_frag(sk);
1464
1465 err = -ENOMEM;
1466 if (!sk_page_frag_refill(sk, pfrag))
1467 goto error;
1468
1469 if (!skb_can_coalesce(skb, i, pfrag->page,
1470 pfrag->offset)) {
1471 err = -EMSGSIZE;
1472 if (i == MAX_SKB_FRAGS)
1473 goto error;
1474
1475 __skb_fill_page_desc(skb, i, pfrag->page,
1476 pfrag->offset, 0);
1477 skb_shinfo(skb)->nr_frags = ++i;
1478 get_page(pfrag->page);
1479 }
1480 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1481 if (getfrag(from,
1482 page_address(pfrag->page) + pfrag->offset,
1483 offset, copy, skb->len, skb) < 0)
1484 goto error_efault;
1485
1486 pfrag->offset += copy;
1487 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1488 skb->len += copy;
1489 skb->data_len += copy;
1490 skb->truesize += copy;
1491 atomic_add(copy, &sk->sk_wmem_alloc);
1492 }
1493 offset += copy;
1494 length -= copy;
1495 }
1496
1497 return 0;
1498
1499 error_efault:
1500 err = -EFAULT;
1501 error:
1502 cork->length -= length;
1503 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1504 return err;
1505 }
1506 EXPORT_SYMBOL_GPL(ip6_append_data);
1507
1508 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1509 {
1510 if (np->cork.opt) {
1511 kfree(np->cork.opt->dst0opt);
1512 kfree(np->cork.opt->dst1opt);
1513 kfree(np->cork.opt->hopopt);
1514 kfree(np->cork.opt->srcrt);
1515 kfree(np->cork.opt);
1516 np->cork.opt = NULL;
1517 }
1518
1519 if (inet->cork.base.dst) {
1520 dst_release(inet->cork.base.dst);
1521 inet->cork.base.dst = NULL;
1522 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1523 }
1524 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1525 }
1526
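/* Coalesce everything queued on sk_write_queue into a single skb
 * chain, prepend the extension headers and the IPv6 header recorded
 * in the cork, and hand the result to ip6_local_out(). The cork state
 * is released whether transmission succeeds or fails.
 */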
1527 int ip6_push_pending_frames(struct sock *sk)
1528 {
1529 struct sk_buff *skb, *tmp_skb;
1530 struct sk_buff **tail_skb;
1531 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1532 struct inet_sock *inet = inet_sk(sk);
1533 struct ipv6_pinfo *np = inet6_sk(sk);
1534 struct net *net = sock_net(sk);
1535 struct ipv6hdr *hdr;
1536 struct ipv6_txoptions *opt = np->cork.opt;
1537 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1538 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1539 unsigned char proto = fl6->flowi6_proto;
1540 int err = 0;
1541
1542 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1543 goto out;
1544 tail_skb = &(skb_shinfo(skb)->frag_list);
1545
1546 /* move skb->data to the IP header from the ext header */
1547 if (skb->data < skb_network_header(skb))
1548 __skb_pull(skb, skb_network_offset(skb));
1549 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1550 __skb_pull(tmp_skb, skb_network_header_len(skb));
1551 *tail_skb = tmp_skb;
1552 tail_skb = &(tmp_skb->next);
1553 skb->len += tmp_skb->len;
1554 skb->data_len += tmp_skb->len;
1555 skb->truesize += tmp_skb->truesize;
1556 tmp_skb->destructor = NULL;
1557 tmp_skb->sk = NULL;
1558 }
1559
1560 /* Allow local fragmentation. */
1561 skb->ignore_df = ip6_sk_ignore_df(sk);
1562
1563 *final_dst = fl6->daddr;
1564 __skb_pull(skb, skb_network_header_len(skb));
1565 if (opt && opt->opt_flen)
1566 ipv6_push_frag_opts(skb, opt, &proto);
1567 if (opt && opt->opt_nflen)
1568 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1569
1570 skb_push(skb, sizeof(struct ipv6hdr));
1571 skb_reset_network_header(skb);
1572 hdr = ipv6_hdr(skb);
1573
1574 ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1575 hdr->hop_limit = np->cork.hop_limit;
1576 hdr->nexthdr = proto;
1577 hdr->saddr = fl6->saddr;
1578 hdr->daddr = *final_dst;
1579
1580 skb->priority = sk->sk_priority;
1581 skb->mark = sk->sk_mark;
1582
1583 skb_dst_set(skb, dst_clone(&rt->dst));
1584 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1585 if (proto == IPPROTO_ICMPV6) {
1586 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1587
1588 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1589 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1590 }
1591
1592 err = ip6_local_out(skb);
1593 if (err) {
1594 if (err > 0)
1595 err = net_xmit_errno(err);
1596 if (err)
1597 goto error;
1598 }
1599
1600 out:
1601 ip6_cork_release(inet, np);
1602 return err;
1603 error:
1604 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1605 goto out;
1606 }
1607 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1608
1609 void ip6_flush_pending_frames(struct sock *sk)
1610 {
1611 struct sk_buff *skb;
1612
1613 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1614 if (skb_dst(skb))
1615 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1616 IPSTATS_MIB_OUTDISCARDS);
1617 kfree_skb(skb);
1618 }
1619
1620 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1621 }
1622 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);