/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov		:	arithmetic in fragmentation.
 *					extension headers are implemented.
 *					route changes now work.
 *					ip6_forward does not confuse sniffers.
 *					etc.
 *
 *	H. von Brand		:	Added missing #include <linux/string.h>
 *	Imran Patel		:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *				:	add ip6_append_data and related functions
 *					for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

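/* Final transmit step: handle multicast loopback and scope checks, then
 * resolve the neighbour for the route's nexthop and hand the skb to the
 * neighbour output path.
 */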
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

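/* Fragment the packet if it exceeds the path MTU, if the route demands
 * fragmentation on every packet, or if conntrack recorded a smaller
 * frag_max_size; otherwise transmit it directly.
 */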
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

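/* dst output entry point for locally generated packets: drop if IPv6 is
 * administratively disabled on the output device, then run the netfilter
 * POST_ROUTING hook (unless the packet was rerouted) before finishing output.
 */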
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

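/* Deliver a copy of a Router Alert packet to every raw socket that has
 * registered for this RA selector and is either unbound or bound to the
 * incoming device.  Returns 1 if the packet was consumed.
 */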
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

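/* Decide what to do with a packet whose destination is an NDP-proxied
 * address: 1 means hand it to local input (unicast neighbour discovery),
 * -1 means reject it (link-local destination), 0 means keep forwarding.
 */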
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(skb);
}

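/* MTU to use when forwarding over this dst: a locked RTAX_MTU metric takes
 * precedence; otherwise fall back to the output device's IPv6 MTU.
 */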
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

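/* Return true if the packet must elicit a Packet Too Big error instead of
 * being forwarded.
 */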
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that the application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not an end node, so if the packet contains
	 * AH/ESP we cannot do anything.
	 * Defragmentation would also be a mistake: RA packets
	 * cannot be fragmented, because there is no guarantee
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	 * cannot send redirects to source-routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 * incoming and outgoing devices are the same,
		 * so send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

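/* Copy the per-packet metadata that every fragment must inherit from the
 * original skb (type, priority, protocol, dst, device, mark, conntrack).
 */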
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

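/* Fragment an IPv6 packet and feed each fragment to @output.  The fast path
 * reuses an existing frag_list as ready-made fragments; the slow path
 * allocates new skbs and copies the payload block by block.
 */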
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(net, fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare the header of the next frame
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 * Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 * Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end,
		 * then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 * Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 * Charge the memory for the fragment to any owner
		 * it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 * Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 * Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(net, fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 * Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 * Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

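/* Returns true when the cached route can no longer be trusted for fl_addr:
 * it is neither a host route for that address nor validated by the cached
 * last-used address.
 */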
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

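/* Common tail of the dst lookup helpers: perform the route lookup if needed,
 * pick a source address, and (with optimistic DAD) fall back to the default
 * router's dst entry when the chosen source address is still tentative.
 */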
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (!*dst)
		*dst = ip6_route_output(net, sk, fl6);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry
	 * that is not in a VALID state and the source address from
	 * the flow is marked as OPTIMISTIC, we release the found
	 * dst entry and replace it with the dst entry of the
	 * nexthop router.
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

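/* Build (or extend) a single oversized UDP skb that the device will segment
 * via UFO, instead of fragmenting it in software.
 */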
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* The network device supports UDP large send offload, so create
	 * one single skb containing the complete UDP datagram.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	ipv6_select_ident(sock_net(sk), &fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

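/* Recompute mtu/maxfraglen for the next fragment: only the first fragment
 * must reserve the dst's header_len (unless an XFRM tunnel fixed the MTU).
 */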
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not the first; the header
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

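/* Initialise the cork state for a corked send: duplicate the tx options,
 * pin the route, and record hop limit, traffic class and fragment size.
 */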
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above	--miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

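/* Core of ip6_append_data()/ip6_make_skb(): append user data to the queue,
 * growing the tail skb or allocating new fragment-sized skbs as needed.
 */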
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and the device
	 * supports checksum offloading, let's use it.
	 */
	if (!skb && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment; the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

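/* Release everything pinned by the cork: the duplicated options and the
 * route reference.
 */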
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

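/* Collapse the queued skbs into one packet, prepend the IPv6 header and
 * options, account output stats, and release the cork.
 */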
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

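/* Transmit a packet built by __ip6_make_skb() via ip6_local_out(),
 * normalising the netdev xmit return code into an errno.
 */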
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err)
		return ERR_PTR(err);

	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}