]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv6/ip6_output.c
Fix AGP compile on non-x86 architectures
[mirror_ubuntu-bionic-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : airthmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
31#include <linux/config.h>
32#include <linux/errno.h>
33#include <linux/types.h>
34#include <linux/string.h>
35#include <linux/socket.h>
36#include <linux/net.h>
37#include <linux/netdevice.h>
38#include <linux/if_arp.h>
39#include <linux/in6.h>
40#include <linux/tcp.h>
41#include <linux/route.h>
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62{
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71}
72
73static inline int ip6_output_finish(struct sk_buff *skb)
74{
75
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
78
79 if (hh) {
80 int hh_alen;
81
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
90
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
92 kfree_skb(skb);
93 return -EINVAL;
94
95}
96
97/* dev_loopback_xmit for use with netfilter. */
98static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
99{
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
105
106 netif_rx(newskb);
107 return 0;
108}
109
110
111static int ip6_output2(struct sk_buff *skb)
112{
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
115
116 skb->protocol = htons(ETH_P_IPV6);
117 skb->dev = dev;
118
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
121
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
126
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
129 */
130 if (newskb)
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
132 newskb->dev,
133 ip6_dev_loopback_xmit);
134
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
137 kfree_skb(skb);
138 return 0;
139 }
140 }
141
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
143 }
144
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
146}
147
148int ip6_output(struct sk_buff *skb)
149{
e89e9cf5
AR
150 if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
151 dst_allfrag(skb->dst))
1da177e4
LT
152 return ip6_fragment(skb, ip6_output2);
153 else
154 return ip6_output2(skb);
155}
156
1da177e4
LT
157/*
158 * xmit an sk_buff (used by TCP)
159 */
160
161int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
162 struct ipv6_txoptions *opt, int ipfragok)
163{
164 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
165 struct in6_addr *first_hop = &fl->fl6_dst;
166 struct dst_entry *dst = skb->dst;
167 struct ipv6hdr *hdr;
168 u8 proto = fl->proto;
169 int seg_len = skb->len;
41a1f8ea 170 int hlimit, tclass;
1da177e4
LT
171 u32 mtu;
172
173 if (opt) {
174 int head_room;
175
176 /* First: exthdrs may take lots of space (~8K for now)
177 MAX_HEADER is not enough.
178 */
179 head_room = opt->opt_nflen + opt->opt_flen;
180 seg_len += head_room;
181 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
182
183 if (skb_headroom(skb) < head_room) {
184 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
185 kfree_skb(skb);
186 skb = skb2;
187 if (skb == NULL) {
188 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
189 return -ENOBUFS;
190 }
191 if (sk)
192 skb_set_owner_w(skb, sk);
193 }
194 if (opt->opt_flen)
195 ipv6_push_frag_opts(skb, opt, &proto);
196 if (opt->opt_nflen)
197 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
198 }
199
200 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
201
202 /*
203 * Fill in the IPv6 header
204 */
205
1da177e4
LT
206 hlimit = -1;
207 if (np)
208 hlimit = np->hop_limit;
209 if (hlimit < 0)
210 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
211 if (hlimit < 0)
212 hlimit = ipv6_get_hoplimit(dst->dev);
213
41a1f8ea
YH
214 tclass = -1;
215 if (np)
216 tclass = np->tclass;
217 if (tclass < 0)
218 tclass = 0;
219
220 *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
221
1da177e4
LT
222 hdr->payload_len = htons(seg_len);
223 hdr->nexthdr = proto;
224 hdr->hop_limit = hlimit;
225
226 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
227 ipv6_addr_copy(&hdr->daddr, first_hop);
228
229 mtu = dst_mtu(dst);
230 if ((skb->len <= mtu) || ipfragok) {
231 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
6869c4d8
HW
232 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
233 dst_output);
1da177e4
LT
234 }
235
236 if (net_ratelimit())
237 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
238 skb->dev = dst->dev;
239 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
240 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
241 kfree_skb(skb);
242 return -EMSGSIZE;
243}
244
245/*
246 * To avoid extra problems ND packets are send through this
247 * routine. It's code duplication but I really want to avoid
248 * extra checks since ipv6_build_header is used by TCP (which
249 * is for us performance critical)
250 */
251
252int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
253 struct in6_addr *saddr, struct in6_addr *daddr,
254 int proto, int len)
255{
256 struct ipv6_pinfo *np = inet6_sk(sk);
257 struct ipv6hdr *hdr;
258 int totlen;
259
260 skb->protocol = htons(ETH_P_IPV6);
261 skb->dev = dev;
262
263 totlen = len + sizeof(struct ipv6hdr);
264
265 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
266 skb->nh.ipv6h = hdr;
267
268 *(u32*)hdr = htonl(0x60000000);
269
270 hdr->payload_len = htons(len);
271 hdr->nexthdr = proto;
272 hdr->hop_limit = np->hop_limit;
273
274 ipv6_addr_copy(&hdr->saddr, saddr);
275 ipv6_addr_copy(&hdr->daddr, daddr);
276
277 return 0;
278}
279
280static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
281{
282 struct ip6_ra_chain *ra;
283 struct sock *last = NULL;
284
285 read_lock(&ip6_ra_lock);
286 for (ra = ip6_ra_chain; ra; ra = ra->next) {
287 struct sock *sk = ra->sk;
0bd1b59b
AM
288 if (sk && ra->sel == sel &&
289 (!sk->sk_bound_dev_if ||
290 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
291 if (last) {
292 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
293 if (skb2)
294 rawv6_rcv(last, skb2);
295 }
296 last = sk;
297 }
298 }
299
300 if (last) {
301 rawv6_rcv(last, skb);
302 read_unlock(&ip6_ra_lock);
303 return 1;
304 }
305 read_unlock(&ip6_ra_lock);
306 return 0;
307}
308
309static inline int ip6_forward_finish(struct sk_buff *skb)
310{
311 return dst_output(skb);
312}
313
314int ip6_forward(struct sk_buff *skb)
315{
316 struct dst_entry *dst = skb->dst;
317 struct ipv6hdr *hdr = skb->nh.ipv6h;
318 struct inet6_skb_parm *opt = IP6CB(skb);
319
320 if (ipv6_devconf.forwarding == 0)
321 goto error;
322
323 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
324 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
325 goto drop;
326 }
327
328 skb->ip_summed = CHECKSUM_NONE;
329
330 /*
331 * We DO NOT make any processing on
332 * RA packets, pushing them to user level AS IS
333 * without ane WARRANTY that application will be able
334 * to interpret them. The reason is that we
335 * cannot make anything clever here.
336 *
337 * We are not end-node, so that if packet contains
338 * AH/ESP, we cannot make anything.
339 * Defragmentation also would be mistake, RA packets
340 * cannot be fragmented, because there is no warranty
341 * that different fragments will go along one path. --ANK
342 */
343 if (opt->ra) {
344 u8 *ptr = skb->nh.raw + opt->ra;
345 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
346 return 0;
347 }
348
349 /*
350 * check and decrement ttl
351 */
352 if (hdr->hop_limit <= 1) {
353 /* Force OUTPUT device used as source address */
354 skb->dev = dst->dev;
355 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
356 0, skb->dev);
357
358 kfree_skb(skb);
359 return -ETIMEDOUT;
360 }
361
362 if (!xfrm6_route_forward(skb)) {
363 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
364 goto drop;
365 }
366 dst = skb->dst;
367
368 /* IPv6 specs say nothing about it, but it is clear that we cannot
369 send redirects to source routed frames.
370 */
371 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
372 struct in6_addr *target = NULL;
373 struct rt6_info *rt;
374 struct neighbour *n = dst->neighbour;
375
376 /*
377 * incoming and outgoing devices are the same
378 * send a redirect.
379 */
380
381 rt = (struct rt6_info *) dst;
382 if ((rt->rt6i_flags & RTF_GATEWAY))
383 target = (struct in6_addr*)&n->primary_key;
384 else
385 target = &hdr->daddr;
386
387 /* Limit redirects both by destination (here)
388 and by source (inside ndisc_send_redirect)
389 */
390 if (xrlim_allow(dst, 1*HZ))
391 ndisc_send_redirect(skb, n, target);
392 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
393 |IPV6_ADDR_LINKLOCAL)) {
394 /* This check is security critical. */
395 goto error;
396 }
397
398 if (skb->len > dst_mtu(dst)) {
399 /* Again, force OUTPUT device used as source address */
400 skb->dev = dst->dev;
401 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
402 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
403 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
404 kfree_skb(skb);
405 return -EMSGSIZE;
406 }
407
408 if (skb_cow(skb, dst->dev->hard_header_len)) {
409 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
410 goto drop;
411 }
412
413 hdr = skb->nh.ipv6h;
414
415 /* Mangling hops number delayed to point after skb COW */
416
417 hdr->hop_limit--;
418
419 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
420 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
421
422error:
423 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
424drop:
425 kfree_skb(skb);
426 return -EINVAL;
427}
428
429static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
430{
431 to->pkt_type = from->pkt_type;
432 to->priority = from->priority;
433 to->protocol = from->protocol;
1da177e4
LT
434 dst_release(to->dst);
435 to->dst = dst_clone(from->dst);
436 to->dev = from->dev;
437
438#ifdef CONFIG_NET_SCHED
439 to->tc_index = from->tc_index;
440#endif
441#ifdef CONFIG_NETFILTER
442 to->nfmark = from->nfmark;
443 /* Connection association is same as pre-frag packet */
444 to->nfct = from->nfct;
445 nf_conntrack_get(to->nfct);
446 to->nfctinfo = from->nfctinfo;
447#ifdef CONFIG_BRIDGE_NETFILTER
448 nf_bridge_put(to->nf_bridge);
449 to->nf_bridge = from->nf_bridge;
450 nf_bridge_get(to->nf_bridge);
451#endif
1da177e4
LT
452#endif
453}
454
455int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
456{
457 u16 offset = sizeof(struct ipv6hdr);
458 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
459 unsigned int packet_len = skb->tail - skb->nh.raw;
460 int found_rhdr = 0;
461 *nexthdr = &skb->nh.ipv6h->nexthdr;
462
463 while (offset + 1 <= packet_len) {
464
465 switch (**nexthdr) {
466
467 case NEXTHDR_HOP:
468 case NEXTHDR_ROUTING:
469 case NEXTHDR_DEST:
470 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
471 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
472 offset += ipv6_optlen(exthdr);
473 *nexthdr = &exthdr->nexthdr;
474 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
475 break;
476 default :
477 return offset;
478 }
479 }
480
481 return offset;
482}
483
484static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
485{
486 struct net_device *dev;
487 struct sk_buff *frag;
488 struct rt6_info *rt = (struct rt6_info*)skb->dst;
489 struct ipv6hdr *tmp_hdr;
490 struct frag_hdr *fh;
491 unsigned int mtu, hlen, left, len;
492 u32 frag_id = 0;
493 int ptr, offset = 0, err=0;
494 u8 *prevhdr, nexthdr = 0;
495
496 dev = rt->u.dst.dev;
497 hlen = ip6_find_1stfragopt(skb, &prevhdr);
498 nexthdr = *prevhdr;
499
500 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
501
502 if (skb_shinfo(skb)->frag_list) {
503 int first_len = skb_pagelen(skb);
504
505 if (first_len - hlen > mtu ||
506 ((first_len - hlen) & 7) ||
507 skb_cloned(skb))
508 goto slow_path;
509
510 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
511 /* Correct geometry. */
512 if (frag->len > mtu ||
513 ((frag->len & 7) && frag->next) ||
514 skb_headroom(frag) < hlen)
515 goto slow_path;
516
1da177e4
LT
517 /* Partially cloned skb? */
518 if (skb_shared(frag))
519 goto slow_path;
2fdba6b0
HX
520
521 BUG_ON(frag->sk);
522 if (skb->sk) {
523 sock_hold(skb->sk);
524 frag->sk = skb->sk;
525 frag->destructor = sock_wfree;
526 skb->truesize -= frag->truesize;
527 }
1da177e4
LT
528 }
529
530 err = 0;
531 offset = 0;
532 frag = skb_shinfo(skb)->frag_list;
533 skb_shinfo(skb)->frag_list = NULL;
534 /* BUILD HEADER */
535
536 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
537 if (!tmp_hdr) {
538 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
539 return -ENOMEM;
540 }
541
542 *prevhdr = NEXTHDR_FRAGMENT;
543 memcpy(tmp_hdr, skb->nh.raw, hlen);
544 __skb_pull(skb, hlen);
545 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
546 skb->nh.raw = __skb_push(skb, hlen);
547 memcpy(skb->nh.raw, tmp_hdr, hlen);
548
549 ipv6_select_ident(skb, fh);
550 fh->nexthdr = nexthdr;
551 fh->reserved = 0;
552 fh->frag_off = htons(IP6_MF);
553 frag_id = fh->identification;
554
555 first_len = skb_pagelen(skb);
556 skb->data_len = first_len - skb_headlen(skb);
557 skb->len = first_len;
558 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
559
560
561 for (;;) {
562 /* Prepare header of the next frame,
563 * before previous one went down. */
564 if (frag) {
565 frag->ip_summed = CHECKSUM_NONE;
566 frag->h.raw = frag->data;
567 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
568 frag->nh.raw = __skb_push(frag, hlen);
569 memcpy(frag->nh.raw, tmp_hdr, hlen);
570 offset += skb->len - hlen - sizeof(struct frag_hdr);
571 fh->nexthdr = nexthdr;
572 fh->reserved = 0;
573 fh->frag_off = htons(offset);
574 if (frag->next != NULL)
575 fh->frag_off |= htons(IP6_MF);
576 fh->identification = frag_id;
577 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
578 ip6_copy_metadata(frag, skb);
579 }
580
581 err = output(skb);
582 if (err || !frag)
583 break;
584
585 skb = frag;
586 frag = skb->next;
587 skb->next = NULL;
588 }
589
a51482bd 590 kfree(tmp_hdr);
1da177e4
LT
591
592 if (err == 0) {
593 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
594 return 0;
595 }
596
597 while (frag) {
598 skb = frag->next;
599 kfree_skb(frag);
600 frag = skb;
601 }
602
603 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
604 return err;
605 }
606
607slow_path:
608 left = skb->len - hlen; /* Space per frame */
609 ptr = hlen; /* Where to start from */
610
611 /*
612 * Fragment the datagram.
613 */
614
615 *prevhdr = NEXTHDR_FRAGMENT;
616
617 /*
618 * Keep copying data until we run out.
619 */
620 while(left > 0) {
621 len = left;
622 /* IF: it doesn't fit, use 'mtu' - the data space left */
623 if (len > mtu)
624 len = mtu;
625 /* IF: we are not sending upto and including the packet end
626 then align the next start on an eight byte boundary */
627 if (len < left) {
628 len &= ~7;
629 }
630 /*
631 * Allocate buffer.
632 */
633
634 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 635 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
1da177e4
LT
636 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
637 err = -ENOMEM;
638 goto fail;
639 }
640
641 /*
642 * Set up data on packet
643 */
644
645 ip6_copy_metadata(frag, skb);
646 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
647 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
648 frag->nh.raw = frag->data;
649 fh = (struct frag_hdr*)(frag->data + hlen);
650 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
651
652 /*
653 * Charge the memory for the fragment to any owner
654 * it might possess
655 */
656 if (skb->sk)
657 skb_set_owner_w(frag, skb->sk);
658
659 /*
660 * Copy the packet header into the new buffer.
661 */
662 memcpy(frag->nh.raw, skb->data, hlen);
663
664 /*
665 * Build fragment header.
666 */
667 fh->nexthdr = nexthdr;
668 fh->reserved = 0;
f36d6ab1 669 if (!frag_id) {
1da177e4
LT
670 ipv6_select_ident(skb, fh);
671 frag_id = fh->identification;
672 } else
673 fh->identification = frag_id;
674
675 /*
676 * Copy a block of the IP datagram.
677 */
678 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
679 BUG();
680 left -= len;
681
682 fh->frag_off = htons(offset);
683 if (left > 0)
684 fh->frag_off |= htons(IP6_MF);
685 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
686
687 ptr += len;
688 offset += len;
689
690 /*
691 * Put this fragment into the sending queue.
692 */
693
694 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
695
696 err = output(frag);
697 if (err)
698 goto fail;
699 }
700 kfree_skb(skb);
701 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
702 return err;
703
704fail:
705 kfree_skb(skb);
706 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
707 return err;
708}
709
710int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
711{
712 int err = 0;
713
714 *dst = NULL;
715 if (sk) {
716 struct ipv6_pinfo *np = inet6_sk(sk);
717
718 *dst = sk_dst_check(sk, np->dst_cookie);
719 if (*dst) {
720 struct rt6_info *rt = (struct rt6_info*)*dst;
721
722 /* Yes, checking route validity in not connected
723 case is not very simple. Take into account,
724 that we do not support routing by source, TOS,
725 and MSG_DONTROUTE --ANK (980726)
726
727 1. If route was host route, check that
728 cached destination is current.
729 If it is network route, we still may
730 check its validity using saved pointer
731 to the last used address: daddr_cache.
732 We do not want to save whole address now,
733 (because main consumer of this service
734 is tcp, which has not this problem),
735 so that the last trick works only on connected
736 sockets.
737 2. oif also should be the same.
738 */
739
740 if (((rt->rt6i_dst.plen != 128 ||
741 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
742 && (np->daddr_cache == NULL ||
743 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
744 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
745 dst_release(*dst);
746 *dst = NULL;
747 }
748 }
749 }
750
751 if (*dst == NULL)
752 *dst = ip6_route_output(sk, fl);
753
754 if ((err = (*dst)->error))
755 goto out_err_release;
756
757 if (ipv6_addr_any(&fl->fl6_src)) {
758 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
759
44456d37 760 if (err)
1da177e4 761 goto out_err_release;
1da177e4
LT
762 }
763
764 return 0;
765
766out_err_release:
767 dst_release(*dst);
768 *dst = NULL;
769 return err;
770}
e89e9cf5
AR
771inline int ip6_ufo_append_data(struct sock *sk,
772 int getfrag(void *from, char *to, int offset, int len,
773 int odd, struct sk_buff *skb),
774 void *from, int length, int hh_len, int fragheaderlen,
775 int transhdrlen, int mtu,unsigned int flags)
776
777{
778 struct sk_buff *skb;
779 int err;
780
781 /* There is support for UDP large send offload by network
782 * device, so create one single skb packet containing complete
783 * udp datagram
784 */
785 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
786 skb = sock_alloc_send_skb(sk,
787 hh_len + fragheaderlen + transhdrlen + 20,
788 (flags & MSG_DONTWAIT), &err);
789 if (skb == NULL)
790 return -ENOMEM;
791
792 /* reserve space for Hardware header */
793 skb_reserve(skb, hh_len);
794
795 /* create space for UDP/IP header */
796 skb_put(skb,fragheaderlen + transhdrlen);
797
798 /* initialize network header pointer */
799 skb->nh.raw = skb->data;
800
801 /* initialize protocol header pointer */
802 skb->h.raw = skb->data + fragheaderlen;
803
804 skb->ip_summed = CHECKSUM_HW;
805 skb->csum = 0;
806 sk->sk_sndmsg_off = 0;
807 }
808
809 err = skb_append_datato_frags(sk,skb, getfrag, from,
810 (length - transhdrlen));
811 if (!err) {
812 struct frag_hdr fhdr;
813
814 /* specify the length of each IP datagram fragment*/
815 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
816 sizeof(struct frag_hdr);
817 ipv6_select_ident(skb, &fhdr);
818 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
819 __skb_queue_tail(&sk->sk_write_queue, skb);
820
821 return 0;
822 }
823 /* There is not enough support do UPD LSO,
824 * so follow normal path
825 */
826 kfree_skb(skb);
827
828 return err;
829}
1da177e4 830
41a1f8ea
YH
831int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
832 int offset, int len, int odd, struct sk_buff *skb),
833 void *from, int length, int transhdrlen,
834 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
835 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
836{
837 struct inet_sock *inet = inet_sk(sk);
838 struct ipv6_pinfo *np = inet6_sk(sk);
839 struct sk_buff *skb;
840 unsigned int maxfraglen, fragheaderlen;
841 int exthdrlen;
842 int hh_len;
843 int mtu;
844 int copy;
845 int err;
846 int offset = 0;
847 int csummode = CHECKSUM_NONE;
848
849 if (flags&MSG_PROBE)
850 return 0;
851 if (skb_queue_empty(&sk->sk_write_queue)) {
852 /*
853 * setup for corking
854 */
855 if (opt) {
856 if (np->cork.opt == NULL) {
857 np->cork.opt = kmalloc(opt->tot_len,
858 sk->sk_allocation);
859 if (unlikely(np->cork.opt == NULL))
860 return -ENOBUFS;
861 } else if (np->cork.opt->tot_len < opt->tot_len) {
862 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
863 return -EINVAL;
864 }
865 memcpy(np->cork.opt, opt, opt->tot_len);
866 inet->cork.flags |= IPCORK_OPT;
867 /* need source address above miyazawa*/
868 }
869 dst_hold(&rt->u.dst);
870 np->cork.rt = rt;
871 inet->cork.fl = *fl;
872 np->cork.hop_limit = hlimit;
41a1f8ea 873 np->cork.tclass = tclass;
1da177e4
LT
874 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
875 if (dst_allfrag(rt->u.dst.path))
876 inet->cork.flags |= IPCORK_ALLFRAG;
877 inet->cork.length = 0;
878 sk->sk_sndmsg_page = NULL;
879 sk->sk_sndmsg_off = 0;
880 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
881 length += exthdrlen;
882 transhdrlen += exthdrlen;
883 } else {
884 rt = np->cork.rt;
885 fl = &inet->cork.fl;
886 if (inet->cork.flags & IPCORK_OPT)
887 opt = np->cork.opt;
888 transhdrlen = 0;
889 exthdrlen = 0;
890 mtu = inet->cork.fragsize;
891 }
892
893 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
894
895 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
896 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
897
898 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
899 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
900 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
901 return -EMSGSIZE;
902 }
903 }
904
905 /*
906 * Let's try using as much space as possible.
907 * Use MTU if total length of the message fits into the MTU.
908 * Otherwise, we need to reserve fragment header and
909 * fragment alignment (= 8-15 octects, in total).
910 *
911 * Note that we may need to "move" the data from the tail of
912 * of the buffer to the new fragment when we split
913 * the message.
914 *
915 * FIXME: It may be fragmented into multiple chunks
916 * at once if non-fragmentable extension headers
917 * are too large.
918 * --yoshfuji
919 */
920
921 inet->cork.length += length;
e89e9cf5
AR
922 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
923 (rt->u.dst.dev->features & NETIF_F_UFO)) {
924
925 if(ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
926 fragheaderlen, transhdrlen, mtu, flags))
927 goto error;
928
929 return 0;
930 }
1da177e4
LT
931
932 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
933 goto alloc_new_skb;
934
935 while (length > 0) {
936 /* Check if the remaining data fits into current packet. */
937 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
938 if (copy < length)
939 copy = maxfraglen - skb->len;
940
941 if (copy <= 0) {
942 char *data;
943 unsigned int datalen;
944 unsigned int fraglen;
945 unsigned int fraggap;
946 unsigned int alloclen;
947 struct sk_buff *skb_prev;
948alloc_new_skb:
949 skb_prev = skb;
950
951 /* There's no room in the current skb */
952 if (skb_prev)
953 fraggap = skb_prev->len - maxfraglen;
954 else
955 fraggap = 0;
956
957 /*
958 * If remaining data exceeds the mtu,
959 * we know we need more fragment(s).
960 */
961 datalen = length + fraggap;
962 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
963 datalen = maxfraglen - fragheaderlen;
964
965 fraglen = datalen + fragheaderlen;
966 if ((flags & MSG_MORE) &&
967 !(rt->u.dst.dev->features&NETIF_F_SG))
968 alloclen = mtu;
969 else
970 alloclen = datalen + fragheaderlen;
971
972 /*
973 * The last fragment gets additional space at tail.
974 * Note: we overallocate on fragments with MSG_MODE
975 * because we have no idea if we're the last one.
976 */
977 if (datalen == length + fraggap)
978 alloclen += rt->u.dst.trailer_len;
979
980 /*
981 * We just reserve space for fragment header.
982 * Note: this may be overallocation if the message
983 * (without MSG_MORE) fits into the MTU.
984 */
985 alloclen += sizeof(struct frag_hdr);
986
987 if (transhdrlen) {
988 skb = sock_alloc_send_skb(sk,
989 alloclen + hh_len,
990 (flags & MSG_DONTWAIT), &err);
991 } else {
992 skb = NULL;
993 if (atomic_read(&sk->sk_wmem_alloc) <=
994 2 * sk->sk_sndbuf)
995 skb = sock_wmalloc(sk,
996 alloclen + hh_len, 1,
997 sk->sk_allocation);
998 if (unlikely(skb == NULL))
999 err = -ENOBUFS;
1000 }
1001 if (skb == NULL)
1002 goto error;
1003 /*
1004 * Fill in the control structures
1005 */
1006 skb->ip_summed = csummode;
1007 skb->csum = 0;
1008 /* reserve for fragmentation */
1009 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1010
1011 /*
1012 * Find where to start putting bytes
1013 */
1014 data = skb_put(skb, fraglen);
1015 skb->nh.raw = data + exthdrlen;
1016 data += fragheaderlen;
1017 skb->h.raw = data + exthdrlen;
1018
1019 if (fraggap) {
1020 skb->csum = skb_copy_and_csum_bits(
1021 skb_prev, maxfraglen,
1022 data + transhdrlen, fraggap, 0);
1023 skb_prev->csum = csum_sub(skb_prev->csum,
1024 skb->csum);
1025 data += fraggap;
1026 skb_trim(skb_prev, maxfraglen);
1027 }
1028 copy = datalen - transhdrlen - fraggap;
1029 if (copy < 0) {
1030 err = -EINVAL;
1031 kfree_skb(skb);
1032 goto error;
1033 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1034 err = -EFAULT;
1035 kfree_skb(skb);
1036 goto error;
1037 }
1038
1039 offset += copy;
1040 length -= datalen - fraggap;
1041 transhdrlen = 0;
1042 exthdrlen = 0;
1043 csummode = CHECKSUM_NONE;
1044
1045 /*
1046 * Put the packet on the pending queue
1047 */
1048 __skb_queue_tail(&sk->sk_write_queue, skb);
1049 continue;
1050 }
1051
1052 if (copy > length)
1053 copy = length;
1054
1055 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1056 unsigned int off;
1057
1058 off = skb->len;
1059 if (getfrag(from, skb_put(skb, copy),
1060 offset, copy, off, skb) < 0) {
1061 __skb_trim(skb, off);
1062 err = -EFAULT;
1063 goto error;
1064 }
1065 } else {
1066 int i = skb_shinfo(skb)->nr_frags;
1067 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1068 struct page *page = sk->sk_sndmsg_page;
1069 int off = sk->sk_sndmsg_off;
1070 unsigned int left;
1071
1072 if (page && (left = PAGE_SIZE - off) > 0) {
1073 if (copy >= left)
1074 copy = left;
1075 if (page != frag->page) {
1076 if (i == MAX_SKB_FRAGS) {
1077 err = -EMSGSIZE;
1078 goto error;
1079 }
1080 get_page(page);
1081 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1082 frag = &skb_shinfo(skb)->frags[i];
1083 }
1084 } else if(i < MAX_SKB_FRAGS) {
1085 if (copy > PAGE_SIZE)
1086 copy = PAGE_SIZE;
1087 page = alloc_pages(sk->sk_allocation, 0);
1088 if (page == NULL) {
1089 err = -ENOMEM;
1090 goto error;
1091 }
1092 sk->sk_sndmsg_page = page;
1093 sk->sk_sndmsg_off = 0;
1094
1095 skb_fill_page_desc(skb, i, page, 0, 0);
1096 frag = &skb_shinfo(skb)->frags[i];
1097 skb->truesize += PAGE_SIZE;
1098 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1099 } else {
1100 err = -EMSGSIZE;
1101 goto error;
1102 }
1103 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1104 err = -EFAULT;
1105 goto error;
1106 }
1107 sk->sk_sndmsg_off += copy;
1108 frag->size += copy;
1109 skb->len += copy;
1110 skb->data_len += copy;
1111 }
1112 offset += copy;
1113 length -= copy;
1114 }
1115 return 0;
1116error:
1117 inet->cork.length -= length;
1118 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1119 return err;
1120}
1121
1122int ip6_push_pending_frames(struct sock *sk)
1123{
1124 struct sk_buff *skb, *tmp_skb;
1125 struct sk_buff **tail_skb;
1126 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1127 struct inet_sock *inet = inet_sk(sk);
1128 struct ipv6_pinfo *np = inet6_sk(sk);
1129 struct ipv6hdr *hdr;
1130 struct ipv6_txoptions *opt = np->cork.opt;
1131 struct rt6_info *rt = np->cork.rt;
1132 struct flowi *fl = &inet->cork.fl;
1133 unsigned char proto = fl->proto;
1134 int err = 0;
1135
1136 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1137 goto out;
1138 tail_skb = &(skb_shinfo(skb)->frag_list);
1139
1140 /* move skb->data to ip header from ext header */
1141 if (skb->data < skb->nh.raw)
1142 __skb_pull(skb, skb->nh.raw - skb->data);
1143 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1144 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1145 *tail_skb = tmp_skb;
1146 tail_skb = &(tmp_skb->next);
1147 skb->len += tmp_skb->len;
1148 skb->data_len += tmp_skb->len;
1da177e4
LT
1149 skb->truesize += tmp_skb->truesize;
1150 __sock_put(tmp_skb->sk);
1151 tmp_skb->destructor = NULL;
1152 tmp_skb->sk = NULL;
1da177e4
LT
1153 }
1154
1155 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1156 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1157 if (opt && opt->opt_flen)
1158 ipv6_push_frag_opts(skb, opt, &proto);
1159 if (opt && opt->opt_nflen)
1160 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1161
1162 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1163
41a1f8ea
YH
1164 *(u32*)hdr = fl->fl6_flowlabel |
1165 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4
LT
1166
1167 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1168 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1169 else
1170 hdr->payload_len = 0;
1171 hdr->hop_limit = np->cork.hop_limit;
1172 hdr->nexthdr = proto;
1173 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1174 ipv6_addr_copy(&hdr->daddr, final_dst);
1175
1176 skb->dst = dst_clone(&rt->u.dst);
1177 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1178 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1179 if (err) {
1180 if (err > 0)
3320da89 1181 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1182 if (err)
1183 goto error;
1184 }
1185
1186out:
1187 inet->cork.flags &= ~IPCORK_OPT;
a51482bd
JJ
1188 kfree(np->cork.opt);
1189 np->cork.opt = NULL;
1da177e4
LT
1190 if (np->cork.rt) {
1191 dst_release(&np->cork.rt->u.dst);
1192 np->cork.rt = NULL;
1193 inet->cork.flags &= ~IPCORK_ALLFRAG;
1194 }
1195 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1196 return err;
1197error:
1198 goto out;
1199}
1200
1201void ip6_flush_pending_frames(struct sock *sk)
1202{
1203 struct inet_sock *inet = inet_sk(sk);
1204 struct ipv6_pinfo *np = inet6_sk(sk);
1205 struct sk_buff *skb;
1206
1207 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1208 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1209 kfree_skb(skb);
1210 }
1211
1212 inet->cork.flags &= ~IPCORK_OPT;
1213
a51482bd
JJ
1214 kfree(np->cork.opt);
1215 np->cork.opt = NULL;
1da177e4
LT
1216 if (np->cork.rt) {
1217 dst_release(&np->cork.rt->u.dst);
1218 np->cork.rt = NULL;
1219 inet->cork.flags &= ~IPCORK_ALLFRAG;
1220 }
1221 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1222}