/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
        static u32 ipv6_fragmentation_id = 1;
        static DEFINE_SPINLOCK(ip6_id_lock);

        spin_lock_bh(&ip6_id_lock);
        fhdr->identification = htonl(ipv6_fragmentation_id);
        if (++ipv6_fragmentation_id == 0)
                ipv6_fragmentation_id = 1;
        spin_unlock_bh(&ip6_id_lock);
}
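
/*
 * Note: the fragmentation ID above comes from a single global counter
 * shared by all flows, serialized by ip6_id_lock. The counter wraps from
 * 0xffffffff back to 1 so that 0 is never handed out; for example, the
 * first fragment train after boot carries identification htonl(1), i.e.
 * 0x00000001 on the wire.
 */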

static inline int ip6_output_finish(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct hh_cache *hh = dst->hh;

        if (hh) {
                int hh_alen;

                read_lock_bh(&hh->hh_lock);
                hh_alen = HH_DATA_ALIGN(hh->hh_len);
                memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
                read_unlock_bh(&hh->hh_lock);
                skb_push(skb, hh->hh_len);
                return hh->hh_output(skb);
        } else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        newskb->mac.raw = newskb->data;
        __skb_pull(newskb, newskb->nh.raw - newskb->data);
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        BUG_TRAP(newskb->dst);

        netif_rx(newskb);
        return 0;
}


static int ip6_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
                struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
                    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
                                        &skb->nh.ipv6h->saddr)) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb,
                                        NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (skb->nh.ipv6h->hop_limit == 0) {
                                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
        }

        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
                       ip6_output_finish);
}

int ip6_output(struct sk_buff *skb)
{
        if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
                return ip6_fragment(skb, ip6_output2);
        else
                return ip6_output2(skb);
}

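/*
 * Note: ip6_fragment() is taken not only when the packet exceeds the path
 * MTU, but also when dst_allfrag() is set on the route, which typically
 * happens after an ICMPv6 Packet Too Big advertised an MTU below the IPv6
 * minimum of 1280: from then on even a packet that would fit, say 200
 * bytes, is still sent with a fragment header attached.
 */
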
/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
        struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr;
        u8 proto = fl->proto;
        int seg_len = skb->len;
        int hlimit;
        u32 mtu;

        if (opt) {
                int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        kfree_skb(skb);
                        skb = skb2;
                        if (skb == NULL) {
                                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                                return -ENOBUFS;
                        }
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        hdr = skb->nh.ipv6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));

        /*
         *	Fill in the IPv6 header
         */

        *(u32 *)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
        hlimit = -1;
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
        if (hlimit < 0)
                hlimit = ipv6_get_hoplimit(dst->dev);

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || ipfragok) {
                IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
                return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
                               dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
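
/*
 * The first 32-bit word written in ip6_xmit() packs version, traffic class
 * and flow label: htonl(0x60000000) sets the version nibble to 6, and
 * fl6_flowlabel, which is kept in network byte order and may carry
 * traffic-class bits along with the 20-bit label, is simply OR-ed in.
 * With a zero label the word is 0x60000000; with label 0x12345 it reads
 * 0x60012345 on the wire.
 */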

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               struct in6_addr *saddr, struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        hdr = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
        skb->nh.ipv6h = hdr;

        *(u32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}
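
/*
 * Delivery pattern above: with N sockets registered for this Router Alert
 * value, the first N-1 matches each get a clone of the skb and the
 * original is handed to the last match, so no copy is wasted. With a
 * single listener no clone is made at all; with no listener, 0 is
 * returned and the caller keeps forwarding the packet.
 */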

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr = skb->nh.ipv6h;
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (ipv6_devconf.forwarding == 0)
                goto error;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb->ip_summed = CHECKSUM_NONE;

        /*
         *	We do no processing on RA packets, pushing them to user
         *	level as is, without any warranty that an application
         *	will be able to interpret them. The reason is that we
         *	cannot make anything clever here.
         *
         *	We are not the end node, so if the packet contains
         *	AH/ESP we cannot do anything. Defragmentation would also
         *	be a mistake: RA packets must not be fragmented, because
         *	there is no warranty that different fragments will go
         *	along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb->nh.raw + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2] << 8) + ptr[3]))
                        return 0;
        }

        /*
         *	check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force the output device to be used for the ICMP source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                            0, skb->dev);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb->dst;

        /* The IPv6 specs say nothing about it, but it is clear that we
           cannot send redirects for source routed frames.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *	incoming and outgoing devices are the same
                 *	send a redirect.
                 */

                rt = (struct rt6_info *)dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else if (ipv6_addr_type(&hdr->saddr) & (IPV6_ADDR_MULTICAST |
                   IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL)) {
                /* This check is security critical. */
                goto error;
        }

        if (skb->len > dst_mtu(dst)) {
                /* Again, force the output device to be used for the ICMP source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
                IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = skb->nh.ipv6h;

        /* Decrementing the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
        to->nfmark = from->nfmark;
        /* Connection association is the same as for the pre-frag packet */
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
        to->nfctinfo = from->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
        nf_bridge_get(to->nf_bridge);
#endif
#endif
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(skb->nh.ipv6h + 1);
        unsigned int packet_len = skb->tail - skb->nh.raw;
        int found_rhdr = 0;
        *nexthdr = &skb->nh.ipv6h->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                case NEXTHDR_ROUTING:
                case NEXTHDR_DEST:
                        if (**nexthdr == NEXTHDR_ROUTING)
                                found_rhdr = 1;
                        if (**nexthdr == NEXTHDR_DEST && found_rhdr)
                                return offset;
                        offset += ipv6_optlen(exthdr);
                        *nexthdr = &exthdr->nexthdr;
                        exthdr = (struct ipv6_opt_hdr *)(skb->nh.raw + offset);
                        break;
                default:
                        return offset;
                }
        }

        return offset;
}
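
/*
 * Example walk: for a packet laid out as
 *	IPv6 | Hop-by-Hop | Routing | Destination Options | TCP
 * the loop steps over Hop-by-Hop and Routing, then stops at the
 * Destination Options header because it follows a routing header and so
 * belongs to the fragmentable part, returning that offset. *nexthdr is
 * left pointing at the Routing header's nexthdr byte, which is where
 * ip6_fragment() patches in NEXTHDR_FRAGMENT. For a plain IPv6 | TCP
 * packet it returns sizeof(struct ipv6hdr) immediately.
 */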

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct net_device *dev;
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb->dst;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        u32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;

        dev = rt->u.dst.dev;
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);

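        /*
         * Worked example: with a path MTU of 1500 and no extension
         * headers (hlen == 40, the bare IPv6 header), mtu above becomes
         * 1500 - 40 - 8 = 1452 bytes of payload per fragment; the 8
         * accounts for the fragment header each fragment will carry.
         */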
        if (skb_shinfo(skb)->frag_list) {
                int first_len = skb_pagelen(skb);

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                sock_hold(skb->sk);
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                skb->truesize -= frag->truesize;
                        }
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_shinfo(skb)->frag_list = NULL;
                /* BUILD HEADER */

                tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                *prevhdr = NEXTHDR_FRAGMENT;
                memcpy(tmp_hdr, skb->nh.raw, hlen);
                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                skb->nh.raw = __skb_push(skb, hlen);
                memcpy(skb->nh.raw, tmp_hdr, hlen);

                ipv6_select_ident(skb, fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one goes down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                frag->h.raw = frag->data;
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                frag->nh.raw = __skb_push(frag, hlen);
                                memcpy(frag->nh.raw, tmp_hdr, hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                return err;
        }
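
        /*
         * The branch above is the fast path: if the higher layer already
         * built the skb as a chain of properly sized pieces (frag_list),
         * each piece only needs a fragment header pushed in front and no
         * data is copied. The slow path below is taken whenever the
         * geometry is wrong: a piece longer than the MTU, a non-final
         * piece whose length is not a multiple of 8, too little headroom,
         * or a cloned/shared buffer.
         */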

slow_path:
        left = skb->len - hlen;		/* Data left to send */
        ptr = hlen;			/* Where to start from */

        /*
         *	Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *	Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet
                   end then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
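
                /*
                 * Alignment example: with 1452 bytes of room, len &= ~7
                 * trims a non-final fragment to 1448 bytes, keeping every
                 * fragment offset a multiple of 8 as the fragment header's
                 * 13-bit offset field requires.
                 */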
                /*
                 *	Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_RESERVED_SPACE(rt->u.dst.dev),
                                      GFP_ATOMIC)) == NULL) {
                        NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
                        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *	Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                frag->nh.raw = frag->data;
                fh = (struct frag_hdr *)(frag->data + hlen);
                frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);

                /*
                 *	Charge the memory for the fragment to any owner
                 *	it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *	Copy the packet header into the new buffer.
                 */
                memcpy(frag->nh.raw, skb->data, hlen);

                /*
                 *	Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(skb, fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *	Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, frag->h.raw, len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *	Put this fragment into the sending queue.
                 */

                IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);

                err = output(frag);
                if (err)
                        goto fail;
        }
        kfree_skb(skb);
        IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
        return err;

fail:
        kfree_skb(skb);
        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        return err;
}

int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        int err = 0;

        *dst = NULL;
        if (sk) {
                struct ipv6_pinfo *np = inet6_sk(sk);

                *dst = sk_dst_check(sk, np->dst_cookie);
                if (*dst) {
                        struct rt6_info *rt = (struct rt6_info *)*dst;

                        /* Yes, checking route validity in the not-connected
                           case is not very simple. Take into account that
                           we do not support routing by source, TOS, and
                           MSG_DONTROUTE.		--ANK (980726)

                           1. If the route was a host route, check that the
                              cached destination is current.
                              If it is a network route, we still may check
                              its validity using the saved pointer to the
                              last used address: daddr_cache.
                              We do not want to save the whole address now
                              (because the main consumer of this service is
                              TCP, which does not have this problem), so the
                              last trick works only on connected sockets.
                           2. oif should also be the same.
                         */
                        if (((rt->rt6i_dst.plen != 128 ||
                              !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
                             && (np->daddr_cache == NULL ||
                                 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
                            || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
                                dst_release(*dst);
                                *dst = NULL;
                        }
                }
        }

        if (*dst == NULL)
                *dst = ip6_route_output(sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;
        return err;
}
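
/*
 * A minimal sketch of how a datagram sender is expected to drive the
 * routines below (flowi already filled in, error handling elided):
 *
 *	struct dst_entry *dst;
 *
 *	err = ip6_dst_lookup(sk, &dst, &fl);
 *	...
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen, hlimit,
 *			      opt, &fl, (struct rt6_info *)dst,
 *			      msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 */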

int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (np->cork.opt == NULL) {
                                np->cork.opt = kmalloc(opt->tot_len,
                                                       sk->sk_allocation);
                                if (unlikely(np->cork.opt == NULL))
                                        return -ENOBUFS;
                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                                return -EINVAL;
                        }
                        memcpy(np->cork.opt, opt, opt->tot_len);
                        inet->cork.flags |= IPCORK_OPT;
                        /* need source address above --miyazawa */
                }
                dst_hold(&rt->u.dst);
                np->cork.rt = rt;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = np->cork.rt;
                fl = &inet->cork.fl;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

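        /*
         * Worked example: mtu 1500 with no destination options gives
         * fragheaderlen = 40, so maxfraglen = (1460 & ~7) + 40 - 8 = 1488:
         * the largest 8-byte-aligned packet size that still leaves room
         * for a fragment header if the message ends up being fragmented.
         */
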
        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use the MTU if the total length of the message fits into it.
         * Otherwise, we need to reserve the fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         *        --yoshfuji
         */

        inet->cork.length += length;

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into the current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If the remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at the tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for the fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb->nh.raw = data + exthdrlen;
                        data += fragheaderlen;
                        skb->h.raw = data + exthdrlen;

                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                skb_trim(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                                skb->truesize += PAGE_SIZE;
                                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from,
                                    page_address(frag->page) + frag->page_offset + frag->size,
                                    offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
        return err;
}
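
/*
 * The getfrag callback handed to ip6_append_data copies len bytes of
 * payload from the caller's context ("from", typically an iovec iterator)
 * into "to" at the given offset, returning a negative value on fault; the
 * "odd" argument carries the byte alignment needed for checksum folding.
 * Datagram protocols commonly pass ip_generic_getfrag here, while ICMPv6
 * supplies its own callback that also accumulates a checksum as it copies.
 */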

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = np->cork.rt;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to the ip header from the ext header */
        if (skb->data < skb->nh.raw)
                __skb_pull(skb, skb->nh.raw - skb->data);
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb->h.raw - skb->nh.raw);
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb->nh.ipv6h = hdr = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));

        *(u32 *)hdr = fl->fl6_flowlabel | htonl(0x60000000);

        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
                hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
        else
                hdr->payload_len = 0;
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->dst = dst_clone(&rt->u.dst);
        IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
        err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
                      dst_output);
        if (err) {
                if (err > 0)
                        err = np->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        inet->cork.flags &= ~IPCORK_OPT;
        if (np->cork.opt) {
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }
        if (np->cork.rt) {
                dst_release(&np->cork.rt->u.dst);
                np->cork.rt = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
        return err;
error:
        goto out;
}
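
/*
 * Note on payload_len above: a payload length of 0 with a packet larger
 * than IPV6_MAXPLEN (65535) marks a jumbogram, whose true length is then
 * expected to travel in a Hop-by-Hop Jumbo Payload option (RFC 2675),
 * presumably supplied through the corked options rather than here.
 */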

void ip6_flush_pending_frames(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        inet->cork.flags &= ~IPCORK_OPT;

        if (np->cork.opt) {
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }
        if (np->cork.rt) {
                dst_release(&np->cork.rt->u.dst);
                np->cork.rt = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}