]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
[NETFILTER]: convert nfmark and conntrack mark to 32bit
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
31#include <linux/config.h>
32#include <linux/errno.h>
33#include <linux/types.h>
34#include <linux/string.h>
35#include <linux/socket.h>
36#include <linux/net.h>
37#include <linux/netdevice.h>
38#include <linux/if_arp.h>
39#include <linux/in6.h>
40#include <linux/tcp.h>
41#include <linux/route.h>
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62{
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
65
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
71}
72
73static inline int ip6_output_finish(struct sk_buff *skb)
74{
75
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
78
79 if (hh) {
80 int hh_alen;
81
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
90
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
92 kfree_skb(skb);
93 return -EINVAL;
94
95}
96
97/* dev_loopback_xmit for use with netfilter. */
98static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
99{
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
105
106 netif_rx(newskb);
107 return 0;
108}
109
110
111static int ip6_output2(struct sk_buff *skb)
112{
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
115
116 skb->protocol = htons(ETH_P_IPV6);
117 skb->dev = dev;
118
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
121
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
126
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
129 */
130 if (newskb)
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
132 newskb->dev,
133 ip6_dev_loopback_xmit);
134
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
137 kfree_skb(skb);
138 return 0;
139 }
140 }
141
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
143 }
144
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
146}
147
148int ip6_output(struct sk_buff *skb)
149{
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
152 else
153 return ip6_output2(skb);
154}
155
156#ifdef CONFIG_NETFILTER
157int ip6_route_me_harder(struct sk_buff *skb)
158{
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
161 struct flowi fl = {
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
163 .nl_u =
164 { .ip6_u =
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
168 };
169
170 dst = ip6_route_output(skb->sk, &fl);
171
172 if (dst->error) {
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
174 LIMIT_NETDEBUG(
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
176 dst_release(dst);
177 return -EINVAL;
178 }
179
180 /* Drop old route. */
181 dst_release(skb->dst);
182
183 skb->dst = dst;
184 return 0;
185}
186#endif
187
/*
 * Continuation for the LOCAL_OUT hook: when netfilter flagged the
 * packet as altered, look the route up again before sending.  A failed
 * re-route frees the packet and returns -EINVAL.
 */
static inline int ip6_maybe_reroute(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
	if ((skb->nfcache & NFC_ALTERED) && ip6_route_me_harder(skb) != 0) {
		kfree_skb(skb);
		return -EINVAL;
	}
#endif /* CONFIG_NETFILTER */
	return dst_output(skb);
}
200
201/*
202 * xmit an sk_buff (used by TCP)
203 */
204
205int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
207{
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
211 struct ipv6hdr *hdr;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
214 int hlimit;
215 u32 mtu;
216
217 if (opt) {
218 int head_room;
219
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
222 */
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
226
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
229 kfree_skb(skb);
230 skb = skb2;
231 if (skb == NULL) {
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
233 return -ENOBUFS;
234 }
235 if (sk)
236 skb_set_owner_w(skb, sk);
237 }
238 if (opt->opt_flen)
239 ipv6_push_frag_opts(skb, opt, &proto);
240 if (opt->opt_nflen)
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
242 }
243
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
245
246 /*
247 * Fill in the IPv6 header
248 */
249
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
251 hlimit = -1;
252 if (np)
253 hlimit = np->hop_limit;
254 if (hlimit < 0)
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
256 if (hlimit < 0)
257 hlimit = ipv6_get_hoplimit(dst->dev);
258
259 hdr->payload_len = htons(seg_len);
260 hdr->nexthdr = proto;
261 hdr->hop_limit = hlimit;
262
263 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
264 ipv6_addr_copy(&hdr->daddr, first_hop);
265
266 mtu = dst_mtu(dst);
267 if ((skb->len <= mtu) || ipfragok) {
268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
270 }
271
272 if (net_ratelimit())
273 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
274 skb->dev = dst->dev;
275 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
276 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
277 kfree_skb(skb);
278 return -EMSGSIZE;
279}
280
281/*
282 * To avoid extra problems ND packets are sent through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
286 */
287
288int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 struct in6_addr *saddr, struct in6_addr *daddr,
290 int proto, int len)
291{
292 struct ipv6_pinfo *np = inet6_sk(sk);
293 struct ipv6hdr *hdr;
294 int totlen;
295
296 skb->protocol = htons(ETH_P_IPV6);
297 skb->dev = dev;
298
299 totlen = len + sizeof(struct ipv6hdr);
300
301 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
302 skb->nh.ipv6h = hdr;
303
304 *(u32*)hdr = htonl(0x60000000);
305
306 hdr->payload_len = htons(len);
307 hdr->nexthdr = proto;
308 hdr->hop_limit = np->hop_limit;
309
310 ipv6_addr_copy(&hdr->saddr, saddr);
311 ipv6_addr_copy(&hdr->daddr, daddr);
312
313 return 0;
314}
315
316static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317{
318 struct ip6_ra_chain *ra;
319 struct sock *last = NULL;
320
321 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel) {
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
329 }
330 last = sk;
331 }
332 }
333
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
338 }
339 read_unlock(&ip6_ra_lock);
340 return 0;
341}
342
/* Continuation for the FORWARD hook: send the packet out via its route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
347
348int ip6_forward(struct sk_buff *skb)
349{
350 struct dst_entry *dst = skb->dst;
351 struct ipv6hdr *hdr = skb->nh.ipv6h;
352 struct inet6_skb_parm *opt = IP6CB(skb);
353
354 if (ipv6_devconf.forwarding == 0)
355 goto error;
356
357 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
358 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
359 goto drop;
360 }
361
362 skb->ip_summed = CHECKSUM_NONE;
363
364 /*
365 * We DO NOT make any processing on
366 * RA packets, pushing them to user level AS IS
367 * without ane WARRANTY that application will be able
368 * to interpret them. The reason is that we
369 * cannot make anything clever here.
370 *
371 * We are not end-node, so that if packet contains
372 * AH/ESP, we cannot make anything.
373 * Defragmentation also would be mistake, RA packets
374 * cannot be fragmented, because there is no warranty
375 * that different fragments will go along one path. --ANK
376 */
377 if (opt->ra) {
378 u8 *ptr = skb->nh.raw + opt->ra;
379 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
380 return 0;
381 }
382
383 /*
384 * check and decrement ttl
385 */
386 if (hdr->hop_limit <= 1) {
387 /* Force OUTPUT device used as source address */
388 skb->dev = dst->dev;
389 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
390 0, skb->dev);
391
392 kfree_skb(skb);
393 return -ETIMEDOUT;
394 }
395
396 if (!xfrm6_route_forward(skb)) {
397 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
398 goto drop;
399 }
400 dst = skb->dst;
401
402 /* IPv6 specs say nothing about it, but it is clear that we cannot
403 send redirects to source routed frames.
404 */
405 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
406 struct in6_addr *target = NULL;
407 struct rt6_info *rt;
408 struct neighbour *n = dst->neighbour;
409
410 /*
411 * incoming and outgoing devices are the same
412 * send a redirect.
413 */
414
415 rt = (struct rt6_info *) dst;
416 if ((rt->rt6i_flags & RTF_GATEWAY))
417 target = (struct in6_addr*)&n->primary_key;
418 else
419 target = &hdr->daddr;
420
421 /* Limit redirects both by destination (here)
422 and by source (inside ndisc_send_redirect)
423 */
424 if (xrlim_allow(dst, 1*HZ))
425 ndisc_send_redirect(skb, n, target);
426 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
427 |IPV6_ADDR_LINKLOCAL)) {
428 /* This check is security critical. */
429 goto error;
430 }
431
432 if (skb->len > dst_mtu(dst)) {
433 /* Again, force OUTPUT device used as source address */
434 skb->dev = dst->dev;
435 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
436 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
437 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
438 kfree_skb(skb);
439 return -EMSGSIZE;
440 }
441
442 if (skb_cow(skb, dst->dev->hard_header_len)) {
443 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
444 goto drop;
445 }
446
447 hdr = skb->nh.ipv6h;
448
449 /* Mangling hops number delayed to point after skb COW */
450
451 hdr->hop_limit--;
452
453 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
454 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
455
456error:
457 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
458drop:
459 kfree_skb(skb);
460 return -EINVAL;
461}
462
463static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
464{
465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority;
467 to->protocol = from->protocol;
1da177e4
LT
468 dst_release(to->dst);
469 to->dst = dst_clone(from->dst);
470 to->dev = from->dev;
471
472#ifdef CONFIG_NET_SCHED
473 to->tc_index = from->tc_index;
474#endif
475#ifdef CONFIG_NETFILTER
476 to->nfmark = from->nfmark;
477 /* Connection association is same as pre-frag packet */
478 to->nfct = from->nfct;
479 nf_conntrack_get(to->nfct);
480 to->nfctinfo = from->nfctinfo;
481#ifdef CONFIG_BRIDGE_NETFILTER
482 nf_bridge_put(to->nf_bridge);
483 to->nf_bridge = from->nf_bridge;
484 nf_bridge_get(to->nf_bridge);
485#endif
1da177e4
LT
486#endif
487}
488
489int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
490{
491 u16 offset = sizeof(struct ipv6hdr);
492 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
493 unsigned int packet_len = skb->tail - skb->nh.raw;
494 int found_rhdr = 0;
495 *nexthdr = &skb->nh.ipv6h->nexthdr;
496
497 while (offset + 1 <= packet_len) {
498
499 switch (**nexthdr) {
500
501 case NEXTHDR_HOP:
502 case NEXTHDR_ROUTING:
503 case NEXTHDR_DEST:
504 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
505 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
506 offset += ipv6_optlen(exthdr);
507 *nexthdr = &exthdr->nexthdr;
508 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
509 break;
510 default :
511 return offset;
512 }
513 }
514
515 return offset;
516}
517
518static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
519{
520 struct net_device *dev;
521 struct sk_buff *frag;
522 struct rt6_info *rt = (struct rt6_info*)skb->dst;
523 struct ipv6hdr *tmp_hdr;
524 struct frag_hdr *fh;
525 unsigned int mtu, hlen, left, len;
526 u32 frag_id = 0;
527 int ptr, offset = 0, err=0;
528 u8 *prevhdr, nexthdr = 0;
529
530 dev = rt->u.dst.dev;
531 hlen = ip6_find_1stfragopt(skb, &prevhdr);
532 nexthdr = *prevhdr;
533
534 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
535
536 if (skb_shinfo(skb)->frag_list) {
537 int first_len = skb_pagelen(skb);
538
539 if (first_len - hlen > mtu ||
540 ((first_len - hlen) & 7) ||
541 skb_cloned(skb))
542 goto slow_path;
543
544 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
545 /* Correct geometry. */
546 if (frag->len > mtu ||
547 ((frag->len & 7) && frag->next) ||
548 skb_headroom(frag) < hlen)
549 goto slow_path;
550
1da177e4
LT
551 /* Partially cloned skb? */
552 if (skb_shared(frag))
553 goto slow_path;
2fdba6b0
HX
554
555 BUG_ON(frag->sk);
556 if (skb->sk) {
557 sock_hold(skb->sk);
558 frag->sk = skb->sk;
559 frag->destructor = sock_wfree;
560 skb->truesize -= frag->truesize;
561 }
1da177e4
LT
562 }
563
564 err = 0;
565 offset = 0;
566 frag = skb_shinfo(skb)->frag_list;
567 skb_shinfo(skb)->frag_list = NULL;
568 /* BUILD HEADER */
569
570 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
571 if (!tmp_hdr) {
572 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
573 return -ENOMEM;
574 }
575
576 *prevhdr = NEXTHDR_FRAGMENT;
577 memcpy(tmp_hdr, skb->nh.raw, hlen);
578 __skb_pull(skb, hlen);
579 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
580 skb->nh.raw = __skb_push(skb, hlen);
581 memcpy(skb->nh.raw, tmp_hdr, hlen);
582
583 ipv6_select_ident(skb, fh);
584 fh->nexthdr = nexthdr;
585 fh->reserved = 0;
586 fh->frag_off = htons(IP6_MF);
587 frag_id = fh->identification;
588
589 first_len = skb_pagelen(skb);
590 skb->data_len = first_len - skb_headlen(skb);
591 skb->len = first_len;
592 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
593
594
595 for (;;) {
596 /* Prepare header of the next frame,
597 * before previous one went down. */
598 if (frag) {
599 frag->ip_summed = CHECKSUM_NONE;
600 frag->h.raw = frag->data;
601 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
602 frag->nh.raw = __skb_push(frag, hlen);
603 memcpy(frag->nh.raw, tmp_hdr, hlen);
604 offset += skb->len - hlen - sizeof(struct frag_hdr);
605 fh->nexthdr = nexthdr;
606 fh->reserved = 0;
607 fh->frag_off = htons(offset);
608 if (frag->next != NULL)
609 fh->frag_off |= htons(IP6_MF);
610 fh->identification = frag_id;
611 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
612 ip6_copy_metadata(frag, skb);
613 }
614
615 err = output(skb);
616 if (err || !frag)
617 break;
618
619 skb = frag;
620 frag = skb->next;
621 skb->next = NULL;
622 }
623
624 if (tmp_hdr)
625 kfree(tmp_hdr);
626
627 if (err == 0) {
628 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
629 return 0;
630 }
631
632 while (frag) {
633 skb = frag->next;
634 kfree_skb(frag);
635 frag = skb;
636 }
637
638 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
639 return err;
640 }
641
642slow_path:
643 left = skb->len - hlen; /* Space per frame */
644 ptr = hlen; /* Where to start from */
645
646 /*
647 * Fragment the datagram.
648 */
649
650 *prevhdr = NEXTHDR_FRAGMENT;
651
652 /*
653 * Keep copying data until we run out.
654 */
655 while(left > 0) {
656 len = left;
657 /* IF: it doesn't fit, use 'mtu' - the data space left */
658 if (len > mtu)
659 len = mtu;
660 /* IF: we are not sending upto and including the packet end
661 then align the next start on an eight byte boundary */
662 if (len < left) {
663 len &= ~7;
664 }
665 /*
666 * Allocate buffer.
667 */
668
669 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
670 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
671 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
672 err = -ENOMEM;
673 goto fail;
674 }
675
676 /*
677 * Set up data on packet
678 */
679
680 ip6_copy_metadata(frag, skb);
681 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
682 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
683 frag->nh.raw = frag->data;
684 fh = (struct frag_hdr*)(frag->data + hlen);
685 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
686
687 /*
688 * Charge the memory for the fragment to any owner
689 * it might possess
690 */
691 if (skb->sk)
692 skb_set_owner_w(frag, skb->sk);
693
694 /*
695 * Copy the packet header into the new buffer.
696 */
697 memcpy(frag->nh.raw, skb->data, hlen);
698
699 /*
700 * Build fragment header.
701 */
702 fh->nexthdr = nexthdr;
703 fh->reserved = 0;
704 if (frag_id) {
705 ipv6_select_ident(skb, fh);
706 frag_id = fh->identification;
707 } else
708 fh->identification = frag_id;
709
710 /*
711 * Copy a block of the IP datagram.
712 */
713 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
714 BUG();
715 left -= len;
716
717 fh->frag_off = htons(offset);
718 if (left > 0)
719 fh->frag_off |= htons(IP6_MF);
720 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
721
722 ptr += len;
723 offset += len;
724
725 /*
726 * Put this fragment into the sending queue.
727 */
728
729 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
730
731 err = output(frag);
732 if (err)
733 goto fail;
734 }
735 kfree_skb(skb);
736 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
737 return err;
738
739fail:
740 kfree_skb(skb);
741 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
742 return err;
743}
744
745int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
746{
747 int err = 0;
748
749 *dst = NULL;
750 if (sk) {
751 struct ipv6_pinfo *np = inet6_sk(sk);
752
753 *dst = sk_dst_check(sk, np->dst_cookie);
754 if (*dst) {
755 struct rt6_info *rt = (struct rt6_info*)*dst;
756
757 /* Yes, checking route validity in not connected
758 case is not very simple. Take into account,
759 that we do not support routing by source, TOS,
760 and MSG_DONTROUTE --ANK (980726)
761
762 1. If route was host route, check that
763 cached destination is current.
764 If it is network route, we still may
765 check its validity using saved pointer
766 to the last used address: daddr_cache.
767 We do not want to save whole address now,
768 (because main consumer of this service
769 is tcp, which has not this problem),
770 so that the last trick works only on connected
771 sockets.
772 2. oif also should be the same.
773 */
774
775 if (((rt->rt6i_dst.plen != 128 ||
776 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
777 && (np->daddr_cache == NULL ||
778 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
779 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
780 dst_release(*dst);
781 *dst = NULL;
782 }
783 }
784 }
785
786 if (*dst == NULL)
787 *dst = ip6_route_output(sk, fl);
788
789 if ((err = (*dst)->error))
790 goto out_err_release;
791
792 if (ipv6_addr_any(&fl->fl6_src)) {
793 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
794
44456d37 795 if (err)
1da177e4 796 goto out_err_release;
1da177e4
LT
797 }
798
799 return 0;
800
801out_err_release:
802 dst_release(*dst);
803 *dst = NULL;
804 return err;
805}
806
807int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
808 void *from, int length, int transhdrlen,
809 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
810 unsigned int flags)
811{
812 struct inet_sock *inet = inet_sk(sk);
813 struct ipv6_pinfo *np = inet6_sk(sk);
814 struct sk_buff *skb;
815 unsigned int maxfraglen, fragheaderlen;
816 int exthdrlen;
817 int hh_len;
818 int mtu;
819 int copy;
820 int err;
821 int offset = 0;
822 int csummode = CHECKSUM_NONE;
823
824 if (flags&MSG_PROBE)
825 return 0;
826 if (skb_queue_empty(&sk->sk_write_queue)) {
827 /*
828 * setup for corking
829 */
830 if (opt) {
831 if (np->cork.opt == NULL) {
832 np->cork.opt = kmalloc(opt->tot_len,
833 sk->sk_allocation);
834 if (unlikely(np->cork.opt == NULL))
835 return -ENOBUFS;
836 } else if (np->cork.opt->tot_len < opt->tot_len) {
837 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
838 return -EINVAL;
839 }
840 memcpy(np->cork.opt, opt, opt->tot_len);
841 inet->cork.flags |= IPCORK_OPT;
842 /* need source address above miyazawa*/
843 }
844 dst_hold(&rt->u.dst);
845 np->cork.rt = rt;
846 inet->cork.fl = *fl;
847 np->cork.hop_limit = hlimit;
848 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
849 if (dst_allfrag(rt->u.dst.path))
850 inet->cork.flags |= IPCORK_ALLFRAG;
851 inet->cork.length = 0;
852 sk->sk_sndmsg_page = NULL;
853 sk->sk_sndmsg_off = 0;
854 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
855 length += exthdrlen;
856 transhdrlen += exthdrlen;
857 } else {
858 rt = np->cork.rt;
859 fl = &inet->cork.fl;
860 if (inet->cork.flags & IPCORK_OPT)
861 opt = np->cork.opt;
862 transhdrlen = 0;
863 exthdrlen = 0;
864 mtu = inet->cork.fragsize;
865 }
866
867 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
868
869 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
870 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
871
872 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
873 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
874 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
875 return -EMSGSIZE;
876 }
877 }
878
879 /*
880 * Let's try using as much space as possible.
881 * Use MTU if total length of the message fits into the MTU.
882 * Otherwise, we need to reserve fragment header and
883 * fragment alignment (= 8-15 octects, in total).
884 *
885 * Note that we may need to "move" the data from the tail of
886 * of the buffer to the new fragment when we split
887 * the message.
888 *
889 * FIXME: It may be fragmented into multiple chunks
890 * at once if non-fragmentable extension headers
891 * are too large.
892 * --yoshfuji
893 */
894
895 inet->cork.length += length;
896
897 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
898 goto alloc_new_skb;
899
900 while (length > 0) {
901 /* Check if the remaining data fits into current packet. */
902 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
903 if (copy < length)
904 copy = maxfraglen - skb->len;
905
906 if (copy <= 0) {
907 char *data;
908 unsigned int datalen;
909 unsigned int fraglen;
910 unsigned int fraggap;
911 unsigned int alloclen;
912 struct sk_buff *skb_prev;
913alloc_new_skb:
914 skb_prev = skb;
915
916 /* There's no room in the current skb */
917 if (skb_prev)
918 fraggap = skb_prev->len - maxfraglen;
919 else
920 fraggap = 0;
921
922 /*
923 * If remaining data exceeds the mtu,
924 * we know we need more fragment(s).
925 */
926 datalen = length + fraggap;
927 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
928 datalen = maxfraglen - fragheaderlen;
929
930 fraglen = datalen + fragheaderlen;
931 if ((flags & MSG_MORE) &&
932 !(rt->u.dst.dev->features&NETIF_F_SG))
933 alloclen = mtu;
934 else
935 alloclen = datalen + fragheaderlen;
936
937 /*
938 * The last fragment gets additional space at tail.
939 * Note: we overallocate on fragments with MSG_MODE
940 * because we have no idea if we're the last one.
941 */
942 if (datalen == length + fraggap)
943 alloclen += rt->u.dst.trailer_len;
944
945 /*
946 * We just reserve space for fragment header.
947 * Note: this may be overallocation if the message
948 * (without MSG_MORE) fits into the MTU.
949 */
950 alloclen += sizeof(struct frag_hdr);
951
952 if (transhdrlen) {
953 skb = sock_alloc_send_skb(sk,
954 alloclen + hh_len,
955 (flags & MSG_DONTWAIT), &err);
956 } else {
957 skb = NULL;
958 if (atomic_read(&sk->sk_wmem_alloc) <=
959 2 * sk->sk_sndbuf)
960 skb = sock_wmalloc(sk,
961 alloclen + hh_len, 1,
962 sk->sk_allocation);
963 if (unlikely(skb == NULL))
964 err = -ENOBUFS;
965 }
966 if (skb == NULL)
967 goto error;
968 /*
969 * Fill in the control structures
970 */
971 skb->ip_summed = csummode;
972 skb->csum = 0;
973 /* reserve for fragmentation */
974 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
975
976 /*
977 * Find where to start putting bytes
978 */
979 data = skb_put(skb, fraglen);
980 skb->nh.raw = data + exthdrlen;
981 data += fragheaderlen;
982 skb->h.raw = data + exthdrlen;
983
984 if (fraggap) {
985 skb->csum = skb_copy_and_csum_bits(
986 skb_prev, maxfraglen,
987 data + transhdrlen, fraggap, 0);
988 skb_prev->csum = csum_sub(skb_prev->csum,
989 skb->csum);
990 data += fraggap;
991 skb_trim(skb_prev, maxfraglen);
992 }
993 copy = datalen - transhdrlen - fraggap;
994 if (copy < 0) {
995 err = -EINVAL;
996 kfree_skb(skb);
997 goto error;
998 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
999 err = -EFAULT;
1000 kfree_skb(skb);
1001 goto error;
1002 }
1003
1004 offset += copy;
1005 length -= datalen - fraggap;
1006 transhdrlen = 0;
1007 exthdrlen = 0;
1008 csummode = CHECKSUM_NONE;
1009
1010 /*
1011 * Put the packet on the pending queue
1012 */
1013 __skb_queue_tail(&sk->sk_write_queue, skb);
1014 continue;
1015 }
1016
1017 if (copy > length)
1018 copy = length;
1019
1020 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1021 unsigned int off;
1022
1023 off = skb->len;
1024 if (getfrag(from, skb_put(skb, copy),
1025 offset, copy, off, skb) < 0) {
1026 __skb_trim(skb, off);
1027 err = -EFAULT;
1028 goto error;
1029 }
1030 } else {
1031 int i = skb_shinfo(skb)->nr_frags;
1032 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1033 struct page *page = sk->sk_sndmsg_page;
1034 int off = sk->sk_sndmsg_off;
1035 unsigned int left;
1036
1037 if (page && (left = PAGE_SIZE - off) > 0) {
1038 if (copy >= left)
1039 copy = left;
1040 if (page != frag->page) {
1041 if (i == MAX_SKB_FRAGS) {
1042 err = -EMSGSIZE;
1043 goto error;
1044 }
1045 get_page(page);
1046 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1047 frag = &skb_shinfo(skb)->frags[i];
1048 }
1049 } else if(i < MAX_SKB_FRAGS) {
1050 if (copy > PAGE_SIZE)
1051 copy = PAGE_SIZE;
1052 page = alloc_pages(sk->sk_allocation, 0);
1053 if (page == NULL) {
1054 err = -ENOMEM;
1055 goto error;
1056 }
1057 sk->sk_sndmsg_page = page;
1058 sk->sk_sndmsg_off = 0;
1059
1060 skb_fill_page_desc(skb, i, page, 0, 0);
1061 frag = &skb_shinfo(skb)->frags[i];
1062 skb->truesize += PAGE_SIZE;
1063 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1064 } else {
1065 err = -EMSGSIZE;
1066 goto error;
1067 }
1068 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1069 err = -EFAULT;
1070 goto error;
1071 }
1072 sk->sk_sndmsg_off += copy;
1073 frag->size += copy;
1074 skb->len += copy;
1075 skb->data_len += copy;
1076 }
1077 offset += copy;
1078 length -= copy;
1079 }
1080 return 0;
1081error:
1082 inet->cork.length -= length;
1083 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1084 return err;
1085}
1086
1087int ip6_push_pending_frames(struct sock *sk)
1088{
1089 struct sk_buff *skb, *tmp_skb;
1090 struct sk_buff **tail_skb;
1091 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1092 struct inet_sock *inet = inet_sk(sk);
1093 struct ipv6_pinfo *np = inet6_sk(sk);
1094 struct ipv6hdr *hdr;
1095 struct ipv6_txoptions *opt = np->cork.opt;
1096 struct rt6_info *rt = np->cork.rt;
1097 struct flowi *fl = &inet->cork.fl;
1098 unsigned char proto = fl->proto;
1099 int err = 0;
1100
1101 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1102 goto out;
1103 tail_skb = &(skb_shinfo(skb)->frag_list);
1104
1105 /* move skb->data to ip header from ext header */
1106 if (skb->data < skb->nh.raw)
1107 __skb_pull(skb, skb->nh.raw - skb->data);
1108 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1109 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1110 *tail_skb = tmp_skb;
1111 tail_skb = &(tmp_skb->next);
1112 skb->len += tmp_skb->len;
1113 skb->data_len += tmp_skb->len;
1da177e4
LT
1114 skb->truesize += tmp_skb->truesize;
1115 __sock_put(tmp_skb->sk);
1116 tmp_skb->destructor = NULL;
1117 tmp_skb->sk = NULL;
1da177e4
LT
1118 }
1119
1120 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1121 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1122 if (opt && opt->opt_flen)
1123 ipv6_push_frag_opts(skb, opt, &proto);
1124 if (opt && opt->opt_nflen)
1125 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1126
1127 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1128
1129 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1130
1131 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1132 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1133 else
1134 hdr->payload_len = 0;
1135 hdr->hop_limit = np->cork.hop_limit;
1136 hdr->nexthdr = proto;
1137 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1138 ipv6_addr_copy(&hdr->daddr, final_dst);
1139
1140 skb->dst = dst_clone(&rt->u.dst);
1141 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1142 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1143 if (err) {
1144 if (err > 0)
3320da89 1145 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1146 if (err)
1147 goto error;
1148 }
1149
1150out:
1151 inet->cork.flags &= ~IPCORK_OPT;
1152 if (np->cork.opt) {
1153 kfree(np->cork.opt);
1154 np->cork.opt = NULL;
1155 }
1156 if (np->cork.rt) {
1157 dst_release(&np->cork.rt->u.dst);
1158 np->cork.rt = NULL;
1159 inet->cork.flags &= ~IPCORK_ALLFRAG;
1160 }
1161 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1162 return err;
1163error:
1164 goto out;
1165}
1166
1167void ip6_flush_pending_frames(struct sock *sk)
1168{
1169 struct inet_sock *inet = inet_sk(sk);
1170 struct ipv6_pinfo *np = inet6_sk(sk);
1171 struct sk_buff *skb;
1172
1173 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1174 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1175 kfree_skb(skb);
1176 }
1177
1178 inet->cork.flags &= ~IPCORK_OPT;
1179
1180 if (np->cork.opt) {
1181 kfree(np->cork.opt);
1182 np->cork.opt = NULL;
1183 }
1184 if (np->cork.rt) {
1185 dst_release(&np->cork.rt->u.dst);
1186 np->cork.rt = NULL;
1187 inet->cork.flags &= ~IPCORK_ALLFRAG;
1188 }
1189 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1190}