/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

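/*
 * Finalize the IPv6 payload length and run the netfilter LOCAL_OUT
 * hook; a return value of 1 from the hook chain tells the caller to
 * go on and hand the packet to dst_output().
 */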
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

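/*
 * Last step before the neighbour layer: loop a copy of multicast
 * packets back to local listeners (and to the multicast router when
 * required), update output counters, then transmit via the cached
 * neighbour entry.
 */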
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	rcu_read_lock();
	neigh = dst_get_neighbour(dst);
	if (neigh) {
		int res = neigh_output(neigh, skb);

		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

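/*
 * Fragment when the packet exceeds the path MTU and is not GSO, or
 * when the route requires fragmenting every packet (dst_allfrag).
 */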
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

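/*
 * Output entry point: drop the packet if IPv6 is administratively
 * disabled on the device, otherwise run the POST_ROUTING hook (unless
 * netfilter already rerouted this packet).
 */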
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

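/*
 * Deliver a Router Alert packet to every raw socket registered for
 * this alert value; returns 1 if the packet was consumed.
 */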
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

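/*
 * Decide what to do with a packet destined to a proxied address:
 * return 1 to hand it to local input (unicast neighbour discovery),
 * 0 to forward it, or -1 to drop it (link-local destination).
 */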
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct neighbour *n;
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any warranty that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	n = dst_get_neighbour(dst);
	if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

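/*
 * Copy routing, classification and netfilter metadata from one skb
 * to a freshly built fragment.
 */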
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

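/*
 * Walk the extension header chain and return the offset at which a
 * Fragment header has to be inserted; *nexthdr is left pointing at
 * the nexthdr byte that must be rewritten to NEXTHDR_FRAGMENT.
 */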
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

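/*
 * Pick a fragment identification: per-destination via the inet_peer
 * when a route is available, otherwise from a global counter that
 * never returns zero.
 */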
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

	if (rt) {
		struct inet_peer *peer;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			return;
		}
	}
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}

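/*
 * Fragment an oversized packet. The fast path reuses an existing
 * frag_list whose geometry already fits the MTU; otherwise the slow
 * path copies the payload into freshly allocated fragments.
 */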
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

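/*
 * Nonzero when a cached route can no longer be trusted for fl_addr:
 * it is not a host route for that address and the socket's cached
 * peer address differs as well.
 */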
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rcu_read_lock();
	n = dst_get_neighbour(*dst);
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		rcu_read_unlock();
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	} else {
		rcu_read_unlock();
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

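/*
 * Build (or extend) one oversized UDP datagram that the device will
 * segment in hardware (UFO); the fragment id is chosen up front so
 * all resulting on-the-wire fragments share it.
 */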
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

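/*
 * Append user data to the socket's cork queue, splitting it into
 * MTU-sized skbs that ip6_push_pending_frames() later turns into
 * packets.
 */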
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen + dst_exthdrlen);
			skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
			data += fragheaderlen + dst_exthdrlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != skb_frag_page(frag)) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					skb_frag_ref(skb, i);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    skb_frag_address(frag) + skb_frag_size(frag),
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			skb_frag_size_add(frag, copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

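/* Free the corked option copies and drop the cached route. */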
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

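/*
 * Chain the queued skbs into one packet, prepend the IPv6 header and
 * corked extension headers, then send it via ip6_local_out().
 */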
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

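/* Discard everything still queued on the socket and release the cork. */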
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}