]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: Add fragment reporting to ipv6_skip_exthdr().
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
ad0081e4 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
1da177e4 60
ef76bc23
HX
61int __ip6_local_out(struct sk_buff *skb)
62{
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
b2e0b385
JE
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
ef76bc23
HX
72}
73
/*
 * Send a locally generated IPv6 packet: run the LOCAL_OUT hook via
 * __ip6_local_out() and, if it returns 1, pass the packet to dst_output().
 *
 * Returns the hook result when it is not 1, otherwise the dst_output()
 * result.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (unlikely(ret != 1))
		return ret;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85
1da177e4
LT
86/* dev_loopback_xmit for use with netfilter. */
87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88{
459a98ed 89 skb_reset_mac_header(newskb);
bbe735e4 90 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 93 WARN_ON(!skb_dst(newskb));
1da177e4 94
e30b38c2 95 netif_rx_ni(newskb);
1da177e4
LT
96 return 0;
97}
98
9e508490 99static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 100{
adf30907 101 struct dst_entry *dst = skb_dst(skb);
1da177e4 102 struct net_device *dev = dst->dev;
f6b72b62 103 struct neighbour *neigh;
1da177e4
LT
104
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
107
0660e03f 108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 110
7ad6848c 111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 112 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
120 */
121 if (newskb)
b2e0b385
JE
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
1da177e4
LT
124 ip6_dev_loopback_xmit);
125
0660e03f 126 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
129 kfree_skb(skb);
130 return 0;
131 }
132 }
133
edf391ff
NH
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
1da177e4
LT
136 }
137
f2c31e32 138 rcu_read_lock();
69cce1d1 139 neigh = dst_get_neighbour(dst);
f2c31e32
ED
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
05e3aa09 142
f2c31e32
ED
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
9e508490
JE
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
1da177e4
LT
151}
152
9e508490
JE
153static int ip6_finish_output(struct sk_buff *skb)
154{
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
160}
161
1da177e4
LT
162int ip6_output(struct sk_buff *skb)
163{
9e508490 164 struct net_device *dev = skb_dst(skb)->dev;
adf30907 165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 166 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 167 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 168 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
169 kfree_skb(skb);
170 return 0;
171 }
172
9c6eb28a
JE
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
176}
177
1da177e4 178/*
b5d43998 179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
180 */
181
4c9483b2 182int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 183 struct ipv6_txoptions *opt, int tclass)
1da177e4 184{
3bd653c8 185 struct net *net = sock_net(sk);
b30bd282 186 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 187 struct in6_addr *first_hop = &fl6->daddr;
adf30907 188 struct dst_entry *dst = skb_dst(skb);
1da177e4 189 struct ipv6hdr *hdr;
4c9483b2 190 u8 proto = fl6->flowi6_proto;
1da177e4 191 int seg_len = skb->len;
e651f03a 192 int hlimit = -1;
1da177e4
LT
193 u32 mtu;
194
195 if (opt) {
c2636b4d 196 unsigned int head_room;
1da177e4
LT
197
198 /* First: exthdrs may take lots of space (~8K for now)
199 MAX_HEADER is not enough.
200 */
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
204
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 207 if (skb2 == NULL) {
adf30907 208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
1da177e4
LT
211 return -ENOBUFS;
212 }
a11d206d
YH
213 kfree_skb(skb);
214 skb = skb2;
83d7eb29 215 skb_set_owner_w(skb, sk);
1da177e4
LT
216 }
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
221 }
222
e2d1bca7
ACM
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
0660e03f 225 hdr = ipv6_hdr(skb);
1da177e4
LT
226
227 /*
228 * Fill in the IPv6 header
229 */
b903d324 230 if (np)
1da177e4
LT
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
6b75d090 233 hlimit = ip6_dst_hoplimit(dst);
1da177e4 234
4c9483b2 235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
41a1f8ea 236
1da177e4
LT
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
240
4e3fd7a0
AD
241 hdr->saddr = fl6->saddr;
242 hdr->daddr = *first_hop;
1da177e4 243
a2c2064f 244 skb->priority = sk->sk_priority;
4a19ec58 245 skb->mark = sk->sk_mark;
a2c2064f 246
1da177e4 247 mtu = dst_mtu(dst);
283d07ac 248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 250 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
1da177e4
LT
253 }
254
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
3ffe533c 258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
260 kfree_skb(skb);
261 return -EMSGSIZE;
262}
263
7159039a
YH
264EXPORT_SYMBOL(ip6_xmit);
265
1da177e4
LT
266/*
267 * To avoid extra problems ND packets are send through this
268 * routine. It's code duplication but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is for us performance critical)
271 */
272
273int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 274 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
275 int proto, int len)
276{
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
1da177e4
LT
279
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
282
55f79cc0
ACM
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 285 hdr = ipv6_hdr(skb);
1da177e4 286
ae08e1f0 287 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
288
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
292
4e3fd7a0
AD
293 hdr->saddr = *saddr;
294 hdr->daddr = *daddr;
1da177e4
LT
295
296 return 0;
297}
298
299static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
300{
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
303
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
0bd1b59b
AM
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
314 }
315 last = sk;
316 }
317 }
318
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
323 }
324 read_unlock(&ip6_ra_lock);
325 return 0;
326}
327
e21e0b5f
VN
328static int ip6_forward_proxy_check(struct sk_buff *skb)
329{
0660e03f 330 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 331 u8 nexthdr = hdr->nexthdr;
75f2811c 332 __be16 frag_off;
e21e0b5f
VN
333 int offset;
334
335 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
337 if (offset < 0)
338 return 0;
339 } else
340 offset = sizeof(struct ipv6hdr);
341
342 if (nexthdr == IPPROTO_ICMPV6) {
343 struct icmp6hdr *icmp6;
344
d56f90a7
ACM
345 if (!pskb_may_pull(skb, (skb_network_header(skb) +
346 offset + 1 - skb->data)))
e21e0b5f
VN
347 return 0;
348
d56f90a7 349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
350
351 switch (icmp6->icmp6_type) {
352 case NDISC_ROUTER_SOLICITATION:
353 case NDISC_ROUTER_ADVERTISEMENT:
354 case NDISC_NEIGHBOUR_SOLICITATION:
355 case NDISC_NEIGHBOUR_ADVERTISEMENT:
356 case NDISC_REDIRECT:
357 /* For reaction involving unicast neighbor discovery
358 * message destined to the proxied address, pass it to
359 * input function.
360 */
361 return 1;
362 default:
363 break;
364 }
365 }
366
74553b09
VN
367 /*
368 * The proxying router can't forward traffic sent to a link-local
369 * address, so signal the sender and discard the packet. This
370 * behavior is clarified by the MIPv6 specification.
371 */
372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
373 dst_link_failure(skb);
374 return -1;
375 }
376
e21e0b5f
VN
377 return 0;
378}
379
1da177e4
LT
/* Continuation for the NF_INET_FORWARD hook: pass the packet to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
384
385int ip6_forward(struct sk_buff *skb)
386{
adf30907 387 struct dst_entry *dst = skb_dst(skb);
0660e03f 388 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 389 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 390 struct net *net = dev_net(dst->dev);
69cce1d1 391 struct neighbour *n;
14f3ad6f 392 u32 mtu;
1ab1457c 393
53b7997f 394 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
395 goto error;
396
4497b076
BH
397 if (skb_warn_if_lro(skb))
398 goto drop;
399
1da177e4 400 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 401 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
402 goto drop;
403 }
404
72b43d08
AK
405 if (skb->pkt_type != PACKET_HOST)
406 goto drop;
407
35fc92a9 408 skb_forward_csum(skb);
1da177e4
LT
409
410 /*
411 * We DO NOT make any processing on
412 * RA packets, pushing them to user level AS IS
413 * without ane WARRANTY that application will be able
414 * to interpret them. The reason is that we
415 * cannot make anything clever here.
416 *
417 * We are not end-node, so that if packet contains
418 * AH/ESP, we cannot make anything.
419 * Defragmentation also would be mistake, RA packets
420 * cannot be fragmented, because there is no warranty
421 * that different fragments will go along one path. --ANK
422 */
423 if (opt->ra) {
d56f90a7 424 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
425 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
426 return 0;
427 }
428
429 /*
430 * check and decrement ttl
431 */
432 if (hdr->hop_limit <= 1) {
433 /* Force OUTPUT device used as source address */
434 skb->dev = dst->dev;
3ffe533c 435 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
436 IP6_INC_STATS_BH(net,
437 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
438
439 kfree_skb(skb);
440 return -ETIMEDOUT;
441 }
442
fbea49e1 443 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 444 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 445 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
446 int proxied = ip6_forward_proxy_check(skb);
447 if (proxied > 0)
e21e0b5f 448 return ip6_input(skb);
74553b09 449 else if (proxied < 0) {
3bd653c8
DL
450 IP6_INC_STATS(net, ip6_dst_idev(dst),
451 IPSTATS_MIB_INDISCARDS);
74553b09
VN
452 goto drop;
453 }
e21e0b5f
VN
454 }
455
1da177e4 456 if (!xfrm6_route_forward(skb)) {
3bd653c8 457 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
458 goto drop;
459 }
adf30907 460 dst = skb_dst(skb);
1da177e4
LT
461
462 /* IPv6 specs say nothing about it, but it is clear that we cannot
463 send redirects to source routed frames.
1e5dc146 464 We don't send redirects to frames decapsulated from IPsec.
1da177e4 465 */
69cce1d1
DM
466 n = dst_get_neighbour(dst);
467 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4
LT
468 struct in6_addr *target = NULL;
469 struct rt6_info *rt;
1da177e4
LT
470
471 /*
472 * incoming and outgoing devices are the same
473 * send a redirect.
474 */
475
476 rt = (struct rt6_info *) dst;
477 if ((rt->rt6i_flags & RTF_GATEWAY))
478 target = (struct in6_addr*)&n->primary_key;
479 else
480 target = &hdr->daddr;
481
92d86829
DM
482 if (!rt->rt6i_peer)
483 rt6_bind_peer(rt, 1);
484
1da177e4
LT
485 /* Limit redirects both by destination (here)
486 and by source (inside ndisc_send_redirect)
487 */
92d86829 488 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1da177e4 489 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
490 } else {
491 int addrtype = ipv6_addr_type(&hdr->saddr);
492
1da177e4 493 /* This check is security critical. */
f81b2e7d
YH
494 if (addrtype == IPV6_ADDR_ANY ||
495 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
496 goto error;
497 if (addrtype & IPV6_ADDR_LINKLOCAL) {
498 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 499 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
500 goto error;
501 }
1da177e4
LT
502 }
503
14f3ad6f
UW
504 mtu = dst_mtu(dst);
505 if (mtu < IPV6_MIN_MTU)
506 mtu = IPV6_MIN_MTU;
507
0aa68271 508 if (skb->len > mtu && !skb_is_gso(skb)) {
1da177e4
LT
509 /* Again, force OUTPUT device used as source address */
510 skb->dev = dst->dev;
14f3ad6f 511 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
483a47d2
DL
512 IP6_INC_STATS_BH(net,
513 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
514 IP6_INC_STATS_BH(net,
515 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
516 kfree_skb(skb);
517 return -EMSGSIZE;
518 }
519
520 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 521 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
522 goto drop;
523 }
524
0660e03f 525 hdr = ipv6_hdr(skb);
1da177e4
LT
526
527 /* Mangling hops number delayed to point after skb COW */
1ab1457c 528
1da177e4
LT
529 hdr->hop_limit--;
530
483a47d2 531 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
b2e0b385 532 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 533 ip6_forward_finish);
1da177e4
LT
534
535error:
483a47d2 536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
537drop:
538 kfree_skb(skb);
539 return -EINVAL;
540}
541
542static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543{
544 to->pkt_type = from->pkt_type;
545 to->priority = from->priority;
546 to->protocol = from->protocol;
adf30907
ED
547 skb_dst_drop(to);
548 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 549 to->dev = from->dev;
82e91ffe 550 to->mark = from->mark;
1da177e4
LT
551
552#ifdef CONFIG_NET_SCHED
553 to->tc_index = from->tc_index;
554#endif
e7ac05f3 555 nf_copy(to, from);
ba9dda3a
JK
556#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
557 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
558 to->nf_trace = from->nf_trace;
559#endif
984bc16c 560 skb_copy_secmark(to, from);
1da177e4
LT
561}
562
563int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
564{
565 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
566 struct ipv6_opt_hdr *exthdr =
567 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 568 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 569 int found_rhdr = 0;
0660e03f 570 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
571
572 while (offset + 1 <= packet_len) {
573
574 switch (**nexthdr) {
575
576 case NEXTHDR_HOP:
27637df9 577 break;
1da177e4 578 case NEXTHDR_ROUTING:
27637df9
MN
579 found_rhdr = 1;
580 break;
1da177e4 581 case NEXTHDR_DEST:
59fbb3a6 582#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
583 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
584 break;
585#endif
586 if (found_rhdr)
587 return offset;
1da177e4
LT
588 break;
589 default :
590 return offset;
591 }
27637df9
MN
592
593 offset += ipv6_optlen(exthdr);
594 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
595 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
596 offset);
1da177e4
LT
597 }
598
599 return offset;
600}
601
87c48fa3
ED
602void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
603{
604 static atomic_t ipv6_fragmentation_id;
605 int old, new;
606
607 if (rt) {
608 struct inet_peer *peer;
609
610 if (!rt->rt6i_peer)
611 rt6_bind_peer(rt, 1);
612 peer = rt->rt6i_peer;
613 if (peer) {
614 fhdr->identification = htonl(inet_getid(peer, 0));
615 return;
616 }
617 }
618 do {
619 old = atomic_read(&ipv6_fragmentation_id);
620 new = old + 1;
621 if (!new)
622 new = 1;
623 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
624 fhdr->identification = htonl(new);
625}
626
ad0081e4 627int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 628{
1da177e4 629 struct sk_buff *frag;
adf30907 630 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 631 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
632 struct ipv6hdr *tmp_hdr;
633 struct frag_hdr *fh;
634 unsigned int mtu, hlen, left, len;
a7ae1992 635 int hroom, troom;
ae08e1f0 636 __be32 frag_id = 0;
1da177e4
LT
637 int ptr, offset = 0, err=0;
638 u8 *prevhdr, nexthdr = 0;
adf30907 639 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 640
1da177e4
LT
641 hlen = ip6_find_1stfragopt(skb, &prevhdr);
642 nexthdr = *prevhdr;
643
628a5c56 644 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
645
646 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 647 * or if the skb it not generated by a local socket.
b881ef76 648 */
f2228f78 649 if (!skb->local_df && skb->len > mtu) {
adf30907 650 skb->dev = skb_dst(skb)->dev;
3ffe533c 651 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 652 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 653 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
654 kfree_skb(skb);
655 return -EMSGSIZE;
656 }
657
d91675f9
YH
658 if (np && np->frag_size < mtu) {
659 if (np->frag_size)
660 mtu = np->frag_size;
661 }
662 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 663
21dc3301 664 if (skb_has_frag_list(skb)) {
1da177e4 665 int first_len = skb_pagelen(skb);
3d13008e 666 struct sk_buff *frag2;
1da177e4
LT
667
668 if (first_len - hlen > mtu ||
669 ((first_len - hlen) & 7) ||
670 skb_cloned(skb))
671 goto slow_path;
672
4d9092bb 673 skb_walk_frags(skb, frag) {
1da177e4
LT
674 /* Correct geometry. */
675 if (frag->len > mtu ||
676 ((frag->len & 7) && frag->next) ||
677 skb_headroom(frag) < hlen)
3d13008e 678 goto slow_path_clean;
1da177e4 679
1da177e4
LT
680 /* Partially cloned skb? */
681 if (skb_shared(frag))
3d13008e 682 goto slow_path_clean;
2fdba6b0
HX
683
684 BUG_ON(frag->sk);
685 if (skb->sk) {
2fdba6b0
HX
686 frag->sk = skb->sk;
687 frag->destructor = sock_wfree;
2fdba6b0 688 }
3d13008e 689 skb->truesize -= frag->truesize;
1da177e4
LT
690 }
691
692 err = 0;
693 offset = 0;
694 frag = skb_shinfo(skb)->frag_list;
4d9092bb 695 skb_frag_list_init(skb);
1da177e4
LT
696 /* BUILD HEADER */
697
9a217a1c 698 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 699 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 700 if (!tmp_hdr) {
adf30907 701 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 702 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
703 return -ENOMEM;
704 }
705
1da177e4
LT
706 __skb_pull(skb, hlen);
707 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
708 __skb_push(skb, hlen);
709 skb_reset_network_header(skb);
d56f90a7 710 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 711
87c48fa3 712 ipv6_select_ident(fh, rt);
1da177e4
LT
713 fh->nexthdr = nexthdr;
714 fh->reserved = 0;
715 fh->frag_off = htons(IP6_MF);
716 frag_id = fh->identification;
717
718 first_len = skb_pagelen(skb);
719 skb->data_len = first_len - skb_headlen(skb);
720 skb->len = first_len;
0660e03f
ACM
721 ipv6_hdr(skb)->payload_len = htons(first_len -
722 sizeof(struct ipv6hdr));
a11d206d 723
d8d1f30b 724 dst_hold(&rt->dst);
1da177e4
LT
725
726 for (;;) {
727 /* Prepare header of the next frame,
728 * before previous one went down. */
729 if (frag) {
730 frag->ip_summed = CHECKSUM_NONE;
badff6d0 731 skb_reset_transport_header(frag);
1da177e4 732 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
733 __skb_push(frag, hlen);
734 skb_reset_network_header(frag);
d56f90a7
ACM
735 memcpy(skb_network_header(frag), tmp_hdr,
736 hlen);
1da177e4
LT
737 offset += skb->len - hlen - sizeof(struct frag_hdr);
738 fh->nexthdr = nexthdr;
739 fh->reserved = 0;
740 fh->frag_off = htons(offset);
741 if (frag->next != NULL)
742 fh->frag_off |= htons(IP6_MF);
743 fh->identification = frag_id;
0660e03f
ACM
744 ipv6_hdr(frag)->payload_len =
745 htons(frag->len -
746 sizeof(struct ipv6hdr));
1da177e4
LT
747 ip6_copy_metadata(frag, skb);
748 }
1ab1457c 749
1da177e4 750 err = output(skb);
dafee490 751 if(!err)
d8d1f30b 752 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 753 IPSTATS_MIB_FRAGCREATES);
dafee490 754
1da177e4
LT
755 if (err || !frag)
756 break;
757
758 skb = frag;
759 frag = skb->next;
760 skb->next = NULL;
761 }
762
a51482bd 763 kfree(tmp_hdr);
1da177e4
LT
764
765 if (err == 0) {
d8d1f30b 766 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 767 IPSTATS_MIB_FRAGOKS);
d8d1f30b 768 dst_release(&rt->dst);
1da177e4
LT
769 return 0;
770 }
771
772 while (frag) {
773 skb = frag->next;
774 kfree_skb(frag);
775 frag = skb;
776 }
777
d8d1f30b 778 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 779 IPSTATS_MIB_FRAGFAILS);
d8d1f30b 780 dst_release(&rt->dst);
1da177e4 781 return err;
3d13008e
ED
782
783slow_path_clean:
784 skb_walk_frags(skb, frag2) {
785 if (frag2 == frag)
786 break;
787 frag2->sk = NULL;
788 frag2->destructor = NULL;
789 skb->truesize += frag2->truesize;
790 }
1da177e4
LT
791 }
792
793slow_path:
794 left = skb->len - hlen; /* Space per frame */
795 ptr = hlen; /* Where to start from */
796
797 /*
798 * Fragment the datagram.
799 */
800
801 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
802 hroom = LL_RESERVED_SPACE(rt->dst.dev);
803 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
804
805 /*
806 * Keep copying data until we run out.
807 */
808 while(left > 0) {
809 len = left;
810 /* IF: it doesn't fit, use 'mtu' - the data space left */
811 if (len > mtu)
812 len = mtu;
25985edc 813 /* IF: we are not sending up to and including the packet end
1da177e4
LT
814 then align the next start on an eight byte boundary */
815 if (len < left) {
816 len &= ~7;
817 }
818 /*
819 * Allocate buffer.
820 */
821
a7ae1992
HX
822 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
823 hroom + troom, GFP_ATOMIC)) == NULL) {
64ce2073 824 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 825 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 826 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
827 err = -ENOMEM;
828 goto fail;
829 }
830
831 /*
832 * Set up data on packet
833 */
834
835 ip6_copy_metadata(frag, skb);
a7ae1992 836 skb_reserve(frag, hroom);
1da177e4 837 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 838 skb_reset_network_header(frag);
badff6d0 839 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
840 frag->transport_header = (frag->network_header + hlen +
841 sizeof(struct frag_hdr));
1da177e4
LT
842
843 /*
844 * Charge the memory for the fragment to any owner
845 * it might possess
846 */
847 if (skb->sk)
848 skb_set_owner_w(frag, skb->sk);
849
850 /*
851 * Copy the packet header into the new buffer.
852 */
d626f62b 853 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
854
855 /*
856 * Build fragment header.
857 */
858 fh->nexthdr = nexthdr;
859 fh->reserved = 0;
f36d6ab1 860 if (!frag_id) {
87c48fa3 861 ipv6_select_ident(fh, rt);
1da177e4
LT
862 frag_id = fh->identification;
863 } else
864 fh->identification = frag_id;
865
866 /*
867 * Copy a block of the IP datagram.
868 */
8984e41d 869 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
870 BUG();
871 left -= len;
872
873 fh->frag_off = htons(offset);
874 if (left > 0)
875 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
876 ipv6_hdr(frag)->payload_len = htons(frag->len -
877 sizeof(struct ipv6hdr));
1da177e4
LT
878
879 ptr += len;
880 offset += len;
881
882 /*
883 * Put this fragment into the sending queue.
884 */
1da177e4
LT
885 err = output(frag);
886 if (err)
887 goto fail;
dafee490 888
adf30907 889 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 890 IPSTATS_MIB_FRAGCREATES);
1da177e4 891 }
adf30907 892 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 893 IPSTATS_MIB_FRAGOKS);
1da177e4 894 kfree_skb(skb);
1da177e4
LT
895 return err;
896
897fail:
adf30907 898 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 899 IPSTATS_MIB_FRAGFAILS);
1ab1457c 900 kfree_skb(skb);
1da177e4
LT
901 return err;
902}
903
b71d1d42
ED
904static inline int ip6_rt_check(const struct rt6key *rt_key,
905 const struct in6_addr *fl_addr,
906 const struct in6_addr *addr_cache)
cf6b1982 907{
a02cec21
ED
908 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
909 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
910}
911
497c615a
HX
912static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
913 struct dst_entry *dst,
b71d1d42 914 const struct flowi6 *fl6)
1da177e4 915{
497c615a
HX
916 struct ipv6_pinfo *np = inet6_sk(sk);
917 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 918
497c615a
HX
919 if (!dst)
920 goto out;
921
922 /* Yes, checking route validity in not connected
923 * case is not very simple. Take into account,
924 * that we do not support routing by source, TOS,
925 * and MSG_DONTROUTE --ANK (980726)
926 *
cf6b1982
YH
927 * 1. ip6_rt_check(): If route was host route,
928 * check that cached destination is current.
497c615a
HX
929 * If it is network route, we still may
930 * check its validity using saved pointer
931 * to the last used address: daddr_cache.
932 * We do not want to save whole address now,
933 * (because main consumer of this service
934 * is tcp, which has not this problem),
935 * so that the last trick works only on connected
936 * sockets.
937 * 2. oif also should be the same.
938 */
4c9483b2 939 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 940#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 941 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 942#endif
4c9483b2 943 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
944 dst_release(dst);
945 dst = NULL;
1da177e4
LT
946 }
947
497c615a
HX
948out:
949 return dst;
950}
951
952static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 953 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 954{
3b1e0a65 955 struct net *net = sock_net(sk);
69cce1d1
DM
956#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
957 struct neighbour *n;
958#endif
959 int err;
497c615a 960
1da177e4 961 if (*dst == NULL)
4c9483b2 962 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
963
964 if ((err = (*dst)->error))
965 goto out_err_release;
966
4c9483b2 967 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
968 struct rt6_info *rt = (struct rt6_info *) *dst;
969 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
970 sk ? inet6_sk(sk)->srcprefs : 0,
971 &fl6->saddr);
44456d37 972 if (err)
1da177e4 973 goto out_err_release;
1da177e4
LT
974 }
975
95c385b4 976#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
977 /*
978 * Here if the dst entry we've looked up
979 * has a neighbour entry that is in the INCOMPLETE
980 * state and the src address from the flow is
981 * marked as OPTIMISTIC, we release the found
982 * dst entry and replace it instead with the
983 * dst entry of the nexthop router
984 */
f2c31e32 985 rcu_read_lock();
69cce1d1
DM
986 n = dst_get_neighbour(*dst);
987 if (n && !(n->nud_state & NUD_VALID)) {
e550dfb0 988 struct inet6_ifaddr *ifp;
4c9483b2 989 struct flowi6 fl_gw6;
e550dfb0
NH
990 int redirect;
991
f2c31e32 992 rcu_read_unlock();
4c9483b2 993 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
994 (*dst)->dev, 1);
995
996 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
997 if (ifp)
998 in6_ifa_put(ifp);
999
1000 if (redirect) {
1001 /*
1002 * We need to get the dst entry for the
1003 * default router instead
1004 */
1005 dst_release(*dst);
4c9483b2
DM
1006 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1007 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1008 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
1009 if ((err = (*dst)->error))
1010 goto out_err_release;
95c385b4 1011 }
f2c31e32
ED
1012 } else {
1013 rcu_read_unlock();
e550dfb0 1014 }
95c385b4
NH
1015#endif
1016
1da177e4
LT
1017 return 0;
1018
1019out_err_release:
ca46f9c8 1020 if (err == -ENETUNREACH)
483a47d2 1021 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1022 dst_release(*dst);
1023 *dst = NULL;
1024 return err;
1025}
34a0b3cd 1026
497c615a
HX
1027/**
1028 * ip6_dst_lookup - perform route lookup on flow
1029 * @sk: socket which provides route info
1030 * @dst: pointer to dst_entry * for result
4c9483b2 1031 * @fl6: flow to lookup
497c615a
HX
1032 *
1033 * This function performs a route lookup on the given flow.
1034 *
1035 * It returns zero on success, or a standard errno code on error.
1036 */
4c9483b2 1037int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
1038{
1039 *dst = NULL;
4c9483b2 1040 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 1041}
3cf3dc6c
ACM
1042EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1043
497c615a 1044/**
68d0c6d3
DM
1045 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1046 * @sk: socket which provides route info
4c9483b2 1047 * @fl6: flow to lookup
68d0c6d3 1048 * @final_dst: final destination address for ipsec lookup
a1414715 1049 * @can_sleep: we are in a sleepable context
68d0c6d3
DM
1050 *
1051 * This function performs a route lookup on the given flow.
1052 *
1053 * It returns a valid dst pointer on success, or a pointer encoded
1054 * error code.
1055 */
4c9483b2 1056struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1057 const struct in6_addr *final_dst,
a1414715 1058 bool can_sleep)
68d0c6d3
DM
1059{
1060 struct dst_entry *dst = NULL;
1061 int err;
1062
4c9483b2 1063 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1064 if (err)
1065 return ERR_PTR(err);
1066 if (final_dst)
4e3fd7a0 1067 fl6->daddr = *final_dst;
2774c131 1068 if (can_sleep)
4c9483b2 1069 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1070
4c9483b2 1071 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1072}
1073EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1074
1075/**
1076 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1077 * @sk: socket which provides the dst cache and route info
4c9483b2 1078 * @fl6: flow to lookup
68d0c6d3 1079 * @final_dst: final destination address for ipsec lookup
a1414715 1080 * @can_sleep: we are in a sleepable context
497c615a
HX
1081 *
1082 * This function performs a route lookup on the given flow with the
1083 * possibility of using the cached route in the socket if it is valid.
1084 * It will take the socket dst lock when operating on the dst cache.
1085 * As a result, this function can only be used in process context.
1086 *
68d0c6d3
DM
1087 * It returns a valid dst pointer on success, or a pointer encoded
1088 * error code.
497c615a 1089 */
4c9483b2 1090struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1091 const struct in6_addr *final_dst,
a1414715 1092 bool can_sleep)
497c615a 1093{
68d0c6d3
DM
1094 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1095 int err;
497c615a 1096
4c9483b2 1097 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1098
4c9483b2 1099 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1100 if (err)
1101 return ERR_PTR(err);
1102 if (final_dst)
4e3fd7a0 1103 fl6->daddr = *final_dst;
2774c131 1104 if (can_sleep)
4c9483b2 1105 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1106
4c9483b2 1107 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1108}
68d0c6d3 1109EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1110
34a0b3cd 1111static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1112 int getfrag(void *from, char *to, int offset, int len,
1113 int odd, struct sk_buff *skb),
1114 void *from, int length, int hh_len, int fragheaderlen,
87c48fa3
ED
1115 int transhdrlen, int mtu,unsigned int flags,
1116 struct rt6_info *rt)
e89e9cf5
AR
1117
1118{
1119 struct sk_buff *skb;
1120 int err;
1121
1122 /* There is support for UDP large send offload by network
1123 * device, so create one single skb packet containing complete
1124 * udp datagram
1125 */
1126 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1127 skb = sock_alloc_send_skb(sk,
1128 hh_len + fragheaderlen + transhdrlen + 20,
1129 (flags & MSG_DONTWAIT), &err);
1130 if (skb == NULL)
504744e4 1131 return err;
e89e9cf5
AR
1132
1133 /* reserve space for Hardware header */
1134 skb_reserve(skb, hh_len);
1135
1136 /* create space for UDP/IP header */
1137 skb_put(skb,fragheaderlen + transhdrlen);
1138
1139 /* initialize network header pointer */
c1d2bbe1 1140 skb_reset_network_header(skb);
e89e9cf5
AR
1141
1142 /* initialize protocol header pointer */
b0e380b1 1143 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1144
84fa7933 1145 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5 1146 skb->csum = 0;
e89e9cf5
AR
1147 }
1148
1149 err = skb_append_datato_frags(sk,skb, getfrag, from,
1150 (length - transhdrlen));
1151 if (!err) {
1152 struct frag_hdr fhdr;
1153
c31d5326
SS
1154 /* Specify the length of each IPv6 datagram fragment.
1155 * It has to be a multiple of 8.
1156 */
1157 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1158 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1159 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
87c48fa3 1160 ipv6_select_ident(&fhdr, rt);
e89e9cf5
AR
1161 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1162 __skb_queue_tail(&sk->sk_write_queue, skb);
1163
1164 return 0;
1165 }
1166 /* There is not enough support do UPD LSO,
1167 * so follow normal path
1168 */
1169 kfree_skb(skb);
1170
1171 return err;
1172}
1da177e4 1173
0178b695
HX
1174static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1175 gfp_t gfp)
1176{
1177 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1178}
1179
1180static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1181 gfp_t gfp)
1182{
1183 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1184}
1185
41a1f8ea
YH
1186int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1187 int offset, int len, int odd, struct sk_buff *skb),
1188 void *from, int length, int transhdrlen,
4c9483b2 1189 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
13b52cd4 1190 struct rt6_info *rt, unsigned int flags, int dontfrag)
1da177e4
LT
1191{
1192 struct inet_sock *inet = inet_sk(sk);
1193 struct ipv6_pinfo *np = inet6_sk(sk);
bdc712b4 1194 struct inet_cork *cork;
1da177e4
LT
1195 struct sk_buff *skb;
1196 unsigned int maxfraglen, fragheaderlen;
1197 int exthdrlen;
299b0767 1198 int dst_exthdrlen;
1da177e4
LT
1199 int hh_len;
1200 int mtu;
1201 int copy;
1202 int err;
1203 int offset = 0;
1204 int csummode = CHECKSUM_NONE;
a693e698 1205 __u8 tx_flags = 0;
1da177e4
LT
1206
1207 if (flags&MSG_PROBE)
1208 return 0;
bdc712b4 1209 cork = &inet->cork.base;
1da177e4
LT
1210 if (skb_queue_empty(&sk->sk_write_queue)) {
1211 /*
1212 * setup for corking
1213 */
1214 if (opt) {
0178b695 1215 if (WARN_ON(np->cork.opt))
1da177e4 1216 return -EINVAL;
0178b695
HX
1217
1218 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1219 if (unlikely(np->cork.opt == NULL))
1220 return -ENOBUFS;
1221
1222 np->cork.opt->tot_len = opt->tot_len;
1223 np->cork.opt->opt_flen = opt->opt_flen;
1224 np->cork.opt->opt_nflen = opt->opt_nflen;
1225
1226 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1227 sk->sk_allocation);
1228 if (opt->dst0opt && !np->cork.opt->dst0opt)
1229 return -ENOBUFS;
1230
1231 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1232 sk->sk_allocation);
1233 if (opt->dst1opt && !np->cork.opt->dst1opt)
1234 return -ENOBUFS;
1235
1236 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1237 sk->sk_allocation);
1238 if (opt->hopopt && !np->cork.opt->hopopt)
1239 return -ENOBUFS;
1240
1241 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1242 sk->sk_allocation);
1243 if (opt->srcrt && !np->cork.opt->srcrt)
1244 return -ENOBUFS;
1245
1da177e4
LT
1246 /* need source address above miyazawa*/
1247 }
d8d1f30b 1248 dst_hold(&rt->dst);
bdc712b4 1249 cork->dst = &rt->dst;
4c9483b2 1250 inet->cork.fl.u.ip6 = *fl6;
1da177e4 1251 np->cork.hop_limit = hlimit;
41a1f8ea 1252 np->cork.tclass = tclass;
628a5c56 1253 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
299b0767 1254 rt->dst.dev->mtu : dst_mtu(&rt->dst);
c7503609 1255 if (np->frag_size < mtu) {
d91675f9
YH
1256 if (np->frag_size)
1257 mtu = np->frag_size;
1258 }
bdc712b4 1259 cork->fragsize = mtu;
d8d1f30b 1260 if (dst_allfrag(rt->dst.path))
bdc712b4
DM
1261 cork->flags |= IPCORK_ALLFRAG;
1262 cork->length = 0;
1da177e4
LT
1263 sk->sk_sndmsg_page = NULL;
1264 sk->sk_sndmsg_off = 0;
299b0767 1265 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1da177e4
LT
1266 length += exthdrlen;
1267 transhdrlen += exthdrlen;
299b0767 1268 dst_exthdrlen = rt->dst.header_len;
1da177e4 1269 } else {
bdc712b4 1270 rt = (struct rt6_info *)cork->dst;
4c9483b2 1271 fl6 = &inet->cork.fl.u.ip6;
0178b695 1272 opt = np->cork.opt;
1da177e4
LT
1273 transhdrlen = 0;
1274 exthdrlen = 0;
299b0767 1275 dst_exthdrlen = 0;
bdc712b4 1276 mtu = cork->fragsize;
1da177e4
LT
1277 }
1278
d8d1f30b 1279 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1280
a1b05140 1281 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1282 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1283 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1284
1285 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
bdc712b4 1286 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
4c9483b2 1287 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1da177e4
LT
1288 return -EMSGSIZE;
1289 }
1290 }
1291
a693e698
AB
1292 /* For UDP, check if TX timestamp is enabled */
1293 if (sk->sk_type == SOCK_DGRAM) {
1294 err = sock_tx_timestamp(sk, &tx_flags);
1295 if (err)
1296 goto error;
1297 }
1298
1da177e4
LT
1299 /*
1300 * Let's try using as much space as possible.
1301 * Use MTU if total length of the message fits into the MTU.
1302 * Otherwise, we need to reserve fragment header and
1303 * fragment alignment (= 8-15 octects, in total).
1304 *
1305 * Note that we may need to "move" the data from the tail of
1ab1457c 1306 * of the buffer to the new fragment when we split
1da177e4
LT
1307 * the message.
1308 *
1ab1457c 1309 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1310 * at once if non-fragmentable extension headers
1311 * are too large.
1ab1457c 1312 * --yoshfuji
1da177e4
LT
1313 */
1314
bdc712b4 1315 cork->length += length;
4b340ae2
BH
1316 if (length > mtu) {
1317 int proto = sk->sk_protocol;
1318 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
4c9483b2 1319 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
4b340ae2
BH
1320 return -EMSGSIZE;
1321 }
e89e9cf5 1322
4b340ae2 1323 if (proto == IPPROTO_UDP &&
d8d1f30b 1324 (rt->dst.dev->features & NETIF_F_UFO)) {
4b340ae2
BH
1325
1326 err = ip6_ufo_append_data(sk, getfrag, from, length,
1327 hh_len, fragheaderlen,
87c48fa3 1328 transhdrlen, mtu, flags, rt);
4b340ae2
BH
1329 if (err)
1330 goto error;
1331 return 0;
1332 }
e89e9cf5 1333 }
1da177e4
LT
1334
1335 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1336 goto alloc_new_skb;
1337
1338 while (length > 0) {
1339 /* Check if the remaining data fits into current packet. */
bdc712b4 1340 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1341 if (copy < length)
1342 copy = maxfraglen - skb->len;
1343
1344 if (copy <= 0) {
1345 char *data;
1346 unsigned int datalen;
1347 unsigned int fraglen;
1348 unsigned int fraggap;
1349 unsigned int alloclen;
1350 struct sk_buff *skb_prev;
1351alloc_new_skb:
1352 skb_prev = skb;
1353
1354 /* There's no room in the current skb */
1355 if (skb_prev)
1356 fraggap = skb_prev->len - maxfraglen;
1357 else
1358 fraggap = 0;
1359
1360 /*
1361 * If remaining data exceeds the mtu,
1362 * we know we need more fragment(s).
1363 */
1364 datalen = length + fraggap;
bdc712b4 1365 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1da177e4
LT
1366 datalen = maxfraglen - fragheaderlen;
1367
1368 fraglen = datalen + fragheaderlen;
1369 if ((flags & MSG_MORE) &&
d8d1f30b 1370 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1371 alloclen = mtu;
1372 else
1373 alloclen = datalen + fragheaderlen;
1374
299b0767
SK
1375 alloclen += dst_exthdrlen;
1376
1da177e4
LT
1377 /*
1378 * The last fragment gets additional space at tail.
1379 * Note: we overallocate on fragments with MSG_MODE
1380 * because we have no idea if we're the last one.
1381 */
1382 if (datalen == length + fraggap)
d8d1f30b 1383 alloclen += rt->dst.trailer_len;
1da177e4
LT
1384
1385 /*
1386 * We just reserve space for fragment header.
1ab1457c 1387 * Note: this may be overallocation if the message
1da177e4
LT
1388 * (without MSG_MORE) fits into the MTU.
1389 */
1390 alloclen += sizeof(struct frag_hdr);
1391
1392 if (transhdrlen) {
1393 skb = sock_alloc_send_skb(sk,
1394 alloclen + hh_len,
1395 (flags & MSG_DONTWAIT), &err);
1396 } else {
1397 skb = NULL;
1398 if (atomic_read(&sk->sk_wmem_alloc) <=
1399 2 * sk->sk_sndbuf)
1400 skb = sock_wmalloc(sk,
1401 alloclen + hh_len, 1,
1402 sk->sk_allocation);
1403 if (unlikely(skb == NULL))
1404 err = -ENOBUFS;
a693e698
AB
1405 else {
1406 /* Only the initial fragment
1407 * is time stamped.
1408 */
1409 tx_flags = 0;
1410 }
1da177e4
LT
1411 }
1412 if (skb == NULL)
1413 goto error;
1414 /*
1415 * Fill in the control structures
1416 */
1417 skb->ip_summed = csummode;
1418 skb->csum = 0;
1419 /* reserve for fragmentation */
1420 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1421
a693e698
AB
1422 if (sk->sk_type == SOCK_DGRAM)
1423 skb_shinfo(skb)->tx_flags = tx_flags;
1424
1da177e4
LT
1425 /*
1426 * Find where to start putting bytes
1427 */
299b0767
SK
1428 data = skb_put(skb, fraglen + dst_exthdrlen);
1429 skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
1430 data += fragheaderlen + dst_exthdrlen;
b0e380b1
ACM
1431 skb->transport_header = (skb->network_header +
1432 fragheaderlen);
1da177e4
LT
1433 if (fraggap) {
1434 skb->csum = skb_copy_and_csum_bits(
1435 skb_prev, maxfraglen,
1436 data + transhdrlen, fraggap, 0);
1437 skb_prev->csum = csum_sub(skb_prev->csum,
1438 skb->csum);
1439 data += fraggap;
e9fa4f7b 1440 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1441 }
1442 copy = datalen - transhdrlen - fraggap;
299b0767 1443
1da177e4
LT
1444 if (copy < 0) {
1445 err = -EINVAL;
1446 kfree_skb(skb);
1447 goto error;
1448 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1449 err = -EFAULT;
1450 kfree_skb(skb);
1451 goto error;
1452 }
1453
1454 offset += copy;
1455 length -= datalen - fraggap;
1456 transhdrlen = 0;
1457 exthdrlen = 0;
299b0767 1458 dst_exthdrlen = 0;
1da177e4
LT
1459 csummode = CHECKSUM_NONE;
1460
1461 /*
1462 * Put the packet on the pending queue
1463 */
1464 __skb_queue_tail(&sk->sk_write_queue, skb);
1465 continue;
1466 }
1467
1468 if (copy > length)
1469 copy = length;
1470
d8d1f30b 1471 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1472 unsigned int off;
1473
1474 off = skb->len;
1475 if (getfrag(from, skb_put(skb, copy),
1476 offset, copy, off, skb) < 0) {
1477 __skb_trim(skb, off);
1478 err = -EFAULT;
1479 goto error;
1480 }
1481 } else {
1482 int i = skb_shinfo(skb)->nr_frags;
1483 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1484 struct page *page = sk->sk_sndmsg_page;
1485 int off = sk->sk_sndmsg_off;
1486 unsigned int left;
1487
1488 if (page && (left = PAGE_SIZE - off) > 0) {
1489 if (copy >= left)
1490 copy = left;
408dadf0 1491 if (page != skb_frag_page(frag)) {
1da177e4
LT
1492 if (i == MAX_SKB_FRAGS) {
1493 err = -EMSGSIZE;
1494 goto error;
1495 }
1da177e4 1496 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
408dadf0 1497 skb_frag_ref(skb, i);
1da177e4
LT
1498 frag = &skb_shinfo(skb)->frags[i];
1499 }
1500 } else if(i < MAX_SKB_FRAGS) {
1501 if (copy > PAGE_SIZE)
1502 copy = PAGE_SIZE;
1503 page = alloc_pages(sk->sk_allocation, 0);
1504 if (page == NULL) {
1505 err = -ENOMEM;
1506 goto error;
1507 }
1508 sk->sk_sndmsg_page = page;
1509 sk->sk_sndmsg_off = 0;
1510
1511 skb_fill_page_desc(skb, i, page, 0, 0);
1512 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1513 } else {
1514 err = -EMSGSIZE;
1515 goto error;
1516 }
9e903e08
ED
1517 if (getfrag(from,
1518 skb_frag_address(frag) + skb_frag_size(frag),
408dadf0 1519 offset, copy, skb->len, skb) < 0) {
1da177e4
LT
1520 err = -EFAULT;
1521 goto error;
1522 }
1523 sk->sk_sndmsg_off += copy;
9e903e08 1524 skb_frag_size_add(frag, copy);
1da177e4
LT
1525 skb->len += copy;
1526 skb->data_len += copy;
f945fa7a
HX
1527 skb->truesize += copy;
1528 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1529 }
1530 offset += copy;
1531 length -= copy;
1532 }
1533 return 0;
1534error:
bdc712b4 1535 cork->length -= length;
3bd653c8 1536 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1537 return err;
1538}
1539
bf138862
PE
1540static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1541{
0178b695
HX
1542 if (np->cork.opt) {
1543 kfree(np->cork.opt->dst0opt);
1544 kfree(np->cork.opt->dst1opt);
1545 kfree(np->cork.opt->hopopt);
1546 kfree(np->cork.opt->srcrt);
1547 kfree(np->cork.opt);
1548 np->cork.opt = NULL;
1549 }
1550
bdc712b4
DM
1551 if (inet->cork.base.dst) {
1552 dst_release(inet->cork.base.dst);
1553 inet->cork.base.dst = NULL;
1554 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1555 }
1556 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1557}
1558
1da177e4
LT
1559int ip6_push_pending_frames(struct sock *sk)
1560{
1561 struct sk_buff *skb, *tmp_skb;
1562 struct sk_buff **tail_skb;
1563 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1564 struct inet_sock *inet = inet_sk(sk);
1565 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1566 struct net *net = sock_net(sk);
1da177e4
LT
1567 struct ipv6hdr *hdr;
1568 struct ipv6_txoptions *opt = np->cork.opt;
bdc712b4 1569 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
4c9483b2
DM
1570 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1571 unsigned char proto = fl6->flowi6_proto;
1da177e4
LT
1572 int err = 0;
1573
1574 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1575 goto out;
1576 tail_skb = &(skb_shinfo(skb)->frag_list);
1577
1578 /* move skb->data to ip header from ext header */
d56f90a7 1579 if (skb->data < skb_network_header(skb))
bbe735e4 1580 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1581 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1582 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1583 *tail_skb = tmp_skb;
1584 tail_skb = &(tmp_skb->next);
1585 skb->len += tmp_skb->len;
1586 skb->data_len += tmp_skb->len;
1da177e4 1587 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1588 tmp_skb->destructor = NULL;
1589 tmp_skb->sk = NULL;
1da177e4
LT
1590 }
1591
28a89453 1592 /* Allow local fragmentation. */
b5c15fc0 1593 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1594 skb->local_df = 1;
1595
4e3fd7a0 1596 *final_dst = fl6->daddr;
cfe1fc77 1597 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1598 if (opt && opt->opt_flen)
1599 ipv6_push_frag_opts(skb, opt, &proto);
1600 if (opt && opt->opt_nflen)
1601 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1602
e2d1bca7
ACM
1603 skb_push(skb, sizeof(struct ipv6hdr));
1604 skb_reset_network_header(skb);
0660e03f 1605 hdr = ipv6_hdr(skb);
1ab1457c 1606
4c9483b2 1607 *(__be32*)hdr = fl6->flowlabel |
41a1f8ea 1608 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1609
1da177e4
LT
1610 hdr->hop_limit = np->cork.hop_limit;
1611 hdr->nexthdr = proto;
4e3fd7a0
AD
1612 hdr->saddr = fl6->saddr;
1613 hdr->daddr = *final_dst;
1da177e4 1614
a2c2064f 1615 skb->priority = sk->sk_priority;
4a19ec58 1616 skb->mark = sk->sk_mark;
a2c2064f 1617
d8d1f30b 1618 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1619 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1620 if (proto == IPPROTO_ICMPV6) {
adf30907 1621 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1622
5a57d4c7 1623 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1624 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1625 }
1626
ef76bc23 1627 err = ip6_local_out(skb);
1da177e4
LT
1628 if (err) {
1629 if (err > 0)
6ce9e7b5 1630 err = net_xmit_errno(err);
1da177e4
LT
1631 if (err)
1632 goto error;
1633 }
1634
1635out:
bf138862 1636 ip6_cork_release(inet, np);
1da177e4
LT
1637 return err;
1638error:
06254914 1639 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1640 goto out;
1641}
1642
1643void ip6_flush_pending_frames(struct sock *sk)
1644{
1da177e4
LT
1645 struct sk_buff *skb;
1646
1647 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1648 if (skb_dst(skb))
1649 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1650 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1651 kfree_skb(skb);
1652 }
1653
bf138862 1654 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1655}