/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
ad0081e4 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
1da177e4 60
ef76bc23
HX
61int __ip6_local_out(struct sk_buff *skb)
62{
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
b2e0b385
JE
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
ef76bc23
HX
72}
73
/* Run the LOCAL_OUT hook and, if netfilter accepted the packet
 * (return value 1), push it to the output path via dst_output().
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85
1da177e4
LT
86/* dev_loopback_xmit for use with netfilter. */
87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88{
459a98ed 89 skb_reset_mac_header(newskb);
bbe735e4 90 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 93 WARN_ON(!skb_dst(newskb));
1da177e4 94
e30b38c2 95 netif_rx_ni(newskb);
1da177e4
LT
96 return 0;
97}
98
9e508490 99static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 100{
adf30907 101 struct dst_entry *dst = skb_dst(skb);
1da177e4 102 struct net_device *dev = dst->dev;
f6b72b62 103 struct neighbour *neigh;
1da177e4
LT
104
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
107
0660e03f 108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 110
7ad6848c 111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 112 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
120 */
121 if (newskb)
b2e0b385
JE
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
1da177e4
LT
124 ip6_dev_loopback_xmit);
125
0660e03f 126 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
129 kfree_skb(skb);
130 return 0;
131 }
132 }
133
edf391ff
NH
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
1da177e4
LT
136 }
137
f2c31e32 138 rcu_read_lock();
27217455 139 neigh = dst_get_neighbour_noref(dst);
f2c31e32
ED
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
05e3aa09 142
f2c31e32
ED
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
9e508490
JE
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
1da177e4
LT
151}
152
9e508490
JE
153static int ip6_finish_output(struct sk_buff *skb)
154{
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
160}
161
1da177e4
LT
162int ip6_output(struct sk_buff *skb)
163{
9e508490 164 struct net_device *dev = skb_dst(skb)->dev;
adf30907 165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 166 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 167 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 168 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
169 kfree_skb(skb);
170 return 0;
171 }
172
9c6eb28a
JE
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
176}
177
1da177e4 178/*
b5d43998 179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
180 */
181
4c9483b2 182int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 183 struct ipv6_txoptions *opt, int tclass)
1da177e4 184{
3bd653c8 185 struct net *net = sock_net(sk);
b30bd282 186 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 187 struct in6_addr *first_hop = &fl6->daddr;
adf30907 188 struct dst_entry *dst = skb_dst(skb);
1da177e4 189 struct ipv6hdr *hdr;
4c9483b2 190 u8 proto = fl6->flowi6_proto;
1da177e4 191 int seg_len = skb->len;
e651f03a 192 int hlimit = -1;
1da177e4
LT
193 u32 mtu;
194
195 if (opt) {
c2636b4d 196 unsigned int head_room;
1da177e4
LT
197
198 /* First: exthdrs may take lots of space (~8K for now)
199 MAX_HEADER is not enough.
200 */
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
204
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 207 if (skb2 == NULL) {
adf30907 208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
1da177e4
LT
211 return -ENOBUFS;
212 }
808db80a 213 consume_skb(skb);
a11d206d 214 skb = skb2;
83d7eb29 215 skb_set_owner_w(skb, sk);
1da177e4
LT
216 }
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
221 }
222
e2d1bca7
ACM
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
0660e03f 225 hdr = ipv6_hdr(skb);
1da177e4
LT
226
227 /*
228 * Fill in the IPv6 header
229 */
b903d324 230 if (np)
1da177e4
LT
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
6b75d090 233 hlimit = ip6_dst_hoplimit(dst);
1da177e4 234
4c9483b2 235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
41a1f8ea 236
1da177e4
LT
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
240
4e3fd7a0
AD
241 hdr->saddr = fl6->saddr;
242 hdr->daddr = *first_hop;
1da177e4 243
a2c2064f 244 skb->priority = sk->sk_priority;
4a19ec58 245 skb->mark = sk->sk_mark;
a2c2064f 246
1da177e4 247 mtu = dst_mtu(dst);
283d07ac 248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 250 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
1da177e4
LT
253 }
254
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
3ffe533c 258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
260 kfree_skb(skb);
261 return -EMSGSIZE;
262}
263
7159039a
YH
264EXPORT_SYMBOL(ip6_xmit);
265
1da177e4
LT
266/*
267 * To avoid extra problems ND packets are send through this
268 * routine. It's code duplication but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is for us performance critical)
271 */
272
273int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 274 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
275 int proto, int len)
276{
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
1da177e4
LT
279
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
282
55f79cc0
ACM
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 285 hdr = ipv6_hdr(skb);
1da177e4 286
ae08e1f0 287 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
288
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
292
4e3fd7a0
AD
293 hdr->saddr = *saddr;
294 hdr->daddr = *daddr;
1da177e4
LT
295
296 return 0;
297}
298
299static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
300{
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
303
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
0bd1b59b
AM
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
314 }
315 last = sk;
316 }
317 }
318
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
323 }
324 read_unlock(&ip6_ra_lock);
325 return 0;
326}
327
e21e0b5f
VN
328static int ip6_forward_proxy_check(struct sk_buff *skb)
329{
0660e03f 330 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 331 u8 nexthdr = hdr->nexthdr;
75f2811c 332 __be16 frag_off;
e21e0b5f
VN
333 int offset;
334
335 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
337 if (offset < 0)
338 return 0;
339 } else
340 offset = sizeof(struct ipv6hdr);
341
342 if (nexthdr == IPPROTO_ICMPV6) {
343 struct icmp6hdr *icmp6;
344
d56f90a7
ACM
345 if (!pskb_may_pull(skb, (skb_network_header(skb) +
346 offset + 1 - skb->data)))
e21e0b5f
VN
347 return 0;
348
d56f90a7 349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
350
351 switch (icmp6->icmp6_type) {
352 case NDISC_ROUTER_SOLICITATION:
353 case NDISC_ROUTER_ADVERTISEMENT:
354 case NDISC_NEIGHBOUR_SOLICITATION:
355 case NDISC_NEIGHBOUR_ADVERTISEMENT:
356 case NDISC_REDIRECT:
357 /* For reaction involving unicast neighbor discovery
358 * message destined to the proxied address, pass it to
359 * input function.
360 */
361 return 1;
362 default:
363 break;
364 }
365 }
366
74553b09
VN
367 /*
368 * The proxying router can't forward traffic sent to a link-local
369 * address, so signal the sender and discard the packet. This
370 * behavior is clarified by the MIPv6 specification.
371 */
372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
373 dst_link_failure(skb);
374 return -1;
375 }
376
e21e0b5f
VN
377 return 0;
378}
379
1da177e4
LT
/* NF_INET_FORWARD okfn: hand the forwarded packet to the output path. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
384
385int ip6_forward(struct sk_buff *skb)
386{
adf30907 387 struct dst_entry *dst = skb_dst(skb);
0660e03f 388 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 389 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 390 struct net *net = dev_net(dst->dev);
14f3ad6f 391 u32 mtu;
1ab1457c 392
53b7997f 393 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
394 goto error;
395
4497b076
BH
396 if (skb_warn_if_lro(skb))
397 goto drop;
398
1da177e4 399 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 400 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
401 goto drop;
402 }
403
72b43d08
AK
404 if (skb->pkt_type != PACKET_HOST)
405 goto drop;
406
35fc92a9 407 skb_forward_csum(skb);
1da177e4
LT
408
409 /*
410 * We DO NOT make any processing on
411 * RA packets, pushing them to user level AS IS
412 * without ane WARRANTY that application will be able
413 * to interpret them. The reason is that we
414 * cannot make anything clever here.
415 *
416 * We are not end-node, so that if packet contains
417 * AH/ESP, we cannot make anything.
418 * Defragmentation also would be mistake, RA packets
419 * cannot be fragmented, because there is no warranty
420 * that different fragments will go along one path. --ANK
421 */
422 if (opt->ra) {
d56f90a7 423 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
424 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
425 return 0;
426 }
427
428 /*
429 * check and decrement ttl
430 */
431 if (hdr->hop_limit <= 1) {
432 /* Force OUTPUT device used as source address */
433 skb->dev = dst->dev;
3ffe533c 434 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
435 IP6_INC_STATS_BH(net,
436 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
437
438 kfree_skb(skb);
439 return -ETIMEDOUT;
440 }
441
fbea49e1 442 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 443 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 444 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
445 int proxied = ip6_forward_proxy_check(skb);
446 if (proxied > 0)
e21e0b5f 447 return ip6_input(skb);
74553b09 448 else if (proxied < 0) {
3bd653c8
DL
449 IP6_INC_STATS(net, ip6_dst_idev(dst),
450 IPSTATS_MIB_INDISCARDS);
74553b09
VN
451 goto drop;
452 }
e21e0b5f
VN
453 }
454
1da177e4 455 if (!xfrm6_route_forward(skb)) {
3bd653c8 456 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
457 goto drop;
458 }
adf30907 459 dst = skb_dst(skb);
1da177e4
LT
460
461 /* IPv6 specs say nothing about it, but it is clear that we cannot
462 send redirects to source routed frames.
1e5dc146 463 We don't send redirects to frames decapsulated from IPsec.
1da177e4 464 */
c45a3dfb 465 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4
LT
466 struct in6_addr *target = NULL;
467 struct rt6_info *rt;
1da177e4
LT
468
469 /*
470 * incoming and outgoing devices are the same
471 * send a redirect.
472 */
473
474 rt = (struct rt6_info *) dst;
c45a3dfb
DM
475 if (rt->rt6i_flags & RTF_GATEWAY)
476 target = &rt->rt6i_gateway;
1da177e4
LT
477 else
478 target = &hdr->daddr;
479
92d86829
DM
480 if (!rt->rt6i_peer)
481 rt6_bind_peer(rt, 1);
482
1da177e4
LT
483 /* Limit redirects both by destination (here)
484 and by source (inside ndisc_send_redirect)
485 */
92d86829 486 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
4991969a 487 ndisc_send_redirect(skb, target);
5bb1ab09
DS
488 } else {
489 int addrtype = ipv6_addr_type(&hdr->saddr);
490
1da177e4 491 /* This check is security critical. */
f81b2e7d
YH
492 if (addrtype == IPV6_ADDR_ANY ||
493 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
494 goto error;
495 if (addrtype & IPV6_ADDR_LINKLOCAL) {
496 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 497 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
498 goto error;
499 }
1da177e4
LT
500 }
501
14f3ad6f
UW
502 mtu = dst_mtu(dst);
503 if (mtu < IPV6_MIN_MTU)
504 mtu = IPV6_MIN_MTU;
505
0aa68271 506 if (skb->len > mtu && !skb_is_gso(skb)) {
1da177e4
LT
507 /* Again, force OUTPUT device used as source address */
508 skb->dev = dst->dev;
14f3ad6f 509 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
483a47d2
DL
510 IP6_INC_STATS_BH(net,
511 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
512 IP6_INC_STATS_BH(net,
513 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
514 kfree_skb(skb);
515 return -EMSGSIZE;
516 }
517
518 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 519 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
520 goto drop;
521 }
522
0660e03f 523 hdr = ipv6_hdr(skb);
1da177e4
LT
524
525 /* Mangling hops number delayed to point after skb COW */
1ab1457c 526
1da177e4
LT
527 hdr->hop_limit--;
528
483a47d2 529 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
b2e0b385 530 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 531 ip6_forward_finish);
1da177e4
LT
532
533error:
483a47d2 534 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
535drop:
536 kfree_skb(skb);
537 return -EINVAL;
538}
539
540static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
541{
542 to->pkt_type = from->pkt_type;
543 to->priority = from->priority;
544 to->protocol = from->protocol;
adf30907
ED
545 skb_dst_drop(to);
546 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 547 to->dev = from->dev;
82e91ffe 548 to->mark = from->mark;
1da177e4
LT
549
550#ifdef CONFIG_NET_SCHED
551 to->tc_index = from->tc_index;
552#endif
e7ac05f3 553 nf_copy(to, from);
ba9dda3a
JK
554#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
556 to->nf_trace = from->nf_trace;
557#endif
984bc16c 558 skb_copy_secmark(to, from);
1da177e4
LT
559}
560
561int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
562{
563 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
564 struct ipv6_opt_hdr *exthdr =
565 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 566 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 567 int found_rhdr = 0;
0660e03f 568 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
569
570 while (offset + 1 <= packet_len) {
571
572 switch (**nexthdr) {
573
574 case NEXTHDR_HOP:
27637df9 575 break;
1da177e4 576 case NEXTHDR_ROUTING:
27637df9
MN
577 found_rhdr = 1;
578 break;
1da177e4 579 case NEXTHDR_DEST:
59fbb3a6 580#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
581 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
582 break;
583#endif
584 if (found_rhdr)
585 return offset;
1da177e4
LT
586 break;
587 default :
588 return offset;
589 }
27637df9
MN
590
591 offset += ipv6_optlen(exthdr);
592 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
593 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
594 offset);
1da177e4
LT
595 }
596
597 return offset;
598}
599
87c48fa3
ED
600void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
601{
602 static atomic_t ipv6_fragmentation_id;
603 int old, new;
604
e688a604 605 if (rt && !(rt->dst.flags & DST_NOPEER)) {
87c48fa3
ED
606 struct inet_peer *peer;
607
608 if (!rt->rt6i_peer)
609 rt6_bind_peer(rt, 1);
610 peer = rt->rt6i_peer;
611 if (peer) {
612 fhdr->identification = htonl(inet_getid(peer, 0));
613 return;
614 }
615 }
616 do {
617 old = atomic_read(&ipv6_fragmentation_id);
618 new = old + 1;
619 if (!new)
620 new = 1;
621 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
622 fhdr->identification = htonl(new);
623}
624
ad0081e4 625int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 626{
1da177e4 627 struct sk_buff *frag;
adf30907 628 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 629 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
630 struct ipv6hdr *tmp_hdr;
631 struct frag_hdr *fh;
632 unsigned int mtu, hlen, left, len;
a7ae1992 633 int hroom, troom;
ae08e1f0 634 __be32 frag_id = 0;
1da177e4
LT
635 int ptr, offset = 0, err=0;
636 u8 *prevhdr, nexthdr = 0;
adf30907 637 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 638
1da177e4
LT
639 hlen = ip6_find_1stfragopt(skb, &prevhdr);
640 nexthdr = *prevhdr;
641
628a5c56 642 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
643
644 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 645 * or if the skb it not generated by a local socket.
b881ef76 646 */
f2228f78 647 if (!skb->local_df && skb->len > mtu) {
adf30907 648 skb->dev = skb_dst(skb)->dev;
3ffe533c 649 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 650 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 651 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
652 kfree_skb(skb);
653 return -EMSGSIZE;
654 }
655
d91675f9
YH
656 if (np && np->frag_size < mtu) {
657 if (np->frag_size)
658 mtu = np->frag_size;
659 }
660 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 661
21dc3301 662 if (skb_has_frag_list(skb)) {
1da177e4 663 int first_len = skb_pagelen(skb);
3d13008e 664 struct sk_buff *frag2;
1da177e4
LT
665
666 if (first_len - hlen > mtu ||
667 ((first_len - hlen) & 7) ||
668 skb_cloned(skb))
669 goto slow_path;
670
4d9092bb 671 skb_walk_frags(skb, frag) {
1da177e4
LT
672 /* Correct geometry. */
673 if (frag->len > mtu ||
674 ((frag->len & 7) && frag->next) ||
675 skb_headroom(frag) < hlen)
3d13008e 676 goto slow_path_clean;
1da177e4 677
1da177e4
LT
678 /* Partially cloned skb? */
679 if (skb_shared(frag))
3d13008e 680 goto slow_path_clean;
2fdba6b0
HX
681
682 BUG_ON(frag->sk);
683 if (skb->sk) {
2fdba6b0
HX
684 frag->sk = skb->sk;
685 frag->destructor = sock_wfree;
2fdba6b0 686 }
3d13008e 687 skb->truesize -= frag->truesize;
1da177e4
LT
688 }
689
690 err = 0;
691 offset = 0;
692 frag = skb_shinfo(skb)->frag_list;
4d9092bb 693 skb_frag_list_init(skb);
1da177e4
LT
694 /* BUILD HEADER */
695
9a217a1c 696 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 697 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 698 if (!tmp_hdr) {
adf30907 699 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 700 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
701 return -ENOMEM;
702 }
703
1da177e4
LT
704 __skb_pull(skb, hlen);
705 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
706 __skb_push(skb, hlen);
707 skb_reset_network_header(skb);
d56f90a7 708 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 709
87c48fa3 710 ipv6_select_ident(fh, rt);
1da177e4
LT
711 fh->nexthdr = nexthdr;
712 fh->reserved = 0;
713 fh->frag_off = htons(IP6_MF);
714 frag_id = fh->identification;
715
716 first_len = skb_pagelen(skb);
717 skb->data_len = first_len - skb_headlen(skb);
718 skb->len = first_len;
0660e03f
ACM
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
a11d206d 721
d8d1f30b 722 dst_hold(&rt->dst);
1da177e4
LT
723
724 for (;;) {
725 /* Prepare header of the next frame,
726 * before previous one went down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
badff6d0 729 skb_reset_transport_header(frag);
1da177e4 730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
d56f90a7
ACM
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
1da177e4
LT
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
0660e03f
ACM
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
1da177e4
LT
745 ip6_copy_metadata(frag, skb);
746 }
1ab1457c 747
1da177e4 748 err = output(skb);
dafee490 749 if(!err)
d8d1f30b 750 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 751 IPSTATS_MIB_FRAGCREATES);
dafee490 752
1da177e4
LT
753 if (err || !frag)
754 break;
755
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
759 }
760
a51482bd 761 kfree(tmp_hdr);
1da177e4
LT
762
763 if (err == 0) {
d8d1f30b 764 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 765 IPSTATS_MIB_FRAGOKS);
d8d1f30b 766 dst_release(&rt->dst);
1da177e4
LT
767 return 0;
768 }
769
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
774 }
775
d8d1f30b 776 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 777 IPSTATS_MIB_FRAGFAILS);
d8d1f30b 778 dst_release(&rt->dst);
1da177e4 779 return err;
3d13008e
ED
780
781slow_path_clean:
782 skb_walk_frags(skb, frag2) {
783 if (frag2 == frag)
784 break;
785 frag2->sk = NULL;
786 frag2->destructor = NULL;
787 skb->truesize += frag2->truesize;
788 }
1da177e4
LT
789 }
790
791slow_path:
792 left = skb->len - hlen; /* Space per frame */
793 ptr = hlen; /* Where to start from */
794
795 /*
796 * Fragment the datagram.
797 */
798
799 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
800 hroom = LL_RESERVED_SPACE(rt->dst.dev);
801 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
802
803 /*
804 * Keep copying data until we run out.
805 */
806 while(left > 0) {
807 len = left;
808 /* IF: it doesn't fit, use 'mtu' - the data space left */
809 if (len > mtu)
810 len = mtu;
25985edc 811 /* IF: we are not sending up to and including the packet end
1da177e4
LT
812 then align the next start on an eight byte boundary */
813 if (len < left) {
814 len &= ~7;
815 }
816 /*
817 * Allocate buffer.
818 */
819
a7ae1992
HX
820 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
821 hroom + troom, GFP_ATOMIC)) == NULL) {
64ce2073 822 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 823 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 824 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
825 err = -ENOMEM;
826 goto fail;
827 }
828
829 /*
830 * Set up data on packet
831 */
832
833 ip6_copy_metadata(frag, skb);
a7ae1992 834 skb_reserve(frag, hroom);
1da177e4 835 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 836 skb_reset_network_header(frag);
badff6d0 837 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
838 frag->transport_header = (frag->network_header + hlen +
839 sizeof(struct frag_hdr));
1da177e4
LT
840
841 /*
842 * Charge the memory for the fragment to any owner
843 * it might possess
844 */
845 if (skb->sk)
846 skb_set_owner_w(frag, skb->sk);
847
848 /*
849 * Copy the packet header into the new buffer.
850 */
d626f62b 851 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
852
853 /*
854 * Build fragment header.
855 */
856 fh->nexthdr = nexthdr;
857 fh->reserved = 0;
f36d6ab1 858 if (!frag_id) {
87c48fa3 859 ipv6_select_ident(fh, rt);
1da177e4
LT
860 frag_id = fh->identification;
861 } else
862 fh->identification = frag_id;
863
864 /*
865 * Copy a block of the IP datagram.
866 */
8984e41d 867 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
868 BUG();
869 left -= len;
870
871 fh->frag_off = htons(offset);
872 if (left > 0)
873 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
874 ipv6_hdr(frag)->payload_len = htons(frag->len -
875 sizeof(struct ipv6hdr));
1da177e4
LT
876
877 ptr += len;
878 offset += len;
879
880 /*
881 * Put this fragment into the sending queue.
882 */
1da177e4
LT
883 err = output(frag);
884 if (err)
885 goto fail;
dafee490 886
adf30907 887 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 888 IPSTATS_MIB_FRAGCREATES);
1da177e4 889 }
adf30907 890 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 891 IPSTATS_MIB_FRAGOKS);
808db80a 892 consume_skb(skb);
1da177e4
LT
893 return err;
894
895fail:
adf30907 896 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 897 IPSTATS_MIB_FRAGFAILS);
1ab1457c 898 kfree_skb(skb);
1da177e4
LT
899 return err;
900}
901
b71d1d42
ED
902static inline int ip6_rt_check(const struct rt6key *rt_key,
903 const struct in6_addr *fl_addr,
904 const struct in6_addr *addr_cache)
cf6b1982 905{
a02cec21
ED
906 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
907 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
908}
909
497c615a
HX
910static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
911 struct dst_entry *dst,
b71d1d42 912 const struct flowi6 *fl6)
1da177e4 913{
497c615a
HX
914 struct ipv6_pinfo *np = inet6_sk(sk);
915 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 916
497c615a
HX
917 if (!dst)
918 goto out;
919
920 /* Yes, checking route validity in not connected
921 * case is not very simple. Take into account,
922 * that we do not support routing by source, TOS,
923 * and MSG_DONTROUTE --ANK (980726)
924 *
cf6b1982
YH
925 * 1. ip6_rt_check(): If route was host route,
926 * check that cached destination is current.
497c615a
HX
927 * If it is network route, we still may
928 * check its validity using saved pointer
929 * to the last used address: daddr_cache.
930 * We do not want to save whole address now,
931 * (because main consumer of this service
932 * is tcp, which has not this problem),
933 * so that the last trick works only on connected
934 * sockets.
935 * 2. oif also should be the same.
936 */
4c9483b2 937 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 938#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 939 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 940#endif
4c9483b2 941 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
942 dst_release(dst);
943 dst = NULL;
1da177e4
LT
944 }
945
497c615a
HX
946out:
947 return dst;
948}
949
950static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 951 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 952{
3b1e0a65 953 struct net *net = sock_net(sk);
69cce1d1
DM
954#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
955 struct neighbour *n;
956#endif
957 int err;
497c615a 958
1da177e4 959 if (*dst == NULL)
4c9483b2 960 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
961
962 if ((err = (*dst)->error))
963 goto out_err_release;
964
4c9483b2 965 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
966 struct rt6_info *rt = (struct rt6_info *) *dst;
967 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
968 sk ? inet6_sk(sk)->srcprefs : 0,
969 &fl6->saddr);
44456d37 970 if (err)
1da177e4 971 goto out_err_release;
1da177e4
LT
972 }
973
95c385b4 974#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
975 /*
976 * Here if the dst entry we've looked up
977 * has a neighbour entry that is in the INCOMPLETE
978 * state and the src address from the flow is
979 * marked as OPTIMISTIC, we release the found
980 * dst entry and replace it instead with the
981 * dst entry of the nexthop router
982 */
f2c31e32 983 rcu_read_lock();
27217455 984 n = dst_get_neighbour_noref(*dst);
69cce1d1 985 if (n && !(n->nud_state & NUD_VALID)) {
e550dfb0 986 struct inet6_ifaddr *ifp;
4c9483b2 987 struct flowi6 fl_gw6;
e550dfb0
NH
988 int redirect;
989
f2c31e32 990 rcu_read_unlock();
4c9483b2 991 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
992 (*dst)->dev, 1);
993
994 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
995 if (ifp)
996 in6_ifa_put(ifp);
997
998 if (redirect) {
999 /*
1000 * We need to get the dst entry for the
1001 * default router instead
1002 */
1003 dst_release(*dst);
4c9483b2
DM
1004 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1005 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1006 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
1007 if ((err = (*dst)->error))
1008 goto out_err_release;
95c385b4 1009 }
f2c31e32
ED
1010 } else {
1011 rcu_read_unlock();
e550dfb0 1012 }
95c385b4
NH
1013#endif
1014
1da177e4
LT
1015 return 0;
1016
1017out_err_release:
ca46f9c8 1018 if (err == -ENETUNREACH)
483a47d2 1019 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1020 dst_release(*dst);
1021 *dst = NULL;
1022 return err;
1023}
34a0b3cd 1024
497c615a
HX
1025/**
1026 * ip6_dst_lookup - perform route lookup on flow
1027 * @sk: socket which provides route info
1028 * @dst: pointer to dst_entry * for result
4c9483b2 1029 * @fl6: flow to lookup
497c615a
HX
1030 *
1031 * This function performs a route lookup on the given flow.
1032 *
1033 * It returns zero on success, or a standard errno code on error.
1034 */
4c9483b2 1035int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
1036{
1037 *dst = NULL;
4c9483b2 1038 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 1039}
3cf3dc6c
ACM
1040EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1041
497c615a 1042/**
68d0c6d3
DM
1043 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1044 * @sk: socket which provides route info
4c9483b2 1045 * @fl6: flow to lookup
68d0c6d3 1046 * @final_dst: final destination address for ipsec lookup
a1414715 1047 * @can_sleep: we are in a sleepable context
68d0c6d3
DM
1048 *
1049 * This function performs a route lookup on the given flow.
1050 *
1051 * It returns a valid dst pointer on success, or a pointer encoded
1052 * error code.
1053 */
4c9483b2 1054struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1055 const struct in6_addr *final_dst,
a1414715 1056 bool can_sleep)
68d0c6d3
DM
1057{
1058 struct dst_entry *dst = NULL;
1059 int err;
1060
4c9483b2 1061 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1062 if (err)
1063 return ERR_PTR(err);
1064 if (final_dst)
4e3fd7a0 1065 fl6->daddr = *final_dst;
2774c131 1066 if (can_sleep)
4c9483b2 1067 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1068
4c9483b2 1069 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1070}
1071EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1072
1073/**
1074 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1075 * @sk: socket which provides the dst cache and route info
4c9483b2 1076 * @fl6: flow to lookup
68d0c6d3 1077 * @final_dst: final destination address for ipsec lookup
a1414715 1078 * @can_sleep: we are in a sleepable context
497c615a
HX
1079 *
1080 * This function performs a route lookup on the given flow with the
1081 * possibility of using the cached route in the socket if it is valid.
1082 * It will take the socket dst lock when operating on the dst cache.
1083 * As a result, this function can only be used in process context.
1084 *
68d0c6d3
DM
1085 * It returns a valid dst pointer on success, or a pointer encoded
1086 * error code.
497c615a 1087 */
4c9483b2 1088struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1089 const struct in6_addr *final_dst,
a1414715 1090 bool can_sleep)
497c615a 1091{
68d0c6d3
DM
1092 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1093 int err;
497c615a 1094
4c9483b2 1095 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1096
4c9483b2 1097 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1098 if (err)
1099 return ERR_PTR(err);
1100 if (final_dst)
4e3fd7a0 1101 fl6->daddr = *final_dst;
2774c131 1102 if (can_sleep)
4c9483b2 1103 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1104
4c9483b2 1105 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1106}
68d0c6d3 1107EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1108
/* Build (or extend) one oversized UDP skb for UFO (UDP Fragmentation
 * Offload): rather than fragmenting in software, queue a single large
 * datagram and fill in GSO metadata (gso_size, gso_type, IPv6 fragment
 * id) so the device can emit valid fragments itself.
 *
 * Returns 0 on success or a negative errno; on failure the caller falls
 * back to the normal software-fragmentation path.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* "+ 20" is header slack; the payload itself is appended
		 * to the frags below, not the linear area.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* checksum is completed by the device during segmentation */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* one fragment id is shared by all fragments of the datagram */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		/* NOTE(review): if skb came from skb_peek_tail() above it is
		 * already on sk_write_queue; queueing it a second time here
		 * would corrupt the list — confirm this branch can only be
		 * reached with a freshly allocated skb.
		 */
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UPD LSO,
	 * so follow normal path
	 */
	/* NOTE(review): likewise, freeing a peeked (already queued) skb here
	 * would be a double free when the caller later flushes the write
	 * queue — verify against the call site in ip6_append_data().
	 */
	kfree_skb(skb);

	return err;
}
1da177e4 1171
0178b695
HX
1172static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1173 gfp_t gfp)
1174{
1175 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1176}
1177
1178static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1179 gfp_t gfp)
1180{
1181 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1182}
1183
/**
 *	ip6_append_data - append user data to the socket's pending (corked) queue
 *	@sk: socket to append on
 *	@getfrag: caller callback that copies @len bytes of user data into @to
 *	@from: opaque cookie passed through to @getfrag
 *	@length: number of payload bytes to append in this call
 *	@transhdrlen: transport header length (non-zero only on the first call)
 *	@hlimit: hop limit to record in the cork state
 *	@tclass: traffic class to record in the cork state
 *	@opt: IPv6 tx options; duplicated into np->cork on the first call
 *	@fl6: flow describing the destination; saved into the cork on first call
 *	@rt: route for the flow; a reference is taken into the cork
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE, ...)
 *	@dontfrag: if set, refuse to fragment UDP/RAW and report PMTU instead
 *
 *	Builds a chain of correctly sized skbs on sk->sk_write_queue; the
 *	chain is later turned into packets by ip6_push_pending_frames().
 *	The first call (empty queue) snapshots flow/route/options into the
 *	cork; later calls reload those snapshots and ignore the arguments.
 *
 *	Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			/* a previous append must have consumed/released cork.opt */
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* NOTE(review): on a failed dup below, the parts of
			 * np->cork.opt duplicated so far are left allocated;
			 * presumably ip6_cork_release() cleans them up later —
			 * confirm every error path reaches it.
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		/* cork holds its own reference on the route */
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* fragmentable ext-header bytes count as payload here */
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		/* subsequent append: reload everything from the cork snapshot */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* largest per-fragment payload end, 8-byte aligned, minus frag hdr */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* oversized UDP with hardware UFO: hand off to the offload path */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MODE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* first skb of the message: may block per MSG_DONTWAIT */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* later fragments: soft-limited, non-blocking alloc */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* move the overhang of the previous skb into
				 * this one, fixing both checksums
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* one-shot values consumed by the first fragment */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* no scatter-gather: copy into the linear area */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* scatter-gather: fill the socket's current page frag */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != skb_frag_page(frag)) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					skb_frag_ref(skb, i);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    skb_frag_address(frag) + skb_frag_size(frag),
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			skb_frag_size_add(frag, copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1539
bf138862
PE
1540static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1541{
0178b695
HX
1542 if (np->cork.opt) {
1543 kfree(np->cork.opt->dst0opt);
1544 kfree(np->cork.opt->dst1opt);
1545 kfree(np->cork.opt->hopopt);
1546 kfree(np->cork.opt->srcrt);
1547 kfree(np->cork.opt);
1548 np->cork.opt = NULL;
1549 }
1550
bdc712b4
DM
1551 if (inet->cork.base.dst) {
1552 dst_release(inet->cork.base.dst);
1553 inet->cork.base.dst = NULL;
1554 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1555 }
1556 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1557}
1558
/* Coalesce all skbs pending on sk->sk_write_queue into one packet (the
 * first skb carries the rest on its frag_list), prepend the IPv6 header
 * built from the cork state, account stats, and transmit it via
 * ip6_local_out().  The cork is released on both success and failure.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* chain every remaining queued skb onto the head's frag_list,
	 * folding its length/truesize into the head and detaching it
	 * from the socket's write-memory accounting
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* a routing header may rewrite the destination below, so keep
	 * the flow's daddr as the "final" destination for the header
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* first 32 bits: version 6, traffic class, flow label */
	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* positive values are congestion-notification codes */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4
LT
1643
1644void ip6_flush_pending_frames(struct sock *sk)
1645{
1da177e4
LT
1646 struct sk_buff *skb;
1647
1648 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1649 if (skb_dst(skb))
1650 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1651 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1652 kfree_skb(skb);
1653 }
1654
bf138862 1655 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1656}
a495f836 1657EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);