]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: unshare inetpeers
[mirror_ubuntu-bionic-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
ad0081e4 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
1da177e4 60
ef76bc23
HX
61int __ip6_local_out(struct sk_buff *skb)
62{
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
b2e0b385
JE
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
ef76bc23
HX
72}
73
/*
 * Run the LOCAL_OUT hook via __ip6_local_out() and, when the hook result
 * is 1, continue with dst_output().  Any other hook result is returned
 * to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
84EXPORT_SYMBOL_GPL(ip6_local_out);
85
1da177e4
LT
86/* dev_loopback_xmit for use with netfilter. */
87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88{
459a98ed 89 skb_reset_mac_header(newskb);
bbe735e4 90 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 93 WARN_ON(!skb_dst(newskb));
1da177e4 94
e30b38c2 95 netif_rx_ni(newskb);
1da177e4
LT
96 return 0;
97}
98
9e508490 99static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 100{
adf30907 101 struct dst_entry *dst = skb_dst(skb);
1da177e4 102 struct net_device *dev = dst->dev;
f6b72b62 103 struct neighbour *neigh;
1da177e4
LT
104
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
107
0660e03f 108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 110
7ad6848c 111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 112 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
120 */
121 if (newskb)
b2e0b385
JE
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
1da177e4
LT
124 ip6_dev_loopback_xmit);
125
0660e03f 126 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
129 kfree_skb(skb);
130 return 0;
131 }
132 }
133
edf391ff
NH
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
1da177e4
LT
136 }
137
69cce1d1 138 neigh = dst_get_neighbour(dst);
05e3aa09
DM
139 if (neigh)
140 return neigh_output(neigh, skb);
141
9e508490
JE
142 IP6_INC_STATS_BH(dev_net(dst->dev),
143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 kfree_skb(skb);
145 return -EINVAL;
1da177e4
LT
146}
147
9e508490
JE
148static int ip6_finish_output(struct sk_buff *skb)
149{
150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151 dst_allfrag(skb_dst(skb)))
152 return ip6_fragment(skb, ip6_finish_output2);
153 else
154 return ip6_finish_output2(skb);
155}
156
1da177e4
LT
157int ip6_output(struct sk_buff *skb)
158{
9e508490 159 struct net_device *dev = skb_dst(skb)->dev;
adf30907 160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 161 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 162 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 163 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
164 kfree_skb(skb);
165 return 0;
166 }
167
9c6eb28a
JE
168 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
169 ip6_finish_output,
170 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
171}
172
1da177e4 173/*
b5d43998 174 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
175 */
176
4c9483b2 177int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
4e15ed4d 178 struct ipv6_txoptions *opt)
1da177e4 179{
3bd653c8 180 struct net *net = sock_net(sk);
b30bd282 181 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 182 struct in6_addr *first_hop = &fl6->daddr;
adf30907 183 struct dst_entry *dst = skb_dst(skb);
1da177e4 184 struct ipv6hdr *hdr;
4c9483b2 185 u8 proto = fl6->flowi6_proto;
1da177e4 186 int seg_len = skb->len;
e651f03a
GR
187 int hlimit = -1;
188 int tclass = 0;
1da177e4
LT
189 u32 mtu;
190
191 if (opt) {
c2636b4d 192 unsigned int head_room;
1da177e4
LT
193
194 /* First: exthdrs may take lots of space (~8K for now)
195 MAX_HEADER is not enough.
196 */
197 head_room = opt->opt_nflen + opt->opt_flen;
198 seg_len += head_room;
199 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
200
201 if (skb_headroom(skb) < head_room) {
202 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 203 if (skb2 == NULL) {
adf30907 204 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
205 IPSTATS_MIB_OUTDISCARDS);
206 kfree_skb(skb);
1da177e4
LT
207 return -ENOBUFS;
208 }
a11d206d
YH
209 kfree_skb(skb);
210 skb = skb2;
83d7eb29 211 skb_set_owner_w(skb, sk);
1da177e4
LT
212 }
213 if (opt->opt_flen)
214 ipv6_push_frag_opts(skb, opt, &proto);
215 if (opt->opt_nflen)
216 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
217 }
218
e2d1bca7
ACM
219 skb_push(skb, sizeof(struct ipv6hdr));
220 skb_reset_network_header(skb);
0660e03f 221 hdr = ipv6_hdr(skb);
1da177e4
LT
222
223 /*
224 * Fill in the IPv6 header
225 */
e651f03a
GR
226 if (np) {
227 tclass = np->tclass;
1da177e4 228 hlimit = np->hop_limit;
e651f03a 229 }
1da177e4 230 if (hlimit < 0)
6b75d090 231 hlimit = ip6_dst_hoplimit(dst);
1da177e4 232
4c9483b2 233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
41a1f8ea 234
1da177e4
LT
235 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto;
237 hdr->hop_limit = hlimit;
238
4c9483b2 239 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1da177e4
LT
240 ipv6_addr_copy(&hdr->daddr, first_hop);
241
a2c2064f 242 skb->priority = sk->sk_priority;
4a19ec58 243 skb->mark = sk->sk_mark;
a2c2064f 244
1da177e4 245 mtu = dst_mtu(dst);
283d07ac 246 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 247 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 248 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
249 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250 dst->dev, dst_output);
1da177e4
LT
251 }
252
253 if (net_ratelimit())
254 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
255 skb->dev = dst->dev;
3ffe533c 256 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 257 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
258 kfree_skb(skb);
259 return -EMSGSIZE;
260}
261
7159039a
YH
262EXPORT_SYMBOL(ip6_xmit);
263
1da177e4
LT
264/*
265 * To avoid extra problems ND packets are send through this
266 * routine. It's code duplication but I really want to avoid
267 * extra checks since ipv6_build_header is used by TCP (which
268 * is for us performance critical)
269 */
270
271int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 272 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
273 int proto, int len)
274{
275 struct ipv6_pinfo *np = inet6_sk(sk);
276 struct ipv6hdr *hdr;
1da177e4
LT
277
278 skb->protocol = htons(ETH_P_IPV6);
279 skb->dev = dev;
280
55f79cc0
ACM
281 skb_reset_network_header(skb);
282 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 283 hdr = ipv6_hdr(skb);
1da177e4 284
ae08e1f0 285 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
286
287 hdr->payload_len = htons(len);
288 hdr->nexthdr = proto;
289 hdr->hop_limit = np->hop_limit;
290
291 ipv6_addr_copy(&hdr->saddr, saddr);
292 ipv6_addr_copy(&hdr->daddr, daddr);
293
294 return 0;
295}
296
297static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298{
299 struct ip6_ra_chain *ra;
300 struct sock *last = NULL;
301
302 read_lock(&ip6_ra_lock);
303 for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 struct sock *sk = ra->sk;
0bd1b59b
AM
305 if (sk && ra->sel == sel &&
306 (!sk->sk_bound_dev_if ||
307 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
308 if (last) {
309 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
310 if (skb2)
311 rawv6_rcv(last, skb2);
312 }
313 last = sk;
314 }
315 }
316
317 if (last) {
318 rawv6_rcv(last, skb);
319 read_unlock(&ip6_ra_lock);
320 return 1;
321 }
322 read_unlock(&ip6_ra_lock);
323 return 0;
324}
325
e21e0b5f
VN
326static int ip6_forward_proxy_check(struct sk_buff *skb)
327{
0660e03f 328 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
329 u8 nexthdr = hdr->nexthdr;
330 int offset;
331
332 if (ipv6_ext_hdr(nexthdr)) {
333 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
334 if (offset < 0)
335 return 0;
336 } else
337 offset = sizeof(struct ipv6hdr);
338
339 if (nexthdr == IPPROTO_ICMPV6) {
340 struct icmp6hdr *icmp6;
341
d56f90a7
ACM
342 if (!pskb_may_pull(skb, (skb_network_header(skb) +
343 offset + 1 - skb->data)))
e21e0b5f
VN
344 return 0;
345
d56f90a7 346 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
347
348 switch (icmp6->icmp6_type) {
349 case NDISC_ROUTER_SOLICITATION:
350 case NDISC_ROUTER_ADVERTISEMENT:
351 case NDISC_NEIGHBOUR_SOLICITATION:
352 case NDISC_NEIGHBOUR_ADVERTISEMENT:
353 case NDISC_REDIRECT:
354 /* For reaction involving unicast neighbor discovery
355 * message destined to the proxied address, pass it to
356 * input function.
357 */
358 return 1;
359 default:
360 break;
361 }
362 }
363
74553b09
VN
364 /*
365 * The proxying router can't forward traffic sent to a link-local
366 * address, so signal the sender and discard the packet. This
367 * behavior is clarified by the MIPv6 specification.
368 */
369 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
370 dst_link_failure(skb);
371 return -1;
372 }
373
e21e0b5f
VN
374 return 0;
375}
376
1da177e4
LT
/* Continuation of the FORWARD netfilter hook: send the packet out. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
381
382int ip6_forward(struct sk_buff *skb)
383{
adf30907 384 struct dst_entry *dst = skb_dst(skb);
0660e03f 385 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 386 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 387 struct net *net = dev_net(dst->dev);
69cce1d1 388 struct neighbour *n;
14f3ad6f 389 u32 mtu;
1ab1457c 390
53b7997f 391 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
392 goto error;
393
4497b076
BH
394 if (skb_warn_if_lro(skb))
395 goto drop;
396
1da177e4 397 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 398 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
399 goto drop;
400 }
401
72b43d08
AK
402 if (skb->pkt_type != PACKET_HOST)
403 goto drop;
404
35fc92a9 405 skb_forward_csum(skb);
1da177e4
LT
406
407 /*
408 * We DO NOT make any processing on
409 * RA packets, pushing them to user level AS IS
410 * without ane WARRANTY that application will be able
411 * to interpret them. The reason is that we
412 * cannot make anything clever here.
413 *
414 * We are not end-node, so that if packet contains
415 * AH/ESP, we cannot make anything.
416 * Defragmentation also would be mistake, RA packets
417 * cannot be fragmented, because there is no warranty
418 * that different fragments will go along one path. --ANK
419 */
420 if (opt->ra) {
d56f90a7 421 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
422 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
423 return 0;
424 }
425
426 /*
427 * check and decrement ttl
428 */
429 if (hdr->hop_limit <= 1) {
430 /* Force OUTPUT device used as source address */
431 skb->dev = dst->dev;
3ffe533c 432 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
433 IP6_INC_STATS_BH(net,
434 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
435
436 kfree_skb(skb);
437 return -ETIMEDOUT;
438 }
439
fbea49e1 440 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 441 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 442 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
443 int proxied = ip6_forward_proxy_check(skb);
444 if (proxied > 0)
e21e0b5f 445 return ip6_input(skb);
74553b09 446 else if (proxied < 0) {
3bd653c8
DL
447 IP6_INC_STATS(net, ip6_dst_idev(dst),
448 IPSTATS_MIB_INDISCARDS);
74553b09
VN
449 goto drop;
450 }
e21e0b5f
VN
451 }
452
1da177e4 453 if (!xfrm6_route_forward(skb)) {
3bd653c8 454 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
455 goto drop;
456 }
adf30907 457 dst = skb_dst(skb);
1da177e4
LT
458
459 /* IPv6 specs say nothing about it, but it is clear that we cannot
460 send redirects to source routed frames.
1e5dc146 461 We don't send redirects to frames decapsulated from IPsec.
1da177e4 462 */
69cce1d1
DM
463 n = dst_get_neighbour(dst);
464 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4
LT
465 struct in6_addr *target = NULL;
466 struct rt6_info *rt;
1da177e4
LT
467
468 /*
469 * incoming and outgoing devices are the same
470 * send a redirect.
471 */
472
473 rt = (struct rt6_info *) dst;
474 if ((rt->rt6i_flags & RTF_GATEWAY))
475 target = (struct in6_addr*)&n->primary_key;
476 else
477 target = &hdr->daddr;
478
92d86829
DM
479 if (!rt->rt6i_peer)
480 rt6_bind_peer(rt, 1);
481
1da177e4
LT
482 /* Limit redirects both by destination (here)
483 and by source (inside ndisc_send_redirect)
484 */
92d86829 485 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1da177e4 486 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr);
489
1da177e4 490 /* This check is security critical. */
f81b2e7d
YH
491 if (addrtype == IPV6_ADDR_ANY ||
492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
493 goto error;
494 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 496 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
497 goto error;
498 }
1da177e4
LT
499 }
500
14f3ad6f
UW
501 mtu = dst_mtu(dst);
502 if (mtu < IPV6_MIN_MTU)
503 mtu = IPV6_MIN_MTU;
504
0aa68271 505 if (skb->len > mtu && !skb_is_gso(skb)) {
1da177e4
LT
506 /* Again, force OUTPUT device used as source address */
507 skb->dev = dst->dev;
14f3ad6f 508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
483a47d2
DL
509 IP6_INC_STATS_BH(net,
510 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(net,
512 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
513 kfree_skb(skb);
514 return -EMSGSIZE;
515 }
516
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 518 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
519 goto drop;
520 }
521
0660e03f 522 hdr = ipv6_hdr(skb);
1da177e4
LT
523
524 /* Mangling hops number delayed to point after skb COW */
1ab1457c 525
1da177e4
LT
526 hdr->hop_limit--;
527
483a47d2 528 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
b2e0b385 529 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 530 ip6_forward_finish);
1da177e4
LT
531
532error:
483a47d2 533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
534drop:
535 kfree_skb(skb);
536 return -EINVAL;
537}
538
539static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
540{
541 to->pkt_type = from->pkt_type;
542 to->priority = from->priority;
543 to->protocol = from->protocol;
adf30907
ED
544 skb_dst_drop(to);
545 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 546 to->dev = from->dev;
82e91ffe 547 to->mark = from->mark;
1da177e4
LT
548
549#ifdef CONFIG_NET_SCHED
550 to->tc_index = from->tc_index;
551#endif
e7ac05f3 552 nf_copy(to, from);
ba9dda3a
JK
553#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555 to->nf_trace = from->nf_trace;
556#endif
984bc16c 557 skb_copy_secmark(to, from);
1da177e4
LT
558}
559
560int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
561{
562 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
563 struct ipv6_opt_hdr *exthdr =
564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 565 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 566 int found_rhdr = 0;
0660e03f 567 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
568
569 while (offset + 1 <= packet_len) {
570
571 switch (**nexthdr) {
572
573 case NEXTHDR_HOP:
27637df9 574 break;
1da177e4 575 case NEXTHDR_ROUTING:
27637df9
MN
576 found_rhdr = 1;
577 break;
1da177e4 578 case NEXTHDR_DEST:
59fbb3a6 579#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581 break;
582#endif
583 if (found_rhdr)
584 return offset;
1da177e4
LT
585 break;
586 default :
587 return offset;
588 }
27637df9
MN
589
590 offset += ipv6_optlen(exthdr);
591 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593 offset);
1da177e4
LT
594 }
595
596 return offset;
597}
598
ad0081e4 599int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 600{
1da177e4 601 struct sk_buff *frag;
adf30907 602 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 603 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
604 struct ipv6hdr *tmp_hdr;
605 struct frag_hdr *fh;
606 unsigned int mtu, hlen, left, len;
ae08e1f0 607 __be32 frag_id = 0;
1da177e4
LT
608 int ptr, offset = 0, err=0;
609 u8 *prevhdr, nexthdr = 0;
adf30907 610 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 611
1da177e4
LT
612 hlen = ip6_find_1stfragopt(skb, &prevhdr);
613 nexthdr = *prevhdr;
614
628a5c56 615 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
616
617 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 618 * or if the skb it not generated by a local socket.
b881ef76 619 */
f2228f78 620 if (!skb->local_df && skb->len > mtu) {
adf30907 621 skb->dev = skb_dst(skb)->dev;
3ffe533c 622 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 623 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 624 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
625 kfree_skb(skb);
626 return -EMSGSIZE;
627 }
628
d91675f9
YH
629 if (np && np->frag_size < mtu) {
630 if (np->frag_size)
631 mtu = np->frag_size;
632 }
633 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 634
21dc3301 635 if (skb_has_frag_list(skb)) {
1da177e4 636 int first_len = skb_pagelen(skb);
3d13008e 637 struct sk_buff *frag2;
1da177e4
LT
638
639 if (first_len - hlen > mtu ||
640 ((first_len - hlen) & 7) ||
641 skb_cloned(skb))
642 goto slow_path;
643
4d9092bb 644 skb_walk_frags(skb, frag) {
1da177e4
LT
645 /* Correct geometry. */
646 if (frag->len > mtu ||
647 ((frag->len & 7) && frag->next) ||
648 skb_headroom(frag) < hlen)
3d13008e 649 goto slow_path_clean;
1da177e4 650
1da177e4
LT
651 /* Partially cloned skb? */
652 if (skb_shared(frag))
3d13008e 653 goto slow_path_clean;
2fdba6b0
HX
654
655 BUG_ON(frag->sk);
656 if (skb->sk) {
2fdba6b0
HX
657 frag->sk = skb->sk;
658 frag->destructor = sock_wfree;
2fdba6b0 659 }
3d13008e 660 skb->truesize -= frag->truesize;
1da177e4
LT
661 }
662
663 err = 0;
664 offset = 0;
665 frag = skb_shinfo(skb)->frag_list;
4d9092bb 666 skb_frag_list_init(skb);
1da177e4
LT
667 /* BUILD HEADER */
668
9a217a1c 669 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 670 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 671 if (!tmp_hdr) {
adf30907 672 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 673 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
674 return -ENOMEM;
675 }
676
1da177e4
LT
677 __skb_pull(skb, hlen);
678 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
679 __skb_push(skb, hlen);
680 skb_reset_network_header(skb);
d56f90a7 681 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 682
7ea2f2c5 683 ipv6_select_ident(fh);
1da177e4
LT
684 fh->nexthdr = nexthdr;
685 fh->reserved = 0;
686 fh->frag_off = htons(IP6_MF);
687 frag_id = fh->identification;
688
689 first_len = skb_pagelen(skb);
690 skb->data_len = first_len - skb_headlen(skb);
691 skb->len = first_len;
0660e03f
ACM
692 ipv6_hdr(skb)->payload_len = htons(first_len -
693 sizeof(struct ipv6hdr));
a11d206d 694
d8d1f30b 695 dst_hold(&rt->dst);
1da177e4
LT
696
697 for (;;) {
698 /* Prepare header of the next frame,
699 * before previous one went down. */
700 if (frag) {
701 frag->ip_summed = CHECKSUM_NONE;
badff6d0 702 skb_reset_transport_header(frag);
1da177e4 703 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
704 __skb_push(frag, hlen);
705 skb_reset_network_header(frag);
d56f90a7
ACM
706 memcpy(skb_network_header(frag), tmp_hdr,
707 hlen);
1da177e4
LT
708 offset += skb->len - hlen - sizeof(struct frag_hdr);
709 fh->nexthdr = nexthdr;
710 fh->reserved = 0;
711 fh->frag_off = htons(offset);
712 if (frag->next != NULL)
713 fh->frag_off |= htons(IP6_MF);
714 fh->identification = frag_id;
0660e03f
ACM
715 ipv6_hdr(frag)->payload_len =
716 htons(frag->len -
717 sizeof(struct ipv6hdr));
1da177e4
LT
718 ip6_copy_metadata(frag, skb);
719 }
1ab1457c 720
1da177e4 721 err = output(skb);
dafee490 722 if(!err)
d8d1f30b 723 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 724 IPSTATS_MIB_FRAGCREATES);
dafee490 725
1da177e4
LT
726 if (err || !frag)
727 break;
728
729 skb = frag;
730 frag = skb->next;
731 skb->next = NULL;
732 }
733
a51482bd 734 kfree(tmp_hdr);
1da177e4
LT
735
736 if (err == 0) {
d8d1f30b 737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 738 IPSTATS_MIB_FRAGOKS);
d8d1f30b 739 dst_release(&rt->dst);
1da177e4
LT
740 return 0;
741 }
742
743 while (frag) {
744 skb = frag->next;
745 kfree_skb(frag);
746 frag = skb;
747 }
748
d8d1f30b 749 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 750 IPSTATS_MIB_FRAGFAILS);
d8d1f30b 751 dst_release(&rt->dst);
1da177e4 752 return err;
3d13008e
ED
753
754slow_path_clean:
755 skb_walk_frags(skb, frag2) {
756 if (frag2 == frag)
757 break;
758 frag2->sk = NULL;
759 frag2->destructor = NULL;
760 skb->truesize += frag2->truesize;
761 }
1da177e4
LT
762 }
763
764slow_path:
765 left = skb->len - hlen; /* Space per frame */
766 ptr = hlen; /* Where to start from */
767
768 /*
769 * Fragment the datagram.
770 */
771
772 *prevhdr = NEXTHDR_FRAGMENT;
773
774 /*
775 * Keep copying data until we run out.
776 */
777 while(left > 0) {
778 len = left;
779 /* IF: it doesn't fit, use 'mtu' - the data space left */
780 if (len > mtu)
781 len = mtu;
25985edc 782 /* IF: we are not sending up to and including the packet end
1da177e4
LT
783 then align the next start on an eight byte boundary */
784 if (len < left) {
785 len &= ~7;
786 }
787 /*
788 * Allocate buffer.
789 */
790
d8d1f30b 791 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 792 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 793 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 794 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
795 err = -ENOMEM;
796 goto fail;
797 }
798
799 /*
800 * Set up data on packet
801 */
802
803 ip6_copy_metadata(frag, skb);
d8d1f30b 804 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
1da177e4 805 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 806 skb_reset_network_header(frag);
badff6d0 807 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
808 frag->transport_header = (frag->network_header + hlen +
809 sizeof(struct frag_hdr));
1da177e4
LT
810
811 /*
812 * Charge the memory for the fragment to any owner
813 * it might possess
814 */
815 if (skb->sk)
816 skb_set_owner_w(frag, skb->sk);
817
818 /*
819 * Copy the packet header into the new buffer.
820 */
d626f62b 821 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
822
823 /*
824 * Build fragment header.
825 */
826 fh->nexthdr = nexthdr;
827 fh->reserved = 0;
f36d6ab1 828 if (!frag_id) {
7ea2f2c5 829 ipv6_select_ident(fh);
1da177e4
LT
830 frag_id = fh->identification;
831 } else
832 fh->identification = frag_id;
833
834 /*
835 * Copy a block of the IP datagram.
836 */
8984e41d 837 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
838 BUG();
839 left -= len;
840
841 fh->frag_off = htons(offset);
842 if (left > 0)
843 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
844 ipv6_hdr(frag)->payload_len = htons(frag->len -
845 sizeof(struct ipv6hdr));
1da177e4
LT
846
847 ptr += len;
848 offset += len;
849
850 /*
851 * Put this fragment into the sending queue.
852 */
1da177e4
LT
853 err = output(frag);
854 if (err)
855 goto fail;
dafee490 856
adf30907 857 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 858 IPSTATS_MIB_FRAGCREATES);
1da177e4 859 }
adf30907 860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 861 IPSTATS_MIB_FRAGOKS);
1da177e4 862 kfree_skb(skb);
1da177e4
LT
863 return err;
864
865fail:
adf30907 866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 867 IPSTATS_MIB_FRAGFAILS);
1ab1457c 868 kfree_skb(skb);
1da177e4
LT
869 return err;
870}
871
b71d1d42
ED
872static inline int ip6_rt_check(const struct rt6key *rt_key,
873 const struct in6_addr *fl_addr,
874 const struct in6_addr *addr_cache)
cf6b1982 875{
a02cec21
ED
876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
878}
879
497c615a
HX
880static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
b71d1d42 882 const struct flowi6 *fl6)
1da177e4 883{
497c615a
HX
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 886
497c615a
HX
887 if (!dst)
888 goto out;
889
890 /* Yes, checking route validity in not connected
891 * case is not very simple. Take into account,
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
894 *
cf6b1982
YH
895 * 1. ip6_rt_check(): If route was host route,
896 * check that cached destination is current.
497c615a
HX
897 * If it is network route, we still may
898 * check its validity using saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save whole address now,
901 * (because main consumer of this service
902 * is tcp, which has not this problem),
903 * so that the last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
906 */
4c9483b2 907 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 908#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 909 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 910#endif
4c9483b2 911 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
912 dst_release(dst);
913 dst = NULL;
1da177e4
LT
914 }
915
497c615a
HX
916out:
917 return dst;
918}
919
920static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 921 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 922{
3b1e0a65 923 struct net *net = sock_net(sk);
69cce1d1
DM
924#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
925 struct neighbour *n;
926#endif
927 int err;
497c615a 928
1da177e4 929 if (*dst == NULL)
4c9483b2 930 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
931
932 if ((err = (*dst)->error))
933 goto out_err_release;
934
4c9483b2 935 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
936 struct rt6_info *rt = (struct rt6_info *) *dst;
937 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
938 sk ? inet6_sk(sk)->srcprefs : 0,
939 &fl6->saddr);
44456d37 940 if (err)
1da177e4 941 goto out_err_release;
1da177e4
LT
942 }
943
95c385b4 944#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
945 /*
946 * Here if the dst entry we've looked up
947 * has a neighbour entry that is in the INCOMPLETE
948 * state and the src address from the flow is
949 * marked as OPTIMISTIC, we release the found
950 * dst entry and replace it instead with the
951 * dst entry of the nexthop router
952 */
69cce1d1
DM
953 n = dst_get_neighbour(*dst);
954 if (n && !(n->nud_state & NUD_VALID)) {
e550dfb0 955 struct inet6_ifaddr *ifp;
4c9483b2 956 struct flowi6 fl_gw6;
e550dfb0
NH
957 int redirect;
958
4c9483b2 959 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
960 (*dst)->dev, 1);
961
962 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
963 if (ifp)
964 in6_ifa_put(ifp);
965
966 if (redirect) {
967 /*
968 * We need to get the dst entry for the
969 * default router instead
970 */
971 dst_release(*dst);
4c9483b2
DM
972 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
973 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
974 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
975 if ((err = (*dst)->error))
976 goto out_err_release;
95c385b4 977 }
e550dfb0 978 }
95c385b4
NH
979#endif
980
1da177e4
LT
981 return 0;
982
983out_err_release:
ca46f9c8 984 if (err == -ENETUNREACH)
483a47d2 985 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
986 dst_release(*dst);
987 *dst = NULL;
988 return err;
989}
34a0b3cd 990
497c615a
HX
991/**
992 * ip6_dst_lookup - perform route lookup on flow
993 * @sk: socket which provides route info
994 * @dst: pointer to dst_entry * for result
4c9483b2 995 * @fl6: flow to lookup
497c615a
HX
996 *
997 * This function performs a route lookup on the given flow.
998 *
999 * It returns zero on success, or a standard errno code on error.
1000 */
4c9483b2 1001int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
1002{
1003 *dst = NULL;
4c9483b2 1004 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 1005}
3cf3dc6c
ACM
1006EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1007
497c615a 1008/**
68d0c6d3
DM
1009 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1010 * @sk: socket which provides route info
4c9483b2 1011 * @fl6: flow to lookup
68d0c6d3 1012 * @final_dst: final destination address for ipsec lookup
a1414715 1013 * @can_sleep: we are in a sleepable context
68d0c6d3
DM
1014 *
1015 * This function performs a route lookup on the given flow.
1016 *
1017 * It returns a valid dst pointer on success, or a pointer encoded
1018 * error code.
1019 */
4c9483b2 1020struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1021 const struct in6_addr *final_dst,
a1414715 1022 bool can_sleep)
68d0c6d3
DM
1023{
1024 struct dst_entry *dst = NULL;
1025 int err;
1026
4c9483b2 1027 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1028 if (err)
1029 return ERR_PTR(err);
1030 if (final_dst)
4c9483b2 1031 ipv6_addr_copy(&fl6->daddr, final_dst);
2774c131 1032 if (can_sleep)
4c9483b2 1033 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1034
4c9483b2 1035 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1036}
1037EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1038
1039/**
1040 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1041 * @sk: socket which provides the dst cache and route info
4c9483b2 1042 * @fl6: flow to lookup
68d0c6d3 1043 * @final_dst: final destination address for ipsec lookup
a1414715 1044 * @can_sleep: we are in a sleepable context
497c615a
HX
1045 *
1046 * This function performs a route lookup on the given flow with the
1047 * possibility of using the cached route in the socket if it is valid.
1048 * It will take the socket dst lock when operating on the dst cache.
1049 * As a result, this function can only be used in process context.
1050 *
68d0c6d3
DM
1051 * It returns a valid dst pointer on success, or a pointer encoded
1052 * error code.
497c615a 1053 */
4c9483b2 1054struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1055 const struct in6_addr *final_dst,
a1414715 1056 bool can_sleep)
497c615a 1057{
68d0c6d3
DM
1058 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1059 int err;
497c615a 1060
4c9483b2 1061 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1062
4c9483b2 1063 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1064 if (err)
1065 return ERR_PTR(err);
1066 if (final_dst)
4c9483b2 1067 ipv6_addr_copy(&fl6->daddr, final_dst);
2774c131 1068 if (can_sleep)
4c9483b2 1069 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1070
4c9483b2 1071 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1072}
68d0c6d3 1073EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1074
34a0b3cd 1075static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1076 int getfrag(void *from, char *to, int offset, int len,
1077 int odd, struct sk_buff *skb),
1078 void *from, int length, int hh_len, int fragheaderlen,
1079 int transhdrlen, int mtu,unsigned int flags)
1080
1081{
1082 struct sk_buff *skb;
1083 int err;
1084
1085 /* There is support for UDP large send offload by network
1086 * device, so create one single skb packet containing complete
1087 * udp datagram
1088 */
1089 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1090 skb = sock_alloc_send_skb(sk,
1091 hh_len + fragheaderlen + transhdrlen + 20,
1092 (flags & MSG_DONTWAIT), &err);
1093 if (skb == NULL)
1094 return -ENOMEM;
1095
1096 /* reserve space for Hardware header */
1097 skb_reserve(skb, hh_len);
1098
1099 /* create space for UDP/IP header */
1100 skb_put(skb,fragheaderlen + transhdrlen);
1101
1102 /* initialize network header pointer */
c1d2bbe1 1103 skb_reset_network_header(skb);
e89e9cf5
AR
1104
1105 /* initialize protocol header pointer */
b0e380b1 1106 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1107
84fa7933 1108 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5 1109 skb->csum = 0;
e89e9cf5
AR
1110 }
1111
1112 err = skb_append_datato_frags(sk,skb, getfrag, from,
1113 (length - transhdrlen));
1114 if (!err) {
1115 struct frag_hdr fhdr;
1116
c31d5326
SS
1117 /* Specify the length of each IPv6 datagram fragment.
1118 * It has to be a multiple of 8.
1119 */
1120 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1121 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1122 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
7ea2f2c5 1123 ipv6_select_ident(&fhdr);
e89e9cf5
AR
1124 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1125 __skb_queue_tail(&sk->sk_write_queue, skb);
1126
1127 return 0;
1128 }
1129 /* There is not enough support do UPD LSO,
1130 * so follow normal path
1131 */
1132 kfree_skb(skb);
1133
1134 return err;
1135}
1da177e4 1136
0178b695
HX
1137static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1138 gfp_t gfp)
1139{
1140 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1141}
1142
1143static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1144 gfp_t gfp)
1145{
1146 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1147}
1148
41a1f8ea
YH
1149int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1150 int offset, int len, int odd, struct sk_buff *skb),
1151 void *from, int length, int transhdrlen,
4c9483b2 1152 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
13b52cd4 1153 struct rt6_info *rt, unsigned int flags, int dontfrag)
1da177e4
LT
1154{
1155 struct inet_sock *inet = inet_sk(sk);
1156 struct ipv6_pinfo *np = inet6_sk(sk);
bdc712b4 1157 struct inet_cork *cork;
1da177e4
LT
1158 struct sk_buff *skb;
1159 unsigned int maxfraglen, fragheaderlen;
1160 int exthdrlen;
1161 int hh_len;
1162 int mtu;
1163 int copy;
1164 int err;
1165 int offset = 0;
1166 int csummode = CHECKSUM_NONE;
a693e698 1167 __u8 tx_flags = 0;
1da177e4
LT
1168
1169 if (flags&MSG_PROBE)
1170 return 0;
bdc712b4 1171 cork = &inet->cork.base;
1da177e4
LT
1172 if (skb_queue_empty(&sk->sk_write_queue)) {
1173 /*
1174 * setup for corking
1175 */
1176 if (opt) {
0178b695 1177 if (WARN_ON(np->cork.opt))
1da177e4 1178 return -EINVAL;
0178b695
HX
1179
1180 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1181 if (unlikely(np->cork.opt == NULL))
1182 return -ENOBUFS;
1183
1184 np->cork.opt->tot_len = opt->tot_len;
1185 np->cork.opt->opt_flen = opt->opt_flen;
1186 np->cork.opt->opt_nflen = opt->opt_nflen;
1187
1188 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1189 sk->sk_allocation);
1190 if (opt->dst0opt && !np->cork.opt->dst0opt)
1191 return -ENOBUFS;
1192
1193 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1194 sk->sk_allocation);
1195 if (opt->dst1opt && !np->cork.opt->dst1opt)
1196 return -ENOBUFS;
1197
1198 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1199 sk->sk_allocation);
1200 if (opt->hopopt && !np->cork.opt->hopopt)
1201 return -ENOBUFS;
1202
1203 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1204 sk->sk_allocation);
1205 if (opt->srcrt && !np->cork.opt->srcrt)
1206 return -ENOBUFS;
1207
1da177e4
LT
1208 /* need source address above miyazawa*/
1209 }
d8d1f30b 1210 dst_hold(&rt->dst);
bdc712b4 1211 cork->dst = &rt->dst;
4c9483b2 1212 inet->cork.fl.u.ip6 = *fl6;
1da177e4 1213 np->cork.hop_limit = hlimit;
41a1f8ea 1214 np->cork.tclass = tclass;
628a5c56 1215 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
d8d1f30b 1216 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
c7503609 1217 if (np->frag_size < mtu) {
d91675f9
YH
1218 if (np->frag_size)
1219 mtu = np->frag_size;
1220 }
bdc712b4 1221 cork->fragsize = mtu;
d8d1f30b 1222 if (dst_allfrag(rt->dst.path))
bdc712b4
DM
1223 cork->flags |= IPCORK_ALLFRAG;
1224 cork->length = 0;
1da177e4
LT
1225 sk->sk_sndmsg_page = NULL;
1226 sk->sk_sndmsg_off = 0;
d8d1f30b 1227 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1228 rt->rt6i_nfheader_len;
1da177e4
LT
1229 length += exthdrlen;
1230 transhdrlen += exthdrlen;
1231 } else {
bdc712b4 1232 rt = (struct rt6_info *)cork->dst;
4c9483b2 1233 fl6 = &inet->cork.fl.u.ip6;
0178b695 1234 opt = np->cork.opt;
1da177e4
LT
1235 transhdrlen = 0;
1236 exthdrlen = 0;
bdc712b4 1237 mtu = cork->fragsize;
1da177e4
LT
1238 }
1239
d8d1f30b 1240 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1241
a1b05140 1242 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1243 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1244 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1245
1246 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
bdc712b4 1247 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
4c9483b2 1248 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1da177e4
LT
1249 return -EMSGSIZE;
1250 }
1251 }
1252
a693e698
AB
1253 /* For UDP, check if TX timestamp is enabled */
1254 if (sk->sk_type == SOCK_DGRAM) {
1255 err = sock_tx_timestamp(sk, &tx_flags);
1256 if (err)
1257 goto error;
1258 }
1259
1da177e4
LT
1260 /*
1261 * Let's try using as much space as possible.
1262 * Use MTU if total length of the message fits into the MTU.
1263 * Otherwise, we need to reserve fragment header and
1264 * fragment alignment (= 8-15 octects, in total).
1265 *
1266 * Note that we may need to "move" the data from the tail of
1ab1457c 1267 * of the buffer to the new fragment when we split
1da177e4
LT
1268 * the message.
1269 *
1ab1457c 1270 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1271 * at once if non-fragmentable extension headers
1272 * are too large.
1ab1457c 1273 * --yoshfuji
1da177e4
LT
1274 */
1275
bdc712b4 1276 cork->length += length;
4b340ae2
BH
1277 if (length > mtu) {
1278 int proto = sk->sk_protocol;
1279 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
4c9483b2 1280 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
4b340ae2
BH
1281 return -EMSGSIZE;
1282 }
e89e9cf5 1283
4b340ae2 1284 if (proto == IPPROTO_UDP &&
d8d1f30b 1285 (rt->dst.dev->features & NETIF_F_UFO)) {
4b340ae2
BH
1286
1287 err = ip6_ufo_append_data(sk, getfrag, from, length,
1288 hh_len, fragheaderlen,
1289 transhdrlen, mtu, flags);
1290 if (err)
1291 goto error;
1292 return 0;
1293 }
e89e9cf5 1294 }
1da177e4
LT
1295
1296 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1297 goto alloc_new_skb;
1298
1299 while (length > 0) {
1300 /* Check if the remaining data fits into current packet. */
bdc712b4 1301 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1302 if (copy < length)
1303 copy = maxfraglen - skb->len;
1304
1305 if (copy <= 0) {
1306 char *data;
1307 unsigned int datalen;
1308 unsigned int fraglen;
1309 unsigned int fraggap;
1310 unsigned int alloclen;
1311 struct sk_buff *skb_prev;
1312alloc_new_skb:
1313 skb_prev = skb;
1314
1315 /* There's no room in the current skb */
1316 if (skb_prev)
1317 fraggap = skb_prev->len - maxfraglen;
1318 else
1319 fraggap = 0;
1320
1321 /*
1322 * If remaining data exceeds the mtu,
1323 * we know we need more fragment(s).
1324 */
1325 datalen = length + fraggap;
bdc712b4 1326 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1da177e4
LT
1327 datalen = maxfraglen - fragheaderlen;
1328
1329 fraglen = datalen + fragheaderlen;
1330 if ((flags & MSG_MORE) &&
d8d1f30b 1331 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1332 alloclen = mtu;
1333 else
1334 alloclen = datalen + fragheaderlen;
1335
1336 /*
1337 * The last fragment gets additional space at tail.
1338 * Note: we overallocate on fragments with MSG_MODE
1339 * because we have no idea if we're the last one.
1340 */
1341 if (datalen == length + fraggap)
d8d1f30b 1342 alloclen += rt->dst.trailer_len;
1da177e4
LT
1343
1344 /*
1345 * We just reserve space for fragment header.
1ab1457c 1346 * Note: this may be overallocation if the message
1da177e4
LT
1347 * (without MSG_MORE) fits into the MTU.
1348 */
1349 alloclen += sizeof(struct frag_hdr);
1350
1351 if (transhdrlen) {
1352 skb = sock_alloc_send_skb(sk,
1353 alloclen + hh_len,
1354 (flags & MSG_DONTWAIT), &err);
1355 } else {
1356 skb = NULL;
1357 if (atomic_read(&sk->sk_wmem_alloc) <=
1358 2 * sk->sk_sndbuf)
1359 skb = sock_wmalloc(sk,
1360 alloclen + hh_len, 1,
1361 sk->sk_allocation);
1362 if (unlikely(skb == NULL))
1363 err = -ENOBUFS;
a693e698
AB
1364 else {
1365 /* Only the initial fragment
1366 * is time stamped.
1367 */
1368 tx_flags = 0;
1369 }
1da177e4
LT
1370 }
1371 if (skb == NULL)
1372 goto error;
1373 /*
1374 * Fill in the control structures
1375 */
1376 skb->ip_summed = csummode;
1377 skb->csum = 0;
1378 /* reserve for fragmentation */
1379 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1380
a693e698
AB
1381 if (sk->sk_type == SOCK_DGRAM)
1382 skb_shinfo(skb)->tx_flags = tx_flags;
1383
1da177e4
LT
1384 /*
1385 * Find where to start putting bytes
1386 */
1387 data = skb_put(skb, fraglen);
c14d2450 1388 skb_set_network_header(skb, exthdrlen);
1da177e4 1389 data += fragheaderlen;
b0e380b1
ACM
1390 skb->transport_header = (skb->network_header +
1391 fragheaderlen);
1da177e4
LT
1392 if (fraggap) {
1393 skb->csum = skb_copy_and_csum_bits(
1394 skb_prev, maxfraglen,
1395 data + transhdrlen, fraggap, 0);
1396 skb_prev->csum = csum_sub(skb_prev->csum,
1397 skb->csum);
1398 data += fraggap;
e9fa4f7b 1399 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1400 }
1401 copy = datalen - transhdrlen - fraggap;
1402 if (copy < 0) {
1403 err = -EINVAL;
1404 kfree_skb(skb);
1405 goto error;
1406 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1407 err = -EFAULT;
1408 kfree_skb(skb);
1409 goto error;
1410 }
1411
1412 offset += copy;
1413 length -= datalen - fraggap;
1414 transhdrlen = 0;
1415 exthdrlen = 0;
1416 csummode = CHECKSUM_NONE;
1417
1418 /*
1419 * Put the packet on the pending queue
1420 */
1421 __skb_queue_tail(&sk->sk_write_queue, skb);
1422 continue;
1423 }
1424
1425 if (copy > length)
1426 copy = length;
1427
d8d1f30b 1428 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1429 unsigned int off;
1430
1431 off = skb->len;
1432 if (getfrag(from, skb_put(skb, copy),
1433 offset, copy, off, skb) < 0) {
1434 __skb_trim(skb, off);
1435 err = -EFAULT;
1436 goto error;
1437 }
1438 } else {
1439 int i = skb_shinfo(skb)->nr_frags;
1440 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1441 struct page *page = sk->sk_sndmsg_page;
1442 int off = sk->sk_sndmsg_off;
1443 unsigned int left;
1444
1445 if (page && (left = PAGE_SIZE - off) > 0) {
1446 if (copy >= left)
1447 copy = left;
1448 if (page != frag->page) {
1449 if (i == MAX_SKB_FRAGS) {
1450 err = -EMSGSIZE;
1451 goto error;
1452 }
1453 get_page(page);
1454 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1455 frag = &skb_shinfo(skb)->frags[i];
1456 }
1457 } else if(i < MAX_SKB_FRAGS) {
1458 if (copy > PAGE_SIZE)
1459 copy = PAGE_SIZE;
1460 page = alloc_pages(sk->sk_allocation, 0);
1461 if (page == NULL) {
1462 err = -ENOMEM;
1463 goto error;
1464 }
1465 sk->sk_sndmsg_page = page;
1466 sk->sk_sndmsg_off = 0;
1467
1468 skb_fill_page_desc(skb, i, page, 0, 0);
1469 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1470 } else {
1471 err = -EMSGSIZE;
1472 goto error;
1473 }
1474 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1475 err = -EFAULT;
1476 goto error;
1477 }
1478 sk->sk_sndmsg_off += copy;
1479 frag->size += copy;
1480 skb->len += copy;
1481 skb->data_len += copy;
f945fa7a
HX
1482 skb->truesize += copy;
1483 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1484 }
1485 offset += copy;
1486 length -= copy;
1487 }
1488 return 0;
1489error:
bdc712b4 1490 cork->length -= length;
3bd653c8 1491 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1492 return err;
1493}
1494
bf138862
PE
1495static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1496{
0178b695
HX
1497 if (np->cork.opt) {
1498 kfree(np->cork.opt->dst0opt);
1499 kfree(np->cork.opt->dst1opt);
1500 kfree(np->cork.opt->hopopt);
1501 kfree(np->cork.opt->srcrt);
1502 kfree(np->cork.opt);
1503 np->cork.opt = NULL;
1504 }
1505
bdc712b4
DM
1506 if (inet->cork.base.dst) {
1507 dst_release(inet->cork.base.dst);
1508 inet->cork.base.dst = NULL;
1509 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1510 }
1511 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1512}
1513
1da177e4
LT
1514int ip6_push_pending_frames(struct sock *sk)
1515{
1516 struct sk_buff *skb, *tmp_skb;
1517 struct sk_buff **tail_skb;
1518 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1519 struct inet_sock *inet = inet_sk(sk);
1520 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1521 struct net *net = sock_net(sk);
1da177e4
LT
1522 struct ipv6hdr *hdr;
1523 struct ipv6_txoptions *opt = np->cork.opt;
bdc712b4 1524 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
4c9483b2
DM
1525 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1526 unsigned char proto = fl6->flowi6_proto;
1da177e4
LT
1527 int err = 0;
1528
1529 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1530 goto out;
1531 tail_skb = &(skb_shinfo(skb)->frag_list);
1532
1533 /* move skb->data to ip header from ext header */
d56f90a7 1534 if (skb->data < skb_network_header(skb))
bbe735e4 1535 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1536 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1537 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1538 *tail_skb = tmp_skb;
1539 tail_skb = &(tmp_skb->next);
1540 skb->len += tmp_skb->len;
1541 skb->data_len += tmp_skb->len;
1da177e4 1542 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1543 tmp_skb->destructor = NULL;
1544 tmp_skb->sk = NULL;
1da177e4
LT
1545 }
1546
28a89453 1547 /* Allow local fragmentation. */
b5c15fc0 1548 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1549 skb->local_df = 1;
1550
4c9483b2 1551 ipv6_addr_copy(final_dst, &fl6->daddr);
cfe1fc77 1552 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1553 if (opt && opt->opt_flen)
1554 ipv6_push_frag_opts(skb, opt, &proto);
1555 if (opt && opt->opt_nflen)
1556 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1557
e2d1bca7
ACM
1558 skb_push(skb, sizeof(struct ipv6hdr));
1559 skb_reset_network_header(skb);
0660e03f 1560 hdr = ipv6_hdr(skb);
1ab1457c 1561
4c9483b2 1562 *(__be32*)hdr = fl6->flowlabel |
41a1f8ea 1563 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1564
1da177e4
LT
1565 hdr->hop_limit = np->cork.hop_limit;
1566 hdr->nexthdr = proto;
4c9483b2 1567 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1da177e4
LT
1568 ipv6_addr_copy(&hdr->daddr, final_dst);
1569
a2c2064f 1570 skb->priority = sk->sk_priority;
4a19ec58 1571 skb->mark = sk->sk_mark;
a2c2064f 1572
d8d1f30b 1573 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1574 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1575 if (proto == IPPROTO_ICMPV6) {
adf30907 1576 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1577
5a57d4c7 1578 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1579 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1580 }
1581
ef76bc23 1582 err = ip6_local_out(skb);
1da177e4
LT
1583 if (err) {
1584 if (err > 0)
6ce9e7b5 1585 err = net_xmit_errno(err);
1da177e4
LT
1586 if (err)
1587 goto error;
1588 }
1589
1590out:
bf138862 1591 ip6_cork_release(inet, np);
1da177e4
LT
1592 return err;
1593error:
06254914 1594 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1595 goto out;
1596}
1597
1598void ip6_flush_pending_frames(struct sock *sk)
1599{
1da177e4
LT
1600 struct sk_buff *skb;
1601
1602 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1603 if (skb_dst(skb))
1604 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1605 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1606 kfree_skb(skb);
1607 }
1608
bf138862 1609 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1610}