]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/ip6_output.c
net: ec_bhf: remove excessive debug messages
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
9e508490 59static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
f6b72b62 63 struct neighbour *neigh;
6fd6ce20
YH
64 struct in6_addr *nexthop;
65 int ret;
1da177e4
LT
66
67 skb->protocol = htons(ETH_P_IPV6);
68 skb->dev = dev;
69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7ad6848c 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 74 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385
JE
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
89 IP6_INC_STATS(dev_net(dev), idev,
90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
edf391ff
NH
96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97 skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
550bab42 108 nexthop = rt6_nexthop((struct rt6_info *)dst);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
7f88c6b2
HFS
119 IP6_INC_STATS(dev_net(dst->dev),
120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
121 kfree_skb(skb);
122 return -EINVAL;
1da177e4
LT
123}
124
9e508490
JE
125static int ip6_finish_output(struct sk_buff *skb)
126{
127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
128 dst_allfrag(skb_dst(skb)) ||
129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
9e508490
JE
130 return ip6_fragment(skb, ip6_finish_output2);
131 else
132 return ip6_finish_output2(skb);
133}
134
aad88724 135int ip6_output(struct sock *sk, struct sk_buff *skb)
1da177e4 136{
9e508490 137 struct net_device *dev = skb_dst(skb)->dev;
adf30907 138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 140 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 141 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
142 kfree_skb(skb);
143 return 0;
144 }
145
9c6eb28a
JE
146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
b5d43998 152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
153 */
154
4c9483b2 155int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 156 struct ipv6_txoptions *opt, int tclass)
1da177e4 157{
3bd653c8 158 struct net *net = sock_net(sk);
b30bd282 159 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 160 struct in6_addr *first_hop = &fl6->daddr;
adf30907 161 struct dst_entry *dst = skb_dst(skb);
1da177e4 162 struct ipv6hdr *hdr;
4c9483b2 163 u8 proto = fl6->flowi6_proto;
1da177e4 164 int seg_len = skb->len;
e651f03a 165 int hlimit = -1;
1da177e4
LT
166 u32 mtu;
167
168 if (opt) {
c2636b4d 169 unsigned int head_room;
1da177e4
LT
170
171 /* First: exthdrs may take lots of space (~8K for now)
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 180 if (skb2 == NULL) {
adf30907 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
1da177e4
LT
184 return -ENOBUFS;
185 }
808db80a 186 consume_skb(skb);
a11d206d 187 skb = skb2;
83d7eb29 188 skb_set_owner_w(skb, sk);
1da177e4
LT
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
e2d1bca7
ACM
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
0660e03f 198 hdr = ipv6_hdr(skb);
1da177e4
LT
199
200 /*
201 * Fill in the IPv6 header
202 */
b903d324 203 if (np)
1da177e4
LT
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
6b75d090 206 hlimit = ip6_dst_hoplimit(dst);
1da177e4 207
cb1ce2ef
TH
208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
209 np->autoflowlabel));
41a1f8ea 210
1da177e4
LT
211 hdr->payload_len = htons(seg_len);
212 hdr->nexthdr = proto;
213 hdr->hop_limit = hlimit;
214
4e3fd7a0
AD
215 hdr->saddr = fl6->saddr;
216 hdr->daddr = *first_hop;
1da177e4 217
9c9c9ad5 218 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 219 skb->priority = sk->sk_priority;
4a19ec58 220 skb->mark = sk->sk_mark;
a2c2064f 221
1da177e4 222 mtu = dst_mtu(dst);
60ff7467 223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 225 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
227 dst->dev, dst_output);
1da177e4
LT
228 }
229
1da177e4 230 skb->dev = dst->dev;
f4e53e29 231 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
adf30907 232 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
233 kfree_skb(skb);
234 return -EMSGSIZE;
235}
236
7159039a
YH
237EXPORT_SYMBOL(ip6_xmit);
238
1da177e4
LT
239static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
240{
241 struct ip6_ra_chain *ra;
242 struct sock *last = NULL;
243
244 read_lock(&ip6_ra_lock);
245 for (ra = ip6_ra_chain; ra; ra = ra->next) {
246 struct sock *sk = ra->sk;
0bd1b59b
AM
247 if (sk && ra->sel == sel &&
248 (!sk->sk_bound_dev_if ||
249 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
250 if (last) {
251 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
252 if (skb2)
253 rawv6_rcv(last, skb2);
254 }
255 last = sk;
256 }
257 }
258
259 if (last) {
260 rawv6_rcv(last, skb);
261 read_unlock(&ip6_ra_lock);
262 return 1;
263 }
264 read_unlock(&ip6_ra_lock);
265 return 0;
266}
267
e21e0b5f
VN
268static int ip6_forward_proxy_check(struct sk_buff *skb)
269{
0660e03f 270 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 271 u8 nexthdr = hdr->nexthdr;
75f2811c 272 __be16 frag_off;
e21e0b5f
VN
273 int offset;
274
275 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 276 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
277 if (offset < 0)
278 return 0;
279 } else
280 offset = sizeof(struct ipv6hdr);
281
282 if (nexthdr == IPPROTO_ICMPV6) {
283 struct icmp6hdr *icmp6;
284
d56f90a7
ACM
285 if (!pskb_may_pull(skb, (skb_network_header(skb) +
286 offset + 1 - skb->data)))
e21e0b5f
VN
287 return 0;
288
d56f90a7 289 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
290
291 switch (icmp6->icmp6_type) {
292 case NDISC_ROUTER_SOLICITATION:
293 case NDISC_ROUTER_ADVERTISEMENT:
294 case NDISC_NEIGHBOUR_SOLICITATION:
295 case NDISC_NEIGHBOUR_ADVERTISEMENT:
296 case NDISC_REDIRECT:
297 /* For reaction involving unicast neighbor discovery
298 * message destined to the proxied address, pass it to
299 * input function.
300 */
301 return 1;
302 default:
303 break;
304 }
305 }
306
74553b09
VN
307 /*
308 * The proxying router can't forward traffic sent to a link-local
309 * address, so signal the sender and discard the packet. This
310 * behavior is clarified by the MIPv6 specification.
311 */
312 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
313 dst_link_failure(skb);
314 return -1;
315 }
316
e21e0b5f
VN
317 return 0;
318}
319
1da177e4
LT
/* Netfilter FORWARD-hook continuation: just push the packet to output. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
324
0954cf9c
HFS
325static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
326{
327 unsigned int mtu;
328 struct inet6_dev *idev;
329
330 if (dst_metric_locked(dst, RTAX_MTU)) {
331 mtu = dst_metric_raw(dst, RTAX_MTU);
332 if (mtu)
333 return mtu;
334 }
335
336 mtu = IPV6_MIN_MTU;
337 rcu_read_lock();
338 idev = __in6_dev_get(dst->dev);
339 if (idev)
340 mtu = idev->cnf.mtu6;
341 rcu_read_unlock();
342
343 return mtu;
344}
345
fe6cc55f
FW
346static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
347{
418a3156 348 if (skb->len <= mtu)
fe6cc55f
FW
349 return false;
350
60ff7467 351 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
352 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
353 return true;
354
60ff7467 355 if (skb->ignore_df)
418a3156
FW
356 return false;
357
fe6cc55f
FW
358 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
359 return false;
360
361 return true;
362}
363
1da177e4
LT
364int ip6_forward(struct sk_buff *skb)
365{
adf30907 366 struct dst_entry *dst = skb_dst(skb);
0660e03f 367 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 368 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 369 struct net *net = dev_net(dst->dev);
14f3ad6f 370 u32 mtu;
1ab1457c 371
53b7997f 372 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
373 goto error;
374
090f1166
LR
375 if (skb->pkt_type != PACKET_HOST)
376 goto drop;
377
4497b076
BH
378 if (skb_warn_if_lro(skb))
379 goto drop;
380
1da177e4 381 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
382 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
383 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
384 goto drop;
385 }
386
35fc92a9 387 skb_forward_csum(skb);
1da177e4
LT
388
389 /*
390 * We DO NOT make any processing on
391 * RA packets, pushing them to user level AS IS
392 * without ane WARRANTY that application will be able
393 * to interpret them. The reason is that we
394 * cannot make anything clever here.
395 *
396 * We are not end-node, so that if packet contains
397 * AH/ESP, we cannot make anything.
398 * Defragmentation also would be mistake, RA packets
399 * cannot be fragmented, because there is no warranty
400 * that different fragments will go along one path. --ANK
401 */
ab4eb353
YH
402 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
403 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
404 return 0;
405 }
406
407 /*
408 * check and decrement ttl
409 */
410 if (hdr->hop_limit <= 1) {
411 /* Force OUTPUT device used as source address */
412 skb->dev = dst->dev;
3ffe533c 413 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
414 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
415 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
416
417 kfree_skb(skb);
418 return -ETIMEDOUT;
419 }
420
fbea49e1 421 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 422 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 423 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
424 int proxied = ip6_forward_proxy_check(skb);
425 if (proxied > 0)
e21e0b5f 426 return ip6_input(skb);
74553b09 427 else if (proxied < 0) {
15c77d8b
ED
428 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
429 IPSTATS_MIB_INDISCARDS);
74553b09
VN
430 goto drop;
431 }
e21e0b5f
VN
432 }
433
1da177e4 434 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
435 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
436 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
437 goto drop;
438 }
adf30907 439 dst = skb_dst(skb);
1da177e4
LT
440
441 /* IPv6 specs say nothing about it, but it is clear that we cannot
442 send redirects to source routed frames.
1e5dc146 443 We don't send redirects to frames decapsulated from IPsec.
1da177e4 444 */
c45a3dfb 445 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 446 struct in6_addr *target = NULL;
fbfe95a4 447 struct inet_peer *peer;
1da177e4 448 struct rt6_info *rt;
1da177e4
LT
449
450 /*
451 * incoming and outgoing devices are the same
452 * send a redirect.
453 */
454
455 rt = (struct rt6_info *) dst;
c45a3dfb
DM
456 if (rt->rt6i_flags & RTF_GATEWAY)
457 target = &rt->rt6i_gateway;
1da177e4
LT
458 else
459 target = &hdr->daddr;
460
1d861aa4 461 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
92d86829 462
1da177e4
LT
463 /* Limit redirects both by destination (here)
464 and by source (inside ndisc_send_redirect)
465 */
fbfe95a4 466 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 467 ndisc_send_redirect(skb, target);
1d861aa4
DM
468 if (peer)
469 inet_putpeer(peer);
5bb1ab09
DS
470 } else {
471 int addrtype = ipv6_addr_type(&hdr->saddr);
472
1da177e4 473 /* This check is security critical. */
f81b2e7d
YH
474 if (addrtype == IPV6_ADDR_ANY ||
475 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
476 goto error;
477 if (addrtype & IPV6_ADDR_LINKLOCAL) {
478 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 479 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
480 goto error;
481 }
1da177e4
LT
482 }
483
0954cf9c 484 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
485 if (mtu < IPV6_MIN_MTU)
486 mtu = IPV6_MIN_MTU;
487
fe6cc55f 488 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
489 /* Again, force OUTPUT device used as source address */
490 skb->dev = dst->dev;
14f3ad6f 491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
492 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
493 IPSTATS_MIB_INTOOBIGERRORS);
494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
495 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
496 kfree_skb(skb);
497 return -EMSGSIZE;
498 }
499
500 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
501 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
502 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
503 goto drop;
504 }
505
0660e03f 506 hdr = ipv6_hdr(skb);
1da177e4
LT
507
508 /* Mangling hops number delayed to point after skb COW */
1ab1457c 509
1da177e4
LT
510 hdr->hop_limit--;
511
483a47d2 512 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 513 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
b2e0b385 514 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 515 ip6_forward_finish);
1da177e4
LT
516
517error:
483a47d2 518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
519drop:
520 kfree_skb(skb);
521 return -EINVAL;
522}
523
524static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
525{
526 to->pkt_type = from->pkt_type;
527 to->priority = from->priority;
528 to->protocol = from->protocol;
adf30907
ED
529 skb_dst_drop(to);
530 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 531 to->dev = from->dev;
82e91ffe 532 to->mark = from->mark;
1da177e4
LT
533
534#ifdef CONFIG_NET_SCHED
535 to->tc_index = from->tc_index;
536#endif
e7ac05f3 537 nf_copy(to, from);
984bc16c 538 skb_copy_secmark(to, from);
1da177e4
LT
539}
540
73f156a6
ED
541static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
542{
543 static u32 ip6_idents_hashrnd __read_mostly;
544 u32 hash, id;
545
546 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
547
548 hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
04ca6973
ED
549 hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
550
73f156a6
ED
551 id = ip_idents_reserve(hash, 1);
552 fhdr->identification = htonl(id);
553}
554
ad0081e4 555int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 556{
1da177e4 557 struct sk_buff *frag;
adf30907 558 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 559 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
560 struct ipv6hdr *tmp_hdr;
561 struct frag_hdr *fh;
562 unsigned int mtu, hlen, left, len;
a7ae1992 563 int hroom, troom;
ae08e1f0 564 __be32 frag_id = 0;
1da177e4
LT
565 int ptr, offset = 0, err=0;
566 u8 *prevhdr, nexthdr = 0;
adf30907 567 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 568
1da177e4
LT
569 hlen = ip6_find_1stfragopt(skb, &prevhdr);
570 nexthdr = *prevhdr;
571
628a5c56 572 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
573
574 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 575 * or if the skb it not generated by a local socket.
b881ef76 576 */
60ff7467 577 if (unlikely(!skb->ignore_df && skb->len > mtu) ||
4cdd3408
PM
578 (IP6CB(skb)->frag_max_size &&
579 IP6CB(skb)->frag_max_size > mtu)) {
a34a101e
ED
580 if (skb->sk && dst_allfrag(skb_dst(skb)))
581 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
582
adf30907 583 skb->dev = skb_dst(skb)->dev;
3ffe533c 584 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 585 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 586 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
587 kfree_skb(skb);
588 return -EMSGSIZE;
589 }
590
d91675f9
YH
591 if (np && np->frag_size < mtu) {
592 if (np->frag_size)
593 mtu = np->frag_size;
594 }
595 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 596
21dc3301 597 if (skb_has_frag_list(skb)) {
1da177e4 598 int first_len = skb_pagelen(skb);
3d13008e 599 struct sk_buff *frag2;
1da177e4
LT
600
601 if (first_len - hlen > mtu ||
602 ((first_len - hlen) & 7) ||
603 skb_cloned(skb))
604 goto slow_path;
605
4d9092bb 606 skb_walk_frags(skb, frag) {
1da177e4
LT
607 /* Correct geometry. */
608 if (frag->len > mtu ||
609 ((frag->len & 7) && frag->next) ||
610 skb_headroom(frag) < hlen)
3d13008e 611 goto slow_path_clean;
1da177e4 612
1da177e4
LT
613 /* Partially cloned skb? */
614 if (skb_shared(frag))
3d13008e 615 goto slow_path_clean;
2fdba6b0
HX
616
617 BUG_ON(frag->sk);
618 if (skb->sk) {
2fdba6b0
HX
619 frag->sk = skb->sk;
620 frag->destructor = sock_wfree;
2fdba6b0 621 }
3d13008e 622 skb->truesize -= frag->truesize;
1da177e4
LT
623 }
624
625 err = 0;
626 offset = 0;
627 frag = skb_shinfo(skb)->frag_list;
4d9092bb 628 skb_frag_list_init(skb);
1da177e4
LT
629 /* BUILD HEADER */
630
9a217a1c 631 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 632 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 633 if (!tmp_hdr) {
adf30907 634 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 635 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
636 return -ENOMEM;
637 }
638
1da177e4
LT
639 __skb_pull(skb, hlen);
640 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
641 __skb_push(skb, hlen);
642 skb_reset_network_header(skb);
d56f90a7 643 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 644
87c48fa3 645 ipv6_select_ident(fh, rt);
1da177e4
LT
646 fh->nexthdr = nexthdr;
647 fh->reserved = 0;
648 fh->frag_off = htons(IP6_MF);
649 frag_id = fh->identification;
650
651 first_len = skb_pagelen(skb);
652 skb->data_len = first_len - skb_headlen(skb);
653 skb->len = first_len;
0660e03f
ACM
654 ipv6_hdr(skb)->payload_len = htons(first_len -
655 sizeof(struct ipv6hdr));
a11d206d 656
d8d1f30b 657 dst_hold(&rt->dst);
1da177e4
LT
658
659 for (;;) {
660 /* Prepare header of the next frame,
661 * before previous one went down. */
662 if (frag) {
663 frag->ip_summed = CHECKSUM_NONE;
badff6d0 664 skb_reset_transport_header(frag);
1da177e4 665 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
666 __skb_push(frag, hlen);
667 skb_reset_network_header(frag);
d56f90a7
ACM
668 memcpy(skb_network_header(frag), tmp_hdr,
669 hlen);
1da177e4
LT
670 offset += skb->len - hlen - sizeof(struct frag_hdr);
671 fh->nexthdr = nexthdr;
672 fh->reserved = 0;
673 fh->frag_off = htons(offset);
674 if (frag->next != NULL)
675 fh->frag_off |= htons(IP6_MF);
676 fh->identification = frag_id;
0660e03f
ACM
677 ipv6_hdr(frag)->payload_len =
678 htons(frag->len -
679 sizeof(struct ipv6hdr));
1da177e4
LT
680 ip6_copy_metadata(frag, skb);
681 }
1ab1457c 682
1da177e4 683 err = output(skb);
dafee490 684 if(!err)
d8d1f30b 685 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 686 IPSTATS_MIB_FRAGCREATES);
dafee490 687
1da177e4
LT
688 if (err || !frag)
689 break;
690
691 skb = frag;
692 frag = skb->next;
693 skb->next = NULL;
694 }
695
a51482bd 696 kfree(tmp_hdr);
1da177e4
LT
697
698 if (err == 0) {
d8d1f30b 699 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 700 IPSTATS_MIB_FRAGOKS);
94e187c0 701 ip6_rt_put(rt);
1da177e4
LT
702 return 0;
703 }
704
705 while (frag) {
706 skb = frag->next;
707 kfree_skb(frag);
708 frag = skb;
709 }
710
d8d1f30b 711 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 712 IPSTATS_MIB_FRAGFAILS);
94e187c0 713 ip6_rt_put(rt);
1da177e4 714 return err;
3d13008e
ED
715
716slow_path_clean:
717 skb_walk_frags(skb, frag2) {
718 if (frag2 == frag)
719 break;
720 frag2->sk = NULL;
721 frag2->destructor = NULL;
722 skb->truesize += frag2->truesize;
723 }
1da177e4
LT
724 }
725
726slow_path:
72e843bb
ED
727 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
728 skb_checksum_help(skb))
729 goto fail;
730
1da177e4
LT
731 left = skb->len - hlen; /* Space per frame */
732 ptr = hlen; /* Where to start from */
733
734 /*
735 * Fragment the datagram.
736 */
737
738 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
739 hroom = LL_RESERVED_SPACE(rt->dst.dev);
740 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
741
742 /*
743 * Keep copying data until we run out.
744 */
745 while(left > 0) {
746 len = left;
747 /* IF: it doesn't fit, use 'mtu' - the data space left */
748 if (len > mtu)
749 len = mtu;
25985edc 750 /* IF: we are not sending up to and including the packet end
1da177e4
LT
751 then align the next start on an eight byte boundary */
752 if (len < left) {
753 len &= ~7;
754 }
755 /*
756 * Allocate buffer.
757 */
758
a7ae1992
HX
759 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
760 hroom + troom, GFP_ATOMIC)) == NULL) {
64ce2073 761 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 762 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 763 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
764 err = -ENOMEM;
765 goto fail;
766 }
767
768 /*
769 * Set up data on packet
770 */
771
772 ip6_copy_metadata(frag, skb);
a7ae1992 773 skb_reserve(frag, hroom);
1da177e4 774 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 775 skb_reset_network_header(frag);
badff6d0 776 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
777 frag->transport_header = (frag->network_header + hlen +
778 sizeof(struct frag_hdr));
1da177e4
LT
779
780 /*
781 * Charge the memory for the fragment to any owner
782 * it might possess
783 */
784 if (skb->sk)
785 skb_set_owner_w(frag, skb->sk);
786
787 /*
788 * Copy the packet header into the new buffer.
789 */
d626f62b 790 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
791
792 /*
793 * Build fragment header.
794 */
795 fh->nexthdr = nexthdr;
796 fh->reserved = 0;
f36d6ab1 797 if (!frag_id) {
87c48fa3 798 ipv6_select_ident(fh, rt);
1da177e4
LT
799 frag_id = fh->identification;
800 } else
801 fh->identification = frag_id;
802
803 /*
804 * Copy a block of the IP datagram.
805 */
e3f0b86b
HS
806 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
807 len));
1da177e4
LT
808 left -= len;
809
810 fh->frag_off = htons(offset);
811 if (left > 0)
812 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
813 ipv6_hdr(frag)->payload_len = htons(frag->len -
814 sizeof(struct ipv6hdr));
1da177e4
LT
815
816 ptr += len;
817 offset += len;
818
819 /*
820 * Put this fragment into the sending queue.
821 */
1da177e4
LT
822 err = output(frag);
823 if (err)
824 goto fail;
dafee490 825
adf30907 826 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 827 IPSTATS_MIB_FRAGCREATES);
1da177e4 828 }
adf30907 829 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 830 IPSTATS_MIB_FRAGOKS);
808db80a 831 consume_skb(skb);
1da177e4
LT
832 return err;
833
834fail:
adf30907 835 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 836 IPSTATS_MIB_FRAGFAILS);
1ab1457c 837 kfree_skb(skb);
1da177e4
LT
838 return err;
839}
840
b71d1d42
ED
841static inline int ip6_rt_check(const struct rt6key *rt_key,
842 const struct in6_addr *fl_addr,
843 const struct in6_addr *addr_cache)
cf6b1982 844{
a02cec21
ED
845 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
846 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
847}
848
497c615a
HX
849static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
850 struct dst_entry *dst,
b71d1d42 851 const struct flowi6 *fl6)
1da177e4 852{
497c615a 853 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 854 struct rt6_info *rt;
1da177e4 855
497c615a
HX
856 if (!dst)
857 goto out;
858
a963a37d
ED
859 if (dst->ops->family != AF_INET6) {
860 dst_release(dst);
861 return NULL;
862 }
863
864 rt = (struct rt6_info *)dst;
497c615a
HX
865 /* Yes, checking route validity in not connected
866 * case is not very simple. Take into account,
867 * that we do not support routing by source, TOS,
868 * and MSG_DONTROUTE --ANK (980726)
869 *
cf6b1982
YH
870 * 1. ip6_rt_check(): If route was host route,
871 * check that cached destination is current.
497c615a
HX
872 * If it is network route, we still may
873 * check its validity using saved pointer
874 * to the last used address: daddr_cache.
875 * We do not want to save whole address now,
876 * (because main consumer of this service
877 * is tcp, which has not this problem),
878 * so that the last trick works only on connected
879 * sockets.
880 * 2. oif also should be the same.
881 */
4c9483b2 882 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 883#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 884 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 885#endif
4c9483b2 886 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
887 dst_release(dst);
888 dst = NULL;
1da177e4
LT
889 }
890
497c615a
HX
891out:
892 return dst;
893}
894
895static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 896 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 897{
3b1e0a65 898 struct net *net = sock_net(sk);
69cce1d1
DM
899#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
900 struct neighbour *n;
97cac082 901 struct rt6_info *rt;
69cce1d1
DM
902#endif
903 int err;
497c615a 904
1da177e4 905 if (*dst == NULL)
4c9483b2 906 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
907
908 if ((err = (*dst)->error))
909 goto out_err_release;
910
4c9483b2 911 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
912 struct rt6_info *rt = (struct rt6_info *) *dst;
913 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
914 sk ? inet6_sk(sk)->srcprefs : 0,
915 &fl6->saddr);
44456d37 916 if (err)
1da177e4 917 goto out_err_release;
1da177e4
LT
918 }
919
95c385b4 920#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
921 /*
922 * Here if the dst entry we've looked up
923 * has a neighbour entry that is in the INCOMPLETE
924 * state and the src address from the flow is
925 * marked as OPTIMISTIC, we release the found
926 * dst entry and replace it instead with the
927 * dst entry of the nexthop router
928 */
c56bf6fe 929 rt = (struct rt6_info *) *dst;
707be1ff 930 rcu_read_lock_bh();
550bab42 931 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
707be1ff
YH
932 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
933 rcu_read_unlock_bh();
934
935 if (err) {
e550dfb0 936 struct inet6_ifaddr *ifp;
4c9483b2 937 struct flowi6 fl_gw6;
e550dfb0
NH
938 int redirect;
939
4c9483b2 940 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
941 (*dst)->dev, 1);
942
943 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
944 if (ifp)
945 in6_ifa_put(ifp);
946
947 if (redirect) {
948 /*
949 * We need to get the dst entry for the
950 * default router instead
951 */
952 dst_release(*dst);
4c9483b2
DM
953 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
954 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
955 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
956 if ((err = (*dst)->error))
957 goto out_err_release;
95c385b4 958 }
e550dfb0 959 }
95c385b4
NH
960#endif
961
1da177e4
LT
962 return 0;
963
964out_err_release:
ca46f9c8 965 if (err == -ENETUNREACH)
5ac68e7c 966 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
967 dst_release(*dst);
968 *dst = NULL;
969 return err;
970}
34a0b3cd 971
497c615a
HX
972/**
973 * ip6_dst_lookup - perform route lookup on flow
974 * @sk: socket which provides route info
975 * @dst: pointer to dst_entry * for result
4c9483b2 976 * @fl6: flow to lookup
497c615a
HX
977 *
978 * This function performs a route lookup on the given flow.
979 *
980 * It returns zero on success, or a standard errno code on error.
981 */
4c9483b2 982int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
983{
984 *dst = NULL;
4c9483b2 985 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 986}
3cf3dc6c
ACM
987EXPORT_SYMBOL_GPL(ip6_dst_lookup);
988
497c615a 989/**
68d0c6d3
DM
990 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
991 * @sk: socket which provides route info
4c9483b2 992 * @fl6: flow to lookup
68d0c6d3 993 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
994 *
995 * This function performs a route lookup on the given flow.
996 *
997 * It returns a valid dst pointer on success, or a pointer encoded
998 * error code.
999 */
4c9483b2 1000struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1001 const struct in6_addr *final_dst)
68d0c6d3
DM
1002{
1003 struct dst_entry *dst = NULL;
1004 int err;
1005
4c9483b2 1006 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1007 if (err)
1008 return ERR_PTR(err);
1009 if (final_dst)
4e3fd7a0 1010 fl6->daddr = *final_dst;
2774c131 1011
4c9483b2 1012 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1013}
1014EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1015
1016/**
1017 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1018 * @sk: socket which provides the dst cache and route info
4c9483b2 1019 * @fl6: flow to lookup
68d0c6d3 1020 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1021 *
1022 * This function performs a route lookup on the given flow with the
1023 * possibility of using the cached route in the socket if it is valid.
1024 * It will take the socket dst lock when operating on the dst cache.
1025 * As a result, this function can only be used in process context.
1026 *
68d0c6d3
DM
1027 * It returns a valid dst pointer on success, or a pointer encoded
1028 * error code.
497c615a 1029 */
4c9483b2 1030struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1031 const struct in6_addr *final_dst)
497c615a 1032{
68d0c6d3
DM
1033 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1034 int err;
497c615a 1035
4c9483b2 1036 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1037
4c9483b2 1038 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1039 if (err)
1040 return ERR_PTR(err);
1041 if (final_dst)
4e3fd7a0 1042 fl6->daddr = *final_dst;
2774c131 1043
4c9483b2 1044 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1045}
68d0c6d3 1046EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
/*
 * Append data for a UDP socket whose device supports UDP fragmentation
 * offload (UFO): build (or extend) one single large skb carrying the whole
 * datagram and let the NIC/GSO layer do the IPv6 fragmentation.
 *
 * Returns 0 on success or a negative errno (from allocation or from
 * skb_append_datato_frags) on failure.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	struct frag_hdr fhdr;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		/* NOTE(review): the "+ 20" headroom slack appears to be a
		 * historical fudge factor — TODO confirm its origin. */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(&sk->sk_write_queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already GSO-initialized; just append data. */
		goto append;
	}

	/* First pass over this skb: mark it for checksum offload and set
	 * up the GSO metadata the device needs to fragment it. */
	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pick the fragment identification once, up front, so every
	 * hardware-generated fragment shares the same ID. */
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1105
0178b695
HX
1106static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1107 gfp_t gfp)
1108{
1109 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1110}
1111
1112static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1113 gfp_t gfp)
1114{
1115 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1116}
1117
75a493e6 1118static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1119 int *maxfraglen,
1120 unsigned int fragheaderlen,
1121 struct sk_buff *skb,
75a493e6 1122 struct rt6_info *rt,
e367c2d0 1123 unsigned int orig_mtu)
0c183379
G
1124{
1125 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1126 if (skb == NULL) {
1127 /* first fragment, reserve header_len */
e367c2d0 1128 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1129
1130 } else {
1131 /*
1132 * this fragment is not first, the headers
1133 * space is regarded as data space.
1134 */
e367c2d0 1135 *mtu = orig_mtu;
0c183379
G
1136 }
1137 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1138 + fragheaderlen - sizeof(struct frag_hdr);
1139 }
1140}
1141
/*
 * Append data to the pending (corked) send queue of @sk.
 *
 * On the first call for a cork cycle (write queue empty) this sets up the
 * cork state: duplicates @opt into np->cork.opt, takes a reference on the
 * route, records the flow, hop limit, traffic class and the effective MTU.
 * Subsequent calls reuse that cached state and ignore the corresponding
 * parameters.
 *
 * @getfrag copies user data into the skb (returns <0 on fault);
 * @dontfrag requests RXPMTU-style reporting instead of fragmentation for
 * UDP/RAW sockets. Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* Deep-copy each extension header so the caller's
			 * @opt may go away while the cork is held; the
			 * copies are freed by ip6_cork_release(). */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* Pick the MTU: device MTU when probing PMTU, otherwise the
		 * path MTU (the inner path for XFRM tunnels). */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A user-requested fragment size may only shrink the MTU. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		/* Fragmentable extension headers count as transport data
		 * for the first fragment. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Cork already set up: reuse the cached route, flow and
		 * options; header lengths were consumed on the first call. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* Capture transmit timestamp flags; a SOF_TIMESTAMPING_OPT_ID key
	 * is drawn from sk->sk_tskey for software timestamps. */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	/* UDP messages larger than the MTU on a UFO-capable device take
	 * the large-send path instead of software fragmentation. */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Later fragments may exceed sndbuf up to a
				 * factor of two; beyond that fail ENOBUFS. */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * skb into this one, keeping checksums
				 * consistent on both sides. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into the socket's shared
			 * page frag, coalescing with the last frag when
			 * possible. */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Roll the unappended bytes back out of the cork accounting. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1509
bf138862
PE
1510static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1511{
0178b695
HX
1512 if (np->cork.opt) {
1513 kfree(np->cork.opt->dst0opt);
1514 kfree(np->cork.opt->dst1opt);
1515 kfree(np->cork.opt->hopopt);
1516 kfree(np->cork.opt->srcrt);
1517 kfree(np->cork.opt);
1518 np->cork.opt = NULL;
1519 }
1520
bdc712b4
DM
1521 if (inet->cork.base.dst) {
1522 dst_release(inet->cork.base.dst);
1523 inet->cork.base.dst = NULL;
1524 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1525 }
1526 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1527}
1528
/*
 * Flush the corked write queue of @sk as one IPv6 datagram: chain the
 * queued skbs into a frag_list, push extension headers and the IPv6
 * header, update statistics and hand the packet to ip6_local_out().
 * Releases the cork state in all cases. Returns 0 or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Link every remaining queued skb onto the head skb's frag_list,
	 * transferring length/truesize accounting and detaching each one
	 * from the socket. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* A routing header may rewrite the destination; keep the original
	 * around as final_dst for the header's daddr. */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel));
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return values are congestion-notification codes;
		 * translate them into errnos. */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4
LT
1612
1613void ip6_flush_pending_frames(struct sock *sk)
1614{
1da177e4
LT
1615 struct sk_buff *skb;
1616
1617 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1618 if (skb_dst(skb))
1619 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1620 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1621 kfree_skb(skb);
1622 }
1623
bf138862 1624 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1625}
a495f836 1626EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);