]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/ip6_output.c
ipv6: avoid write to a possibly cloned skb
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
/* Final transmit step: resolve the L2 neighbour for the packet's next hop
 * and hand the skb to the neighbour output path.
 *
 * For multicast destinations this also loops a clone of the packet back to
 * local listeners when required (sk_mc_loop()), updates OUTMCAST stats, and
 * drops packets whose scope is too narrow to leave the host.
 *
 * Returns the neighbour output result, 0 for locally-consumed/dropped
 * multicast, or -EINVAL when no neighbour could be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners unless the egress
		 * device is loopback (the packet comes back anyway) or the
		 * socket disabled multicast loopback. For packets owned by
		 * the mroute socket, only loop those not already forwarded.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the sender only wanted local
			 * delivery; the wire copy is discarded here.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local (or narrower) scope must never leave the host
		 * on a real device.
		 */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* Neighbour lookup runs under rcu_read_lock_bh(); the unlock must
	 * happen after dst_neigh_output() since neigh is RCU-protected.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
132
0c4b51f0 133static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 134{
33b48679
DM
135 int ret;
136
137 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
138 if (ret) {
139 kfree_skb(skb);
140 return ret;
141 }
142
9e508490 143 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
144 dst_allfrag(skb_dst(skb)) ||
145 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 146 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 147 else
7d8c6e39 148 return ip6_finish_output2(net, sk, skb);
9e508490
JE
149}
150
ede2059d 151int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 152{
9e508490 153 struct net_device *dev = skb_dst(skb)->dev;
adf30907 154 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 155
778d80be 156 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 157 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
158 kfree_skb(skb);
159 return 0;
160 }
161
29a26a56
EB
162 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
163 net, sk, skb, NULL, dev,
9c6eb28a
JE
164 ip6_finish_output,
165 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
166}
167
1da177e4 168/*
1c1e9d2b
ED
169 * xmit an sk_buff (used by TCP, SCTP and DCCP)
170 * Note : socket lock is not held for SYNACK packets, but might be modified
171 * by calls to skb_set_owner_w() and ipv6_local_error(),
172 * which are using proper atomic operations or spinlocks.
1da177e4 173 */
1c1e9d2b 174int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
92e55f41 175 __u32 mark, struct ipv6_txoptions *opt, int tclass)
1da177e4 176{
3bd653c8 177 struct net *net = sock_net(sk);
1c1e9d2b 178 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 179 struct in6_addr *first_hop = &fl6->daddr;
adf30907 180 struct dst_entry *dst = skb_dst(skb);
1da177e4 181 struct ipv6hdr *hdr;
4c9483b2 182 u8 proto = fl6->flowi6_proto;
1da177e4 183 int seg_len = skb->len;
e651f03a 184 int hlimit = -1;
1da177e4
LT
185 u32 mtu;
186
187 if (opt) {
c2636b4d 188 unsigned int head_room;
1da177e4
LT
189
190 /* First: exthdrs may take lots of space (~8K for now)
191 MAX_HEADER is not enough.
192 */
193 head_room = opt->opt_nflen + opt->opt_flen;
194 seg_len += head_room;
195 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
196
197 if (skb_headroom(skb) < head_room) {
198 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 199 if (!skb2) {
adf30907 200 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
201 IPSTATS_MIB_OUTDISCARDS);
202 kfree_skb(skb);
1da177e4
LT
203 return -ENOBUFS;
204 }
808db80a 205 consume_skb(skb);
a11d206d 206 skb = skb2;
1c1e9d2b
ED
207 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
208 * it is safe to call in our context (socket lock not held)
209 */
210 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
211 }
212 if (opt->opt_flen)
213 ipv6_push_frag_opts(skb, opt, &proto);
214 if (opt->opt_nflen)
613fa3ca
DL
215 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
216 &fl6->saddr);
1da177e4
LT
217 }
218
e2d1bca7
ACM
219 skb_push(skb, sizeof(struct ipv6hdr));
220 skb_reset_network_header(skb);
0660e03f 221 hdr = ipv6_hdr(skb);
1da177e4
LT
222
223 /*
224 * Fill in the IPv6 header
225 */
b903d324 226 if (np)
1da177e4
LT
227 hlimit = np->hop_limit;
228 if (hlimit < 0)
6b75d090 229 hlimit = ip6_dst_hoplimit(dst);
1da177e4 230
cb1ce2ef 231 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 232 np->autoflowlabel, fl6));
41a1f8ea 233
1da177e4
LT
234 hdr->payload_len = htons(seg_len);
235 hdr->nexthdr = proto;
236 hdr->hop_limit = hlimit;
237
4e3fd7a0
AD
238 hdr->saddr = fl6->saddr;
239 hdr->daddr = *first_hop;
1da177e4 240
9c9c9ad5 241 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 242 skb->priority = sk->sk_priority;
92e55f41 243 skb->mark = mark;
a2c2064f 244
1da177e4 245 mtu = dst_mtu(dst);
60ff7467 246 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 247 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 248 IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
249
250 /* if egress device is enslaved to an L3 master device pass the
251 * skb to its handler for processing
252 */
253 skb = l3mdev_ip6_out((struct sock *)sk, skb);
254 if (unlikely(!skb))
255 return 0;
256
1c1e9d2b
ED
257 /* hooks should never assume socket lock is held.
258 * we promote our socket to non const
259 */
29a26a56 260 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 261 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 262 dst_output);
1da177e4
LT
263 }
264
1da177e4 265 skb->dev = dst->dev;
1c1e9d2b
ED
266 /* ipv6_local_error() does not require socket lock,
267 * we promote our socket to non const
268 */
269 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
270
adf30907 271 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
272 kfree_skb(skb);
273 return -EMSGSIZE;
274}
7159039a
YH
275EXPORT_SYMBOL(ip6_xmit);
276
/* Deliver a Router Alert packet to all raw sockets registered on the RA
 * chain whose selector matches @sel (and whose binding, if any, matches the
 * ingress device).
 *
 * Each matching socket except the last gets a clone; the last one consumes
 * @skb itself, so ownership of @skb transfers on the return-1 path.
 *
 * Returns 1 if the packet was delivered (skb consumed), 0 otherwise
 * (caller still owns skb).
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Deliver a clone to the previous match; keep the
			 * original for the last matching socket.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
305
/* Decide how to treat a packet whose destination is a proxied neighbor
 * (NDP proxy) instead of forwarding it normally.
 *
 * Returns:
 *  1  - unicast neighbor-discovery ICMPv6 destined to the proxied address;
 *       caller should pass it to local input.
 *  -1 - destination is link-local, which a proxying router cannot forward;
 *       link failure has been signalled, caller should drop.
 *  0  - no special handling required.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past extension headers to find the transport protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Only the type byte is needed; make sure it is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
357
/* NF_INET_FORWARD okfn: hand the accepted packet to the dst output path. */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	return dst_output(net, sk, skb);
}
363
0954cf9c
HFS
364static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
365{
366 unsigned int mtu;
367 struct inet6_dev *idev;
368
369 if (dst_metric_locked(dst, RTAX_MTU)) {
370 mtu = dst_metric_raw(dst, RTAX_MTU);
371 if (mtu)
372 return mtu;
373 }
374
375 mtu = IPV6_MIN_MTU;
376 rcu_read_lock();
377 idev = __in6_dev_get(dst->dev);
378 if (idev)
379 mtu = idev->cnf.mtu6;
380 rcu_read_unlock();
381
382 return mtu;
383}
384
fe6cc55f
FW
385static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
386{
418a3156 387 if (skb->len <= mtu)
fe6cc55f
FW
388 return false;
389
60ff7467 390 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
391 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
392 return true;
393
60ff7467 394 if (skb->ignore_df)
418a3156
FW
395 return false;
396
ae7ef81e 397 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
fe6cc55f
FW
398 return false;
399
400 return true;
401}
402
/* Forward an IPv6 packet received on one interface out via its routed dst.
 *
 * Performs all forwarding-plane checks: forwarding enabled, packet really
 * addressed to us at L2, xfrm policy, Router Alert delivery, hop limit,
 * NDP proxying, redirect generation, source-address sanity, MTU, and
 * finally decrements hop_limit (after COW) and passes the packet through
 * NF_INET_FORWARD.
 *
 * Returns 0 on success/consumed, negative errno on drop (skb always
 * consumed on every path).
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only forward packets unicast to our own L2 address. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb in the forward path is unexpected. */
	if (unlikely(skb->sk))
		goto drop;

	/* LRO-merged packets must not be forwarded (sizes are wrong). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have replaced the route; reload it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Make the header writable (and add L2 headroom) before mangling. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated; re-read the header pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
566
/* Copy per-packet metadata (packet type, priority, protocol, dst
 * reference, device, mark, tc index, netfilter and security state) from
 * @from to @to, so each fragment carries the same routing/filtering
 * context as the original packet.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any dst already on 'to' and take a fresh ref on 'from's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
583
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 *
 * Two strategies:
 *  - Fast path: if the skb already carries a suitable frag_list (correct
 *    geometry, unshared, enough headroom), each list member becomes a
 *    fragment in place with only a header rebuild.
 *  - Slow path: allocate a fresh skb per fragment and copy the data.
 *
 * Consumes @skb on every path. Returns 0 on success, negative errno on
 * failure (-EMSGSIZE when fragmentation is not permitted).
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only trust sk-attached pinfo at recursion level 0 (avoids using
	 * the wrong socket's frag_size on nested/tunnel transmits).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen = length of the unfragmentable part; prevhdr points at the
	 * nexthdr byte that must become NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Honor a smaller per-socket fragment size (IPV6_MTU). */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Guard against underflow below: need room for header chain,
	 * fragment header and at least 8 bytes of payload.
	 */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Partial checksums cannot survive fragmentation; resolve now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		/* Detach the frag list: members become fragments. */
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		/* Hold the route across the output loop; released below. */
		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* Output failed mid-list: free the unsent fragments. */
		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done before the bail-out. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/* Patch NEXTHDR_FRAGMENT into the fragment's own header
		 * copy, never into the (possibly cloned) original skb.
		 */
		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
878
b71d1d42
ED
879static inline int ip6_rt_check(const struct rt6key *rt_key,
880 const struct in6_addr *fl_addr,
881 const struct in6_addr *addr_cache)
cf6b1982 882{
a02cec21 883 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 884 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
885}
886
/* Validate a socket's cached dst against the flow @fl6.
 *
 * Returns the dst (still referenced) when it is usable for this flow, or
 * NULL after releasing it when it is stale, of the wrong family, or bound
 * to a different interface than the flow requires.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A v4-mapped socket may have cached an IPv4 dst; reject it. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
933
/* Core of the flow route lookup: resolve a source address if the flow has
 * none, perform the route lookup, and (with optimistic DAD) reroute via the
 * default router when the next hop's neighbour is not yet valid for an
 * optimistic source address.
 *
 * On success returns 0 with *dst referenced; on failure returns a negative
 * errno with *dst released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source is only valid with a v4-mapped (or any)
	 * destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
34a0b3cd 1045
497c615a
HX
1046/**
1047 * ip6_dst_lookup - perform route lookup on flow
1048 * @sk: socket which provides route info
1049 * @dst: pointer to dst_entry * for result
4c9483b2 1050 * @fl6: flow to lookup
497c615a
HX
1051 *
1052 * This function performs a route lookup on the given flow.
1053 *
1054 * It returns zero on success, or a standard errno code on error.
1055 */
343d60aa
RP
1056int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1057 struct flowi6 *fl6)
497c615a
HX
1058{
1059 *dst = NULL;
343d60aa 1060 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1061}
3cf3dc6c
ACM
1062EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1063
497c615a 1064/**
68d0c6d3
DM
1065 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1066 * @sk: socket which provides route info
4c9483b2 1067 * @fl6: flow to lookup
68d0c6d3 1068 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1069 *
1070 * This function performs a route lookup on the given flow.
1071 *
1072 * It returns a valid dst pointer on success, or a pointer encoded
1073 * error code.
1074 */
3aef934f 1075struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1076 const struct in6_addr *final_dst)
68d0c6d3
DM
1077{
1078 struct dst_entry *dst = NULL;
1079 int err;
1080
343d60aa 1081 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1082 if (err)
1083 return ERR_PTR(err);
1084 if (final_dst)
4e3fd7a0 1085 fl6->daddr = *final_dst;
2774c131 1086
f92ee619 1087 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1088}
1089EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1090
1091/**
1092 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1093 * @sk: socket which provides the dst cache and route info
4c9483b2 1094 * @fl6: flow to lookup
68d0c6d3 1095 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1096 *
1097 * This function performs a route lookup on the given flow with the
1098 * possibility of using the cached route in the socket if it is valid.
1099 * It will take the socket dst lock when operating on the dst cache.
1100 * As a result, this function can only be used in process context.
1101 *
68d0c6d3
DM
1102 * It returns a valid dst pointer on success, or a pointer encoded
1103 * error code.
497c615a 1104 */
4c9483b2 1105struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1106 const struct in6_addr *final_dst)
497c615a 1107{
68d0c6d3 1108 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1109
4c9483b2 1110 dst = ip6_sk_dst_check(sk, dst, fl6);
00bc0ef5
JS
1111 if (!dst)
1112 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
68d0c6d3 1113
00bc0ef5 1114 return dst;
497c615a 1115}
68d0c6d3 1116EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1117
/* Build (or extend) a single large skb covering a complete UDP datagram
 * so the device can perform UDP fragmentation offload (UFO).  On the
 * first call the skb is allocated and queued on @queue; subsequent
 * calls append further payload to its frags.  Returns 0 on success or a
 * negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int exthdrlen, int transhdrlen, int mtu,
			unsigned int flags, const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* 20 extra bytes of headroom beyond the headers. */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_set_network_header(skb, exthdrlen);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* GSO metadata already set up on a previous call. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* One fragment id for the whole datagram, chosen from the flow. */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1177
0178b695
HX
1178static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1179 gfp_t gfp)
1180{
1181 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1182}
1183
1184static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1185 gfp_t gfp)
1186{
1187 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1188}
1189
75a493e6 1190static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1191 int *maxfraglen,
1192 unsigned int fragheaderlen,
1193 struct sk_buff *skb,
75a493e6 1194 struct rt6_info *rt,
e367c2d0 1195 unsigned int orig_mtu)
0c183379
G
1196{
1197 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1198 if (!skb) {
0c183379 1199 /* first fragment, reserve header_len */
e367c2d0 1200 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1201
1202 } else {
1203 /*
1204 * this fragment is not first, the headers
1205 * space is regarded as data space.
1206 */
e367c2d0 1207 *mtu = orig_mtu;
0c183379
G
1208 }
1209 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1210 + fragheaderlen - sizeof(struct frag_hdr);
1211 }
1212}
1213
/* Initialise cork state for a fresh corking cycle: deep-copy the tx
 * options into @v6_cork, take a reference on @rt's dst, and record the
 * flow, hop limit, traffic class and fragment size.  Returns 0 or a
 * negative errno.
 *
 * NOTE(review): on -ENOBUFS a partially populated v6_cork->opt is left
 * behind; this appears to rely on the caller eventually running
 * ip6_cork_release() to free it — confirm every error path does so.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cycle must have released its options. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each option block; a NULL result with a
		 * non-NULL source means the kmemdup failed.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* PMTU probing uses the device MTU; otherwise the path MTU. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A socket frag_size smaller than the MTU lowers it further. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1281
/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes pulled
 * in via @getfrag to the skbs queued on @queue, growing the tail skb
 * and allocating new fragment skbs as dictated by the corked MTU.
 * Returns 0 on success or a negative errno; on failure any already
 * queued data is left for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	/* Header lengths only matter for the very first skb of a cork. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* IPV6_DONTFRAG: report the path MTU to the app instead of
	 * fragmenting (UDP/RAW only).
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Hand off to the UFO path when the datagram exceeds the MTU
	 * and the device can offload UDP fragmentation.
	 */
	if ((((length + fragheaderlen) > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen, exthdrlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Soft wmem limit: allow up to 2x sndbuf
				 * while appending without a transport hdr.
				 */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/* Move the overhang of the previous skb into this
			 * one and keep the checksums consistent.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1593
1594int ip6_append_data(struct sock *sk,
1595 int getfrag(void *from, char *to, int offset, int len,
1596 int odd, struct sk_buff *skb),
26879da5
WW
1597 void *from, int length, int transhdrlen,
1598 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1599 struct rt6_info *rt, unsigned int flags,
c14ac945 1600 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1601{
1602 struct inet_sock *inet = inet_sk(sk);
1603 struct ipv6_pinfo *np = inet6_sk(sk);
1604 int exthdrlen;
1605 int err;
1606
1607 if (flags&MSG_PROBE)
1608 return 0;
1609 if (skb_queue_empty(&sk->sk_write_queue)) {
1610 /*
1611 * setup for corking
1612 */
26879da5
WW
1613 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1614 ipc6, rt, fl6);
0bbe84a6
VY
1615 if (err)
1616 return err;
1617
26879da5 1618 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1619 length += exthdrlen;
1620 transhdrlen += exthdrlen;
1621 } else {
1622 fl6 = &inet->cork.fl.u.ip6;
1623 transhdrlen = 0;
1624 }
1625
1626 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1627 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1628 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1629}
a495f836 1630EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1631
366e41d9
VY
1632static void ip6_cork_release(struct inet_cork_full *cork,
1633 struct inet6_cork *v6_cork)
bf138862 1634{
366e41d9
VY
1635 if (v6_cork->opt) {
1636 kfree(v6_cork->opt->dst0opt);
1637 kfree(v6_cork->opt->dst1opt);
1638 kfree(v6_cork->opt->hopopt);
1639 kfree(v6_cork->opt->srcrt);
1640 kfree(v6_cork->opt);
1641 v6_cork->opt = NULL;
0178b695
HX
1642 }
1643
366e41d9
VY
1644 if (cork->base.dst) {
1645 dst_release(cork->base.dst);
1646 cork->base.dst = NULL;
1647 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1648 }
366e41d9 1649 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1650}
1651
/* Collapse the skbs queued on @queue into a single skb (chained via
 * frag_list), push the extension headers and the IPv6 header, account
 * output stats, and release the cork.  Returns the finished skb, or
 * NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs onto the head's frag_list, taking
	 * over their byte counts and detaching them from the socket.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push extension headers; a routing header may rewrite
	 * final_dst to the first intermediate hop.
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1725
1726int ip6_send_skb(struct sk_buff *skb)
1727{
1728 struct net *net = sock_net(skb->sk);
1729 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1730 int err;
1731
33224b16 1732 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1733 if (err) {
1734 if (err > 0)
6ce9e7b5 1735 err = net_xmit_errno(err);
1da177e4 1736 if (err)
6422398c
VY
1737 IP6_INC_STATS(net, rt->rt6i_idev,
1738 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1739 }
1740
1da177e4 1741 return err;
6422398c
VY
1742}
1743
/* Finalise and transmit everything corked on sk->sk_write_queue.
 * An empty queue is a successful no-op.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1755
0bbe84a6 1756static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1757 struct sk_buff_head *queue,
1758 struct inet_cork_full *cork,
1759 struct inet6_cork *v6_cork)
1da177e4 1760{
1da177e4
LT
1761 struct sk_buff *skb;
1762
0bbe84a6 1763 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1764 if (skb_dst(skb))
1765 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1766 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1767 kfree_skb(skb);
1768 }
1769
6422398c 1770 ip6_cork_release(cork, v6_cork);
1da177e4 1771}
0bbe84a6
VY
1772
1773void ip6_flush_pending_frames(struct sock *sk)
1774{
6422398c
VY
1775 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1776 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1777}
a495f836 1778EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1779
1780struct sk_buff *ip6_make_skb(struct sock *sk,
1781 int getfrag(void *from, char *to, int offset,
1782 int len, int odd, struct sk_buff *skb),
1783 void *from, int length, int transhdrlen,
26879da5 1784 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1785 struct rt6_info *rt, unsigned int flags,
26879da5 1786 const struct sockcm_cookie *sockc)
6422398c
VY
1787{
1788 struct inet_cork_full cork;
1789 struct inet6_cork v6_cork;
1790 struct sk_buff_head queue;
26879da5 1791 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1792 int err;
1793
1794 if (flags & MSG_PROBE)
1795 return NULL;
1796
1797 __skb_queue_head_init(&queue);
1798
1799 cork.base.flags = 0;
1800 cork.base.addr = 0;
1801 cork.base.opt = NULL;
1802 v6_cork.opt = NULL;
26879da5 1803 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
6422398c
VY
1804 if (err)
1805 return ERR_PTR(err);
1806
26879da5
WW
1807 if (ipc6->dontfrag < 0)
1808 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c
VY
1809
1810 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1811 &current->task_frag, getfrag, from,
1812 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1813 flags, ipc6, sockc);
6422398c
VY
1814 if (err) {
1815 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1816 return ERR_PTR(err);
1817 }
1818
1819 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1820}