]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/ip6_output.c
net: l3mdev: remove redundant calls
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
ca254490 58#include <net/l3mdev.h>
14972cbd 59#include <net/lwtunnel.h>
1da177e4 60
/* Final transmit step: resolve the L2 neighbour for skb's dst and hand the
 * packet to the neighbour output path.  Handles multicast loopback/stats
 * and lightweight-tunnel redirection first.
 *
 * Returns the neighbour output result, 0 on locally-consumed packets, a
 * LWT xmit code, or -EINVAL when no neighbour entry can be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when multicast
		 * loopback is requested and either a multicast-routing
		 * socket wants it or a local member of the group exists.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the sender only wanted the
			 * local delivery above; do not put it on the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never leave the host. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
131
/* Decide between direct transmission and fragmentation.  Fragment when the
 * packet exceeds the path MTU (and is not GSO), when the route demands
 * fragmentation of all packets (dst_allfrag), or when conntrack defrag
 * recorded a smaller maximum fragment size on input.
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
141
/* Standard IPv6 output entry point (dst_output handler).  Drops the packet
 * if IPv6 is administratively disabled on the egress device, otherwise runs
 * the POST_ROUTING netfilter hook before ip6_finish_output().  The hook is
 * skipped for packets already re-routed by netfilter (IP6SKB_REROUTED).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
158
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Prepends extension headers (if @opt) and the IPv6 header, then sends the
 * packet through the LOCAL_OUT netfilter hook.  Returns 0/hook result on
 * success, -ENOBUFS if headroom reallocation fails, or -EMSGSIZE when the
 * packet exceeds the path MTU and may not be fragmented.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* may rewrite first_hop for routing headers */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
266
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain whose selector matches @sel (and whose binding, if any,
 * matches the ingress device).  All but the last matching socket receive a
 * clone; the last one consumes @skb itself.
 *
 * Returns 1 when the packet was delivered (caller must not free it),
 * 0 when no socket matched.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Hand a clone to the previous match so the original
			 * skb can go to the final one without copying.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
295
/* Classify a packet destined to a proxied (NDP proxy) address.
 *
 * Returns 1 when the packet is a unicast neighbour-discovery message that
 * must be processed locally, -1 when it targets a link-local address and
 * must be dropped with a link failure notification, and 0 when it may be
 * forwarded normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
347
/* NF_INET_FORWARD okfn: hand the forwarded packet to the dst output path. */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	return dst_output(net, sk, skb);
}
353
0954cf9c
HFS
354static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
355{
356 unsigned int mtu;
357 struct inet6_dev *idev;
358
359 if (dst_metric_locked(dst, RTAX_MTU)) {
360 mtu = dst_metric_raw(dst, RTAX_MTU);
361 if (mtu)
362 return mtu;
363 }
364
365 mtu = IPV6_MIN_MTU;
366 rcu_read_lock();
367 idev = __in6_dev_get(dst->dev);
368 if (idev)
369 mtu = idev->cnf.mtu6;
370 rcu_read_unlock();
371
372 return mtu;
373}
374
/* True when @skb cannot be forwarded without exceeding @mtu and must
 * trigger an ICMPV6_PKT_TOOBIG instead.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	/* GSO packets are fine if every segment fits the MTU. */
	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}
392
/* Forward an IPv6 packet received on one interface out via its routed dst.
 *
 * Performs the forwarding sanity/security checks (forwarding enabled,
 * PACKET_HOST only, xfrm policy, hop limit), Router Alert delivery, NDP
 * proxying, redirect generation, MTU enforcement and hop-limit decrement,
 * then runs the NF_INET_FORWARD hook.
 *
 * Returns 0 on success or local consumption, -ETIMEDOUT on hop-limit
 * expiry, -EMSGSIZE when the packet exceeds the forward MTU, -EINVAL on
 * any other drop.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb here means it was locally generated, not
	 * a candidate for forwarding.
	 */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the dst. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
556
/* Copy per-packet metadata from the original skb to a fragment: packet
 * type, priority, protocol, dst reference, device, mark, tc index, and
 * netfilter/security state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
573
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 *
 * Fast path: when the skb carries a well-formed frag_list (all pieces
 * 8-byte aligned and fitting the MTU with sufficient headroom), the list
 * members become the fragments in place.  Otherwise the slow path copies
 * the payload into freshly allocated fragment skbs.
 *
 * Returns 0 on success, -EMSGSIZE when fragmentation is not permitted,
 * -ENOMEM on allocation failure, or the first error from @output.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* only consult socket frag_size for locally generated traffic */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen = length of the unfragmentable part (up to the first
	 * fragmentable extension header); prevhdr points at the nexthdr
	 * byte that must become NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* mtu now = payload bytes available per fragment */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Checksum must be finalized before the payload is split. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above for the frags
		 * already walked before falling back to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		 * then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
863
b71d1d42
ED
864static inline int ip6_rt_check(const struct rt6key *rt_key,
865 const struct in6_addr *fl_addr,
866 const struct in6_addr *addr_cache)
cf6b1982 867{
a02cec21 868 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 869 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
870}
871
/* Validate a cached socket dst against flow @fl6.  Returns the dst when it
 * is still usable, or releases it and returns NULL so the caller performs
 * a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* Cached entry from another family (e.g. mapped v4) is useless. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
918
/* Core of the dst lookup: resolve a source address if the flow has none
 * (via the L3 master device or ip6_route_get_saddr), perform the route
 * lookup, and — with optimistic DAD — retry via the default router when
 * the chosen next hop's neighbour entry is not yet valid.
 *
 * On success *dst holds a referenced entry and 0 is returned; on failure
 * *dst is NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* Let an L3 master (VRF) device pick the source address first. */
	if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
	    (!*dst || !(*dst)->error)) {
		err = l3mdev_get_saddr6(net, sk, fl6);
		if (err)
			goto out_err;
	}

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
out_err:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: the net namespace to do the lookup in
 *	@sk: socket which provides route info (may be NULL)
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	*@dst is reset to NULL before the lookup, so no stale entry is
 *	ever handed to ip6_dst_lookup_tail().
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1050
497c615a 1051/**
68d0c6d3
DM
1052 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1053 * @sk: socket which provides route info
4c9483b2 1054 * @fl6: flow to lookup
68d0c6d3 1055 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1056 *
1057 * This function performs a route lookup on the given flow.
1058 *
1059 * It returns a valid dst pointer on success, or a pointer encoded
1060 * error code.
1061 */
3aef934f 1062struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1063 const struct in6_addr *final_dst)
68d0c6d3
DM
1064{
1065 struct dst_entry *dst = NULL;
1066 int err;
1067
343d60aa 1068 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1069 if (err)
1070 return ERR_PTR(err);
1071 if (final_dst)
4e3fd7a0 1072 fl6->daddr = *final_dst;
2774c131 1073
f92ee619 1074 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1075}
1076EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1077
1078/**
1079 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1080 * @sk: socket which provides the dst cache and route info
4c9483b2 1081 * @fl6: flow to lookup
68d0c6d3 1082 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1083 *
1084 * This function performs a route lookup on the given flow with the
1085 * possibility of using the cached route in the socket if it is valid.
1086 * It will take the socket dst lock when operating on the dst cache.
1087 * As a result, this function can only be used in process context.
1088 *
68d0c6d3
DM
1089 * It returns a valid dst pointer on success, or a pointer encoded
1090 * error code.
497c615a 1091 */
4c9483b2 1092struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1093 const struct in6_addr *final_dst)
497c615a 1094{
68d0c6d3 1095 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1096
4c9483b2 1097 dst = ip6_sk_dst_check(sk, dst, fl6);
00bc0ef5
JS
1098 if (!dst)
1099 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
68d0c6d3 1100
00bc0ef5 1101 return dst;
497c615a 1102}
68d0c6d3 1103EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
/* ip6_ufo_append_data - build one oversized skb for UDP fragmentation offload
 *
 * Instead of software-fragmenting a large UDP datagram, build a single
 * skb covering the whole payload and mark it for GSO so the device (or
 * the GSO layer) segments it.  On the first call the head skb is
 * allocated and its header layout prepared; subsequent calls just append
 * more payload to the existing GSO skb.
 *
 * Returns 0 on success or a negative errno from allocation/copy-in.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int exthdrlen, int transhdrlen, int mtu,
			unsigned int flags, const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* +20 slack beyond headers for the new head skb */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_set_network_header(skb, exthdrlen);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already a GSO packet: only append payload,
		 * the GSO metadata below was set on the first call.
		 */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pre-select the fragment ID so all segments share it. */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	/* Copy the (remaining) payload into page frags of the GSO skb. */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1164
0178b695
HX
1165static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1166 gfp_t gfp)
1167{
1168 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1169}
1170
1171static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1172 gfp_t gfp)
1173{
1174 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1175}
1176
75a493e6 1177static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1178 int *maxfraglen,
1179 unsigned int fragheaderlen,
1180 struct sk_buff *skb,
75a493e6 1181 struct rt6_info *rt,
e367c2d0 1182 unsigned int orig_mtu)
0c183379
G
1183{
1184 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1185 if (!skb) {
0c183379 1186 /* first fragment, reserve header_len */
e367c2d0 1187 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1188
1189 } else {
1190 /*
1191 * this fragment is not first, the headers
1192 * space is regarded as data space.
1193 */
e367c2d0 1194 *mtu = orig_mtu;
0c183379
G
1195 }
1196 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1197 + fragheaderlen - sizeof(struct frag_hdr);
1198 }
1199}
/* ip6_setup_cork - initialise cork state for ip6_append_data()/ip6_make_skb()
 *
 * Deep-copies the caller's IPv6 tx options into @v6_cork so they outlive
 * the sendmsg call, takes a reference on @rt, caches the flow and the
 * per-cork hop limit / traffic class, and computes the fragmentation size.
 *
 * Returns 0 on success, -EINVAL if a previous cork's options are still
 * present, or -ENOBUFS on allocation failure.
 * NOTE(review): on -ENOBUFS a partially duplicated v6_cork->opt (and, for
 * later failures, nothing else) is left allocated; cleanup is deferred to
 * ip6_cork_release() — verify every caller releases on this path.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A leftover option block would be leaked by the overwrite. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Duplicate each extension-header block that is present. */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork keeps its own reference on the route. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick device MTU when probing PMTU, otherwise the path MTU;
	 * for xfrm tunnels use the outer dst, else the innermost path.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller per-socket IPV6_MTU overrides the route MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
/* __ip6_append_data - append payload to the skb queue behind a cork
 *
 * Core of ip6_append_data()/ip6_make_skb(): copies @length bytes obtained
 * through @getfrag into skbs on @queue — growing the tail skb, allocating
 * new MTU-sized fragments, or handing the whole datagram to the UFO path
 * when the device supports it.  Header geometry (extension headers,
 * fragment header, IPsec trailer space) comes from the cork state built
 * by ip6_setup_cork().
 *
 * Returns 0 on success or a negative errno; on error the bytes that were
 * not appended are subtracted from cork->length again so the cork stays
 * consistent.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First skb of the cork carries the extension headers and
		 * any dst-provided (e.g. IPsec) header space.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-octet-aligned fragment payload end offset. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* IPV6_DONTFRAG: report the path MTU instead of fragmenting. */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/* Capture tx timestamp flags; OPT_ID keys off a per-socket counter. */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Oversized UDP with UFO-capable device: one GSO skb instead of
	 * software fragmentation (skipped when tx checksums are disabled).
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen, exthdrlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb: may block per MSG_DONTWAIT. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later skbs: bounded by 2x sndbuf to avoid
				 * unbounded per-socket memory.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, keeping both checksums correct.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* Non-SG device: copy linearly into the tail skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* SG device: append into (possibly shared) page
			 * frags, coalescing with the last frag when possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting done before the copy loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
/* ip6_append_data - queue payload on sk->sk_write_queue behind a cork
 *
 * First call for an empty write queue sets up the cork from @ipc6/@rt/@fl6
 * and accounts the extension-header bytes in @length/@transhdrlen;
 * subsequent calls reuse the corked flow and pass transhdrlen = 0.
 * The queued data is later sent by ip6_push_pending_frames() or dropped
 * by ip6_flush_pending_frames().
 *
 * Returns 0 on success (or for MSG_PROBE, which is a no-op here) or a
 * negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Already corked: the flow stored in the cork wins. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1618
366e41d9
VY
1619static void ip6_cork_release(struct inet_cork_full *cork,
1620 struct inet6_cork *v6_cork)
bf138862 1621{
366e41d9
VY
1622 if (v6_cork->opt) {
1623 kfree(v6_cork->opt->dst0opt);
1624 kfree(v6_cork->opt->dst1opt);
1625 kfree(v6_cork->opt->hopopt);
1626 kfree(v6_cork->opt->srcrt);
1627 kfree(v6_cork->opt);
1628 v6_cork->opt = NULL;
0178b695
HX
1629 }
1630
366e41d9
VY
1631 if (cork->base.dst) {
1632 dst_release(cork->base.dst);
1633 cork->base.dst = NULL;
1634 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1635 }
366e41d9 1636 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1637}
/* __ip6_make_skb - collapse the corked queue into one sendable skb
 *
 * Dequeues all skbs built by __ip6_append_data(), chains the tail ones
 * onto the head's frag_list, pushes the extension headers and the IPv6
 * header, fills in flow label / hop limit / addresses from the cork, and
 * updates the output statistics.  The cork is released before returning.
 *
 * Returns the finished skb, or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Fold the remaining queue into the head skb's frag_list,
	 * transferring length/truesize accounting to the head.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push extension headers in front of the payload; a routing
	 * header may rewrite final_dst to the first hop.
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1712
1713int ip6_send_skb(struct sk_buff *skb)
1714{
1715 struct net *net = sock_net(skb->sk);
1716 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1717 int err;
1718
33224b16 1719 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1720 if (err) {
1721 if (err > 0)
6ce9e7b5 1722 err = net_xmit_errno(err);
1da177e4 1723 if (err)
6422398c
VY
1724 IP6_INC_STATS(net, rt->rt6i_idev,
1725 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1726 }
1727
1da177e4 1728 return err;
6422398c
VY
1729}
/* Finalise and transmit whatever is corked on sk->sk_write_queue.
 * An empty queue is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1742
0bbe84a6 1743static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1744 struct sk_buff_head *queue,
1745 struct inet_cork_full *cork,
1746 struct inet6_cork *v6_cork)
1da177e4 1747{
1da177e4
LT
1748 struct sk_buff *skb;
1749
0bbe84a6 1750 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1751 if (skb_dst(skb))
1752 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1753 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1754 kfree_skb(skb);
1755 }
1756
6422398c 1757 ip6_cork_release(cork, v6_cork);
1da177e4 1758}
/* Public wrapper: flush the socket's own write queue and cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1766
1767struct sk_buff *ip6_make_skb(struct sock *sk,
1768 int getfrag(void *from, char *to, int offset,
1769 int len, int odd, struct sk_buff *skb),
1770 void *from, int length, int transhdrlen,
26879da5 1771 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1772 struct rt6_info *rt, unsigned int flags,
26879da5 1773 const struct sockcm_cookie *sockc)
6422398c
VY
1774{
1775 struct inet_cork_full cork;
1776 struct inet6_cork v6_cork;
1777 struct sk_buff_head queue;
26879da5 1778 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1779 int err;
1780
1781 if (flags & MSG_PROBE)
1782 return NULL;
1783
1784 __skb_queue_head_init(&queue);
1785
1786 cork.base.flags = 0;
1787 cork.base.addr = 0;
1788 cork.base.opt = NULL;
1789 v6_cork.opt = NULL;
26879da5 1790 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
6422398c
VY
1791 if (err)
1792 return ERR_PTR(err);
1793
26879da5
WW
1794 if (ipc6->dontfrag < 0)
1795 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c
VY
1796
1797 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1798 &current->task_frag, getfrag, from,
1799 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1800 flags, ipc6, sockc);
6422398c
VY
1801 if (err) {
1802 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1803 return ERR_PTR(err);
1804 }
1805
1806 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1807}