/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

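/* Decide whether the packet still needs software fragmentation before it
 * can be handed to ip6_finish_output2(): a non-GSO packet larger than the
 * path MTU, a route that demands fragmentation of every packet
 * (dst_allfrag), or a conntrack-defragmented packet whose original
 * fragments were smaller (frag_max_size) all go through ip6_fragment().
 */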
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

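/* ip6_output() is installed as the dst_entry output method for locally
 * generated and forwarded packets alike: it runs the POST_ROUTING netfilter
 * hook (skipped when the skb was already rerouted there, IP6SKB_REROUTED)
 * and then hands off to ip6_finish_output().
 */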
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

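/* Deliver a Router Alert packet to every raw socket that asked for this
 * alert value via the IPV6_ROUTER_ALERT socket option.  All but the last
 * matching socket receive a clone; the last one consumes the original skb.
 * Returns 1 when the skb has been taken, 0 when the caller still owns it.
 */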
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	return dst_output(net, sk, skb);
}

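/* MTU to enforce when forwarding: a locked RTAX_MTU route metric wins;
 * otherwise fall back to the egress device's IPv6 MTU (cnf.mtu6), with
 * IPV6_MIN_MTU (1280) as the default if no inet6 device is attached.
 */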
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}

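/* Forwarding proper: validate the packet, honour Router Alert, decrement
 * the hop limit (sending Time Exceeded when it runs out), possibly emit a
 * redirect when the packet leaves on the interface it arrived on, enforce
 * the path MTU, and finally pass the packet to the netfilter FORWARD hook.
 */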
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

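/* Fragment an IPv6 packet and feed the fragments to @output.
 *
 * Fast path: if the data already sits in a well-formed frag_list (every
 * non-final fragment a multiple of 8 bytes, enough headroom, nothing
 * shared or cloned), the existing buffers are reused and only a fragment
 * header is pushed in front of each one.  Otherwise the slow path copies
 * the payload into freshly allocated skbs of at most mtu bytes each.
 */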
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

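/* Common tail of the dst lookup helpers below: resolve a route for @fl6,
 * selecting a source address first when the caller left it unspecified,
 * and (under CONFIG_IPV6_OPTIMISTIC_DAD) falling back to the default
 * router's dst entry while the next hop neighbour is still unresolved.
 */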
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to do the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

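/* UDP fragmentation offload: rather than building fragments in software,
 * accumulate the whole datagram into a single GSO skb and let the device
 * (or the software GSO fallback) split it.  gso_size is the per-fragment
 * payload and must be a multiple of 8 to yield valid fragment offsets.
 */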
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int exthdrlen, int transhdrlen, int mtu,
			unsigned int flags, const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_set_network_header(skb, exthdrlen);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

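/* Set up the cork for a sequence of append calls: duplicate the tx
 * options so later setsockopt() changes cannot mutate a pending packet,
 * take a reference on the route, and precompute the MTU that
 * __ip6_append_data() will honour.
 */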
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

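/* Workhorse shared by ip6_append_data() and ip6_make_skb(): copy @length
 * bytes from @from onto the tail of @queue, growing the last skb via page
 * frags when the device can do scatter-gather and starting a new skb on
 * an 8-byte fragment boundary whenever the current one is full.  The
 * queued skbs are later glued into one packet by __ip6_make_skb().
 */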
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen, exthdrlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

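/* Collapse the corked queue into a single packet: the first skb becomes
 * the head, every later one is chained onto its frag_list, and the IPv6
 * header plus any extension headers are then written in front.
 */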
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

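/* One-shot counterpart of the append/push pair: cork state lives on the
 * stack and the finished skb (or an ERR_PTR) is returned directly,
 * without touching sk->sk_write_queue.
 */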
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err)
		return ERR_PTR(err);

	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}