/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
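/*
 * Hand-off to the neighbour layer: ip6_finish_output2() is the last
 * IPv6-level step before L2 resolution.  The nexthop is taken from the
 * route via rt6_nexthop(), looked up (or created) in nd_tbl under
 * rcu_read_lock_bh(), and the skb is passed to neigh_output(), which
 * either transmits it or queues it until neighbour discovery completes.
 */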
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
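/*
 * Note: ip6_finish_output() fragments in only three cases: the packet
 * exceeds the path MTU and is not GSO, the route requires fragmentation
 * on every packet (dst_allfrag()), or netfilter defrag recorded a smaller
 * original fragment size in frag_max_size.  GSO packets are left intact
 * here and segmented later by the device layer.
 */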
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
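/*
 * ip6_output() is installed as the dst output method for IPv6 routes, so
 * dst_output() lands here for both locally generated and forwarded
 * packets; that is why the POST_ROUTING hook is taken before
 * ip6_finish_output() runs (skipped only for xfrm-rerouted skbs).
 */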
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}
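/*
 * Flow label policy: a per-socket IPV6_AUTOFLOWLABEL setting, once set,
 * overrides the net.ipv6.auto_flowlabels sysctl default.  Callers that
 * build the flow header pass the result straight into
 * ip6_make_flowlabel(), roughly:
 *
 *	ip6_flow_hdr(hdr, tclass,
 *		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
 *					ip6_autoflowlabel(net, np), fl6));
 */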
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				  ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
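/*
 * Typical use (sketch, error handling and locking omitted): a
 * connection-oriented protocol builds the segment itself, attaches the
 * route with skb_dst_set(), and then calls
 *
 *	err = ip6_xmit(sk, skb, &fl6, sk->sk_mark,
 *		       rcu_dereference(np->opt), np->tclass);
 *
 * where fl6 is the socket's cached flow and np its ipv6_pinfo.
 */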
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}
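/*
 * A packet is "too big" for forwarding only when it exceeds the path MTU
 * and neither the conntrack-defrag exception (frag_max_size), ignore_df,
 * nor GSO segmentation that validates against the MTU applies; the caller
 * then answers with ICMPV6_PKT_TOOBIG instead of fragmenting, since IPv6
 * routers never fragment packets in flight.
 */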
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
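/*
 * Note that hop_limit is only decremented after skb_cow() has made the
 * header private to this path, and the ICMP errors above force skb->dev
 * to the output device so the error is sourced from the forwarding
 * interface rather than the ingress one.
 */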
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
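/*
 * Two strategies are used above: the fast path re-uses an existing
 * frag_list (each list member must already have room for the fragment
 * header and be 8-byte aligned), while the slow path allocates a fresh
 * skb per fragment and copies the data with skb_copy_bits().  Both paths
 * stamp every fragment with the same identification from
 * ipv6_select_ident() and set IP6_MF on all but the last fragment.
 */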
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int flags = 0;
	int err;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
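/*
 * A minimal lookup sketch (the local variable names are illustrative
 * only):
 *
 *	struct flowi6 fl6 = {
 *		.flowi6_proto = IPPROTO_UDP,
 *		.daddr = *daddr,
 *	};
 *	struct dst_entry *dst;
 *	int err = ip6_dst_lookup(net, sk, &dst, &fl6);
 *
 *	if (err)
 *		return err;
 *	... use dst, then dst_release(dst);
 */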
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
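/*
 * The cached path above relies on ip6_sk_dst_check() dropping the stored
 * dst whenever the destination, the source (with subtrees) or the
 * outgoing interface no longer matches the flow, so a stale socket route
 * is never reused; only then does ip6_dst_lookup_flow() perform a full
 * routing and xfrm lookup.
 */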
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
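/*
 * For non-tunnel dsts the usable MTU differs between the first fragment
 * (which must still leave room for rt->dst.header_len) and the following
 * ones, so maxfraglen is recomputed here whenever __ip6_append_data()
 * starts a new fragment.
 */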
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
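/*
 * Corking keeps a private copy of the tx options (each extension header
 * duplicated with ip6_opt_dup()/ip6_rthdr_dup()) plus the route, flow and
 * per-message hop limit/tclass, so later ip6_append_data() calls and the
 * final ip6_push_pending_frames() see a stable view even if the socket
 * options change in between.
 */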
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			refcount_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
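/*
 * Data is appended either into the linear tailroom of the current skb
 * (devices without NETIF_F_SG) or as page fragments taken from the
 * per-task/per-socket page_frag, coalescing with the previous fragment
 * when possible; the getfrag() callback performs the actual copy (and,
 * for UDP/raw, the checksum).
 */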
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
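/*
 * Datagram senders use the cork API in three steps, roughly (sketch,
 * locking and error handling omitted; ip6_append_data() may be called
 * repeatedly while the socket stays corked):
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
 *			      &ipc6, &fl6, rt, msg->msg_flags, &sockc);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corked)
 *		err = ip6_push_pending_frames(sk);
 */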
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
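/*
 * ip6_make_skb() is the uncorked single-shot variant: it runs the same
 * setup/append/make sequence as ip6_append_data() plus ip6_finish_skb(),
 * but on a private queue and cork held on the stack, so the result can be
 * handed straight to ip6_send_skb() without touching sk_write_queue.
 */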