// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
58 static int ip6_finish_output2(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
60 struct dst_entry
*dst
= skb_dst(skb
);
61 struct net_device
*dev
= dst
->dev
;
62 struct neighbour
*neigh
;
63 struct in6_addr
*nexthop
;
66 if (ipv6_addr_is_multicast(&ipv6_hdr(skb
)->daddr
)) {
67 struct inet6_dev
*idev
= ip6_dst_idev(skb_dst(skb
));
69 if (!(dev
->flags
& IFF_LOOPBACK
) && sk_mc_loop(sk
) &&
70 ((mroute6_is_socket(net
, skb
) &&
71 !(IP6CB(skb
)->flags
& IP6SKB_FORWARDED
)) ||
72 ipv6_chk_mcast_addr(dev
, &ipv6_hdr(skb
)->daddr
,
73 &ipv6_hdr(skb
)->saddr
))) {
74 struct sk_buff
*newskb
= skb_clone(skb
, GFP_ATOMIC
);
76 /* Do not check for IFF_ALLMULTI; multicast routing
77 is not supported in any case.
80 NF_HOOK(NFPROTO_IPV6
, NF_INET_POST_ROUTING
,
81 net
, sk
, newskb
, NULL
, newskb
->dev
,
84 if (ipv6_hdr(skb
)->hop_limit
== 0) {
85 IP6_INC_STATS(net
, idev
,
86 IPSTATS_MIB_OUTDISCARDS
);
92 IP6_UPD_PO_STATS(net
, idev
, IPSTATS_MIB_OUTMCAST
, skb
->len
);
94 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb
)->daddr
) <=
95 IPV6_ADDR_SCOPE_NODELOCAL
&&
96 !(dev
->flags
& IFF_LOOPBACK
)) {
102 if (lwtunnel_xmit_redirect(dst
->lwtstate
)) {
103 int res
= lwtunnel_xmit(skb
);
105 if (res
< 0 || res
== LWTUNNEL_XMIT_DONE
)
110 nexthop
= rt6_nexthop((struct rt6_info
*)dst
, &ipv6_hdr(skb
)->daddr
);
111 neigh
= __ipv6_neigh_lookup_noref(dst
->dev
, nexthop
);
112 if (unlikely(!neigh
))
113 neigh
= __neigh_create(&nd_tbl
, nexthop
, dst
->dev
, false);
114 if (!IS_ERR(neigh
)) {
115 sock_confirm_neigh(skb
, neigh
);
116 ret
= neigh_output(neigh
, skb
, false);
117 rcu_read_unlock_bh();
120 rcu_read_unlock_bh();
122 IP6_INC_STATS(net
, ip6_dst_idev(dst
), IPSTATS_MIB_OUTNOROUTES
);
127 static int ip6_finish_output(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
131 ret
= BPF_CGROUP_RUN_PROG_INET_EGRESS(sk
, skb
);
137 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
138 /* Policy lookup after SNAT yielded a new policy */
139 if (skb_dst(skb
)->xfrm
) {
140 IPCB(skb
)->flags
|= IPSKB_REROUTED
;
141 return dst_output(net
, sk
, skb
);
145 if ((skb
->len
> ip6_skb_dst_mtu(skb
) && !skb_is_gso(skb
)) ||
146 dst_allfrag(skb_dst(skb
)) ||
147 (IP6CB(skb
)->frag_max_size
&& skb
->len
> IP6CB(skb
)->frag_max_size
))
148 return ip6_fragment(net
, sk
, skb
, ip6_finish_output2
);
150 return ip6_finish_output2(net
, sk
, skb
);
153 int ip6_output(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
155 struct net_device
*dev
= skb_dst(skb
)->dev
;
156 struct inet6_dev
*idev
= ip6_dst_idev(skb_dst(skb
));
158 skb
->protocol
= htons(ETH_P_IPV6
);
161 if (unlikely(idev
->cnf
.disable_ipv6
)) {
162 IP6_INC_STATS(net
, idev
, IPSTATS_MIB_OUTDISCARDS
);
167 return NF_HOOK_COND(NFPROTO_IPV6
, NF_INET_POST_ROUTING
,
168 net
, sk
, skb
, NULL
, dev
,
170 !(IP6CB(skb
)->flags
& IP6SKB_REROUTED
));
173 bool ip6_autoflowlabel(struct net
*net
, const struct ipv6_pinfo
*np
)
175 if (!np
->autoflowlabel_set
)
176 return ip6_default_np_autolabel(net
);
178 return np
->autoflowlabel
;
182 * xmit an sk_buff (used by TCP, SCTP and DCCP)
183 * Note : socket lock is not held for SYNACK packets, but might be modified
184 * by calls to skb_set_owner_w() and ipv6_local_error(),
185 * which are using proper atomic operations or spinlocks.
187 int ip6_xmit(const struct sock
*sk
, struct sk_buff
*skb
, struct flowi6
*fl6
,
188 __u32 mark
, struct ipv6_txoptions
*opt
, int tclass
)
190 struct net
*net
= sock_net(sk
);
191 const struct ipv6_pinfo
*np
= inet6_sk(sk
);
192 struct in6_addr
*first_hop
= &fl6
->daddr
;
193 struct dst_entry
*dst
= skb_dst(skb
);
194 unsigned int head_room
;
196 u8 proto
= fl6
->flowi6_proto
;
197 int seg_len
= skb
->len
;
201 head_room
= sizeof(struct ipv6hdr
) + LL_RESERVED_SPACE(dst
->dev
);
203 head_room
+= opt
->opt_nflen
+ opt
->opt_flen
;
205 if (unlikely(skb_headroom(skb
) < head_room
)) {
206 struct sk_buff
*skb2
= skb_realloc_headroom(skb
, head_room
);
208 IP6_INC_STATS(net
, ip6_dst_idev(skb_dst(skb
)),
209 IPSTATS_MIB_OUTDISCARDS
);
214 skb_set_owner_w(skb2
, skb
->sk
);
220 seg_len
+= opt
->opt_nflen
+ opt
->opt_flen
;
223 ipv6_push_frag_opts(skb
, opt
, &proto
);
226 ipv6_push_nfrag_opts(skb
, opt
, &proto
, &first_hop
,
230 skb_push(skb
, sizeof(struct ipv6hdr
));
231 skb_reset_network_header(skb
);
235 * Fill in the IPv6 header
238 hlimit
= np
->hop_limit
;
240 hlimit
= ip6_dst_hoplimit(dst
);
242 ip6_flow_hdr(hdr
, tclass
, ip6_make_flowlabel(net
, skb
, fl6
->flowlabel
,
243 ip6_autoflowlabel(net
, np
), fl6
));
245 hdr
->payload_len
= htons(seg_len
);
246 hdr
->nexthdr
= proto
;
247 hdr
->hop_limit
= hlimit
;
249 hdr
->saddr
= fl6
->saddr
;
250 hdr
->daddr
= *first_hop
;
252 skb
->protocol
= htons(ETH_P_IPV6
);
253 skb
->priority
= sk
->sk_priority
;
257 if ((skb
->len
<= mtu
) || skb
->ignore_df
|| skb_is_gso(skb
)) {
258 IP6_UPD_PO_STATS(net
, ip6_dst_idev(skb_dst(skb
)),
259 IPSTATS_MIB_OUT
, skb
->len
);
261 /* if egress device is enslaved to an L3 master device pass the
262 * skb to its handler for processing
264 skb
= l3mdev_ip6_out((struct sock
*)sk
, skb
);
268 /* hooks should never assume socket lock is held.
269 * we promote our socket to non const
271 return NF_HOOK(NFPROTO_IPV6
, NF_INET_LOCAL_OUT
,
272 net
, (struct sock
*)sk
, skb
, NULL
, dst
->dev
,
277 /* ipv6_local_error() does not require socket lock,
278 * we promote our socket to non const
280 ipv6_local_error((struct sock
*)sk
, EMSGSIZE
, fl6
, mtu
);
282 IP6_INC_STATS(net
, ip6_dst_idev(skb_dst(skb
)), IPSTATS_MIB_FRAGFAILS
);
286 EXPORT_SYMBOL(ip6_xmit
);
288 static int ip6_call_ra_chain(struct sk_buff
*skb
, int sel
)
290 struct ip6_ra_chain
*ra
;
291 struct sock
*last
= NULL
;
293 read_lock(&ip6_ra_lock
);
294 for (ra
= ip6_ra_chain
; ra
; ra
= ra
->next
) {
295 struct sock
*sk
= ra
->sk
;
296 if (sk
&& ra
->sel
== sel
&&
297 (!sk
->sk_bound_dev_if
||
298 sk
->sk_bound_dev_if
== skb
->dev
->ifindex
)) {
299 struct ipv6_pinfo
*np
= inet6_sk(sk
);
301 if (np
&& np
->rtalert_isolate
&&
302 !net_eq(sock_net(sk
), dev_net(skb
->dev
))) {
306 struct sk_buff
*skb2
= skb_clone(skb
, GFP_ATOMIC
);
308 rawv6_rcv(last
, skb2
);
315 rawv6_rcv(last
, skb
);
316 read_unlock(&ip6_ra_lock
);
319 read_unlock(&ip6_ra_lock
);
323 static int ip6_forward_proxy_check(struct sk_buff
*skb
)
325 struct ipv6hdr
*hdr
= ipv6_hdr(skb
);
326 u8 nexthdr
= hdr
->nexthdr
;
330 if (ipv6_ext_hdr(nexthdr
)) {
331 offset
= ipv6_skip_exthdr(skb
, sizeof(*hdr
), &nexthdr
, &frag_off
);
335 offset
= sizeof(struct ipv6hdr
);
337 if (nexthdr
== IPPROTO_ICMPV6
) {
338 struct icmp6hdr
*icmp6
;
340 if (!pskb_may_pull(skb
, (skb_network_header(skb
) +
341 offset
+ 1 - skb
->data
)))
344 icmp6
= (struct icmp6hdr
*)(skb_network_header(skb
) + offset
);
346 switch (icmp6
->icmp6_type
) {
347 case NDISC_ROUTER_SOLICITATION
:
348 case NDISC_ROUTER_ADVERTISEMENT
:
349 case NDISC_NEIGHBOUR_SOLICITATION
:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT
:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
367 if (ipv6_addr_type(&hdr
->daddr
) & IPV6_ADDR_LINKLOCAL
) {
368 dst_link_failure(skb
);
375 static inline int ip6_forward_finish(struct net
*net
, struct sock
*sk
,
378 struct dst_entry
*dst
= skb_dst(skb
);
380 __IP6_INC_STATS(net
, ip6_dst_idev(dst
), IPSTATS_MIB_OUTFORWDATAGRAMS
);
381 __IP6_ADD_STATS(net
, ip6_dst_idev(dst
), IPSTATS_MIB_OUTOCTETS
, skb
->len
);
383 #ifdef CONFIG_NET_SWITCHDEV
384 if (skb
->offload_l3_fwd_mark
) {
391 return dst_output(net
, sk
, skb
);
394 static bool ip6_pkt_too_big(const struct sk_buff
*skb
, unsigned int mtu
)
399 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
400 if (IP6CB(skb
)->frag_max_size
&& IP6CB(skb
)->frag_max_size
> mtu
)
406 if (skb_is_gso(skb
) && skb_gso_validate_network_len(skb
, mtu
))
412 int ip6_forward(struct sk_buff
*skb
)
414 struct inet6_dev
*idev
= __in6_dev_get_safely(skb
->dev
);
415 struct dst_entry
*dst
= skb_dst(skb
);
416 struct ipv6hdr
*hdr
= ipv6_hdr(skb
);
417 struct inet6_skb_parm
*opt
= IP6CB(skb
);
418 struct net
*net
= dev_net(dst
->dev
);
421 if (net
->ipv6
.devconf_all
->forwarding
== 0)
424 if (skb
->pkt_type
!= PACKET_HOST
)
427 if (unlikely(skb
->sk
))
430 if (skb_warn_if_lro(skb
))
433 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_FWD
, skb
)) {
434 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INDISCARDS
);
438 skb_forward_csum(skb
);
441 * We DO NOT make any processing on
442 * RA packets, pushing them to user level AS IS
443 * without ane WARRANTY that application will be able
444 * to interpret them. The reason is that we
445 * cannot make anything clever here.
447 * We are not end-node, so that if packet contains
448 * AH/ESP, we cannot make anything.
449 * Defragmentation also would be mistake, RA packets
450 * cannot be fragmented, because there is no warranty
451 * that different fragments will go along one path. --ANK
453 if (unlikely(opt
->flags
& IP6SKB_ROUTERALERT
)) {
454 if (ip6_call_ra_chain(skb
, ntohs(opt
->ra
)))
459 * check and decrement ttl
461 if (hdr
->hop_limit
<= 1) {
462 /* Force OUTPUT device used as source address */
464 icmpv6_send(skb
, ICMPV6_TIME_EXCEED
, ICMPV6_EXC_HOPLIMIT
, 0);
465 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INHDRERRORS
);
471 /* XXX: idev->cnf.proxy_ndp? */
472 if (net
->ipv6
.devconf_all
->proxy_ndp
&&
473 pneigh_lookup(&nd_tbl
, net
, &hdr
->daddr
, skb
->dev
, 0)) {
474 int proxied
= ip6_forward_proxy_check(skb
);
476 return ip6_input(skb
);
477 else if (proxied
< 0) {
478 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INDISCARDS
);
483 if (!xfrm6_route_forward(skb
)) {
484 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INDISCARDS
);
489 /* IPv6 specs say nothing about it, but it is clear that we cannot
490 send redirects to source routed frames.
491 We don't send redirects to frames decapsulated from IPsec.
493 if (IP6CB(skb
)->iif
== dst
->dev
->ifindex
&&
494 opt
->srcrt
== 0 && !skb_sec_path(skb
)) {
495 struct in6_addr
*target
= NULL
;
496 struct inet_peer
*peer
;
500 * incoming and outgoing devices are the same
504 rt
= (struct rt6_info
*) dst
;
505 if (rt
->rt6i_flags
& RTF_GATEWAY
)
506 target
= &rt
->rt6i_gateway
;
508 target
= &hdr
->daddr
;
510 peer
= inet_getpeer_v6(net
->ipv6
.peers
, &hdr
->daddr
, 1);
512 /* Limit redirects both by destination (here)
513 and by source (inside ndisc_send_redirect)
515 if (inet_peer_xrlim_allow(peer
, 1*HZ
))
516 ndisc_send_redirect(skb
, target
);
520 int addrtype
= ipv6_addr_type(&hdr
->saddr
);
522 /* This check is security critical. */
523 if (addrtype
== IPV6_ADDR_ANY
||
524 addrtype
& (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LOOPBACK
))
526 if (addrtype
& IPV6_ADDR_LINKLOCAL
) {
527 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
,
528 ICMPV6_NOT_NEIGHBOUR
, 0);
533 mtu
= ip6_dst_mtu_forward(dst
);
534 if (mtu
< IPV6_MIN_MTU
)
537 if (ip6_pkt_too_big(skb
, mtu
)) {
538 /* Again, force OUTPUT device used as source address */
540 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
);
541 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INTOOBIGERRORS
);
542 __IP6_INC_STATS(net
, ip6_dst_idev(dst
),
543 IPSTATS_MIB_FRAGFAILS
);
548 if (skb_cow(skb
, dst
->dev
->hard_header_len
)) {
549 __IP6_INC_STATS(net
, ip6_dst_idev(dst
),
550 IPSTATS_MIB_OUTDISCARDS
);
556 /* Mangling hops number delayed to point after skb COW */
560 return NF_HOOK(NFPROTO_IPV6
, NF_INET_FORWARD
,
561 net
, NULL
, skb
, skb
->dev
, dst
->dev
,
565 __IP6_INC_STATS(net
, idev
, IPSTATS_MIB_INADDRERRORS
);
571 static void ip6_copy_metadata(struct sk_buff
*to
, struct sk_buff
*from
)
573 to
->pkt_type
= from
->pkt_type
;
574 to
->priority
= from
->priority
;
575 to
->protocol
= from
->protocol
;
577 skb_dst_set(to
, dst_clone(skb_dst(from
)));
579 to
->mark
= from
->mark
;
581 skb_copy_hash(to
, from
);
583 #ifdef CONFIG_NET_SCHED
584 to
->tc_index
= from
->tc_index
;
587 skb_ext_copy(to
, from
);
588 skb_copy_secmark(to
, from
);
591 int ip6_fragment(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
,
592 int (*output
)(struct net
*, struct sock
*, struct sk_buff
*))
594 struct sk_buff
*frag
;
595 struct rt6_info
*rt
= (struct rt6_info
*)skb_dst(skb
);
596 struct ipv6_pinfo
*np
= skb
->sk
&& !dev_recursion_level() ?
597 inet6_sk(skb
->sk
) : NULL
;
598 struct ipv6hdr
*tmp_hdr
;
600 unsigned int mtu
, hlen
, left
, len
, nexthdr_offset
;
603 int ptr
, offset
= 0, err
= 0;
604 u8
*prevhdr
, nexthdr
= 0;
606 err
= ip6_find_1stfragopt(skb
, &prevhdr
);
611 nexthdr_offset
= prevhdr
- skb_network_header(skb
);
613 mtu
= ip6_skb_dst_mtu(skb
);
615 /* We must not fragment if the socket is set to force MTU discovery
616 * or if the skb it not generated by a local socket.
618 if (unlikely(!skb
->ignore_df
&& skb
->len
> mtu
))
621 if (IP6CB(skb
)->frag_max_size
) {
622 if (IP6CB(skb
)->frag_max_size
> mtu
)
625 /* don't send fragments larger than what we received */
626 mtu
= IP6CB(skb
)->frag_max_size
;
627 if (mtu
< IPV6_MIN_MTU
)
631 if (np
&& np
->frag_size
< mtu
) {
635 if (mtu
< hlen
+ sizeof(struct frag_hdr
) + 8)
637 mtu
-= hlen
+ sizeof(struct frag_hdr
);
639 frag_id
= ipv6_select_ident(net
, &ipv6_hdr(skb
)->daddr
,
640 &ipv6_hdr(skb
)->saddr
);
642 if (skb
->ip_summed
== CHECKSUM_PARTIAL
&&
643 (err
= skb_checksum_help(skb
)))
646 prevhdr
= skb_network_header(skb
) + nexthdr_offset
;
647 hroom
= LL_RESERVED_SPACE(rt
->dst
.dev
);
648 if (skb_has_frag_list(skb
)) {
649 unsigned int first_len
= skb_pagelen(skb
);
650 struct sk_buff
*frag2
;
652 if (first_len
- hlen
> mtu
||
653 ((first_len
- hlen
) & 7) ||
655 skb_headroom(skb
) < (hroom
+ sizeof(struct frag_hdr
)))
658 skb_walk_frags(skb
, frag
) {
659 /* Correct geometry. */
660 if (frag
->len
> mtu
||
661 ((frag
->len
& 7) && frag
->next
) ||
662 skb_headroom(frag
) < (hlen
+ hroom
+ sizeof(struct frag_hdr
)))
663 goto slow_path_clean
;
665 /* Partially cloned skb? */
666 if (skb_shared(frag
))
667 goto slow_path_clean
;
672 frag
->destructor
= sock_wfree
;
674 skb
->truesize
-= frag
->truesize
;
681 *prevhdr
= NEXTHDR_FRAGMENT
;
682 tmp_hdr
= kmemdup(skb_network_header(skb
), hlen
, GFP_ATOMIC
);
687 frag
= skb_shinfo(skb
)->frag_list
;
688 skb_frag_list_init(skb
);
690 __skb_pull(skb
, hlen
);
691 fh
= __skb_push(skb
, sizeof(struct frag_hdr
));
692 __skb_push(skb
, hlen
);
693 skb_reset_network_header(skb
);
694 memcpy(skb_network_header(skb
), tmp_hdr
, hlen
);
696 fh
->nexthdr
= nexthdr
;
698 fh
->frag_off
= htons(IP6_MF
);
699 fh
->identification
= frag_id
;
701 first_len
= skb_pagelen(skb
);
702 skb
->data_len
= first_len
- skb_headlen(skb
);
703 skb
->len
= first_len
;
704 ipv6_hdr(skb
)->payload_len
= htons(first_len
-
705 sizeof(struct ipv6hdr
));
708 /* Prepare header of the next frame,
709 * before previous one went down. */
711 frag
->ip_summed
= CHECKSUM_NONE
;
712 skb_reset_transport_header(frag
);
713 fh
= __skb_push(frag
, sizeof(struct frag_hdr
));
714 __skb_push(frag
, hlen
);
715 skb_reset_network_header(frag
);
716 memcpy(skb_network_header(frag
), tmp_hdr
,
718 offset
+= skb
->len
- hlen
- sizeof(struct frag_hdr
);
719 fh
->nexthdr
= nexthdr
;
721 fh
->frag_off
= htons(offset
);
723 fh
->frag_off
|= htons(IP6_MF
);
724 fh
->identification
= frag_id
;
725 ipv6_hdr(frag
)->payload_len
=
727 sizeof(struct ipv6hdr
));
728 ip6_copy_metadata(frag
, skb
);
731 err
= output(net
, sk
, skb
);
733 IP6_INC_STATS(net
, ip6_dst_idev(&rt
->dst
),
734 IPSTATS_MIB_FRAGCREATES
);
741 skb_mark_not_on_list(skb
);
747 IP6_INC_STATS(net
, ip6_dst_idev(&rt
->dst
),
748 IPSTATS_MIB_FRAGOKS
);
752 kfree_skb_list(frag
);
754 IP6_INC_STATS(net
, ip6_dst_idev(&rt
->dst
),
755 IPSTATS_MIB_FRAGFAILS
);
759 skb_walk_frags(skb
, frag2
) {
763 frag2
->destructor
= NULL
;
764 skb
->truesize
+= frag2
->truesize
;
769 left
= skb
->len
- hlen
; /* Space per frame */
770 ptr
= hlen
; /* Where to start from */
773 * Fragment the datagram.
776 troom
= rt
->dst
.dev
->needed_tailroom
;
779 * Keep copying data until we run out.
782 u8
*fragnexthdr_offset
;
785 /* IF: it doesn't fit, use 'mtu' - the data space left */
788 /* IF: we are not sending up to and including the packet end
789 then align the next start on an eight byte boundary */
794 /* Allocate buffer */
795 frag
= alloc_skb(len
+ hlen
+ sizeof(struct frag_hdr
) +
796 hroom
+ troom
, GFP_ATOMIC
);
803 * Set up data on packet
806 ip6_copy_metadata(frag
, skb
);
807 skb_reserve(frag
, hroom
);
808 skb_put(frag
, len
+ hlen
+ sizeof(struct frag_hdr
));
809 skb_reset_network_header(frag
);
810 fh
= (struct frag_hdr
*)(skb_network_header(frag
) + hlen
);
811 frag
->transport_header
= (frag
->network_header
+ hlen
+
812 sizeof(struct frag_hdr
));
815 * Charge the memory for the fragment to any owner
819 skb_set_owner_w(frag
, skb
->sk
);
822 * Copy the packet header into the new buffer.
824 skb_copy_from_linear_data(skb
, skb_network_header(frag
), hlen
);
826 fragnexthdr_offset
= skb_network_header(frag
);
827 fragnexthdr_offset
+= prevhdr
- skb_network_header(skb
);
828 *fragnexthdr_offset
= NEXTHDR_FRAGMENT
;
831 * Build fragment header.
833 fh
->nexthdr
= nexthdr
;
835 fh
->identification
= frag_id
;
838 * Copy a block of the IP datagram.
840 BUG_ON(skb_copy_bits(skb
, ptr
, skb_transport_header(frag
),
844 fh
->frag_off
= htons(offset
);
846 fh
->frag_off
|= htons(IP6_MF
);
847 ipv6_hdr(frag
)->payload_len
= htons(frag
->len
-
848 sizeof(struct ipv6hdr
));
854 * Put this fragment into the sending queue.
856 err
= output(net
, sk
, frag
);
860 IP6_INC_STATS(net
, ip6_dst_idev(skb_dst(skb
)),
861 IPSTATS_MIB_FRAGCREATES
);
863 IP6_INC_STATS(net
, ip6_dst_idev(skb_dst(skb
)),
864 IPSTATS_MIB_FRAGOKS
);
869 if (skb
->sk
&& dst_allfrag(skb_dst(skb
)))
870 sk_nocaps_add(skb
->sk
, NETIF_F_GSO_MASK
);
872 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
);
876 IP6_INC_STATS(net
, ip6_dst_idev(skb_dst(skb
)),
877 IPSTATS_MIB_FRAGFAILS
);
882 static inline int ip6_rt_check(const struct rt6key
*rt_key
,
883 const struct in6_addr
*fl_addr
,
884 const struct in6_addr
*addr_cache
)
886 return (rt_key
->plen
!= 128 || !ipv6_addr_equal(fl_addr
, &rt_key
->addr
)) &&
887 (!addr_cache
|| !ipv6_addr_equal(fl_addr
, addr_cache
));
890 static struct dst_entry
*ip6_sk_dst_check(struct sock
*sk
,
891 struct dst_entry
*dst
,
892 const struct flowi6
*fl6
)
894 struct ipv6_pinfo
*np
= inet6_sk(sk
);
900 if (dst
->ops
->family
!= AF_INET6
) {
905 rt
= (struct rt6_info
*)dst
;
906 /* Yes, checking route validity in not connected
907 * case is not very simple. Take into account,
908 * that we do not support routing by source, TOS,
909 * and MSG_DONTROUTE --ANK (980726)
911 * 1. ip6_rt_check(): If route was host route,
912 * check that cached destination is current.
913 * If it is network route, we still may
914 * check its validity using saved pointer
915 * to the last used address: daddr_cache.
916 * We do not want to save whole address now,
917 * (because main consumer of this service
918 * is tcp, which has not this problem),
919 * so that the last trick works only on connected
921 * 2. oif also should be the same.
923 if (ip6_rt_check(&rt
->rt6i_dst
, &fl6
->daddr
, np
->daddr_cache
) ||
924 #ifdef CONFIG_IPV6_SUBTREES
925 ip6_rt_check(&rt
->rt6i_src
, &fl6
->saddr
, np
->saddr_cache
) ||
927 (!(fl6
->flowi6_flags
& FLOWI_FLAG_SKIP_NH_OIF
) &&
928 (fl6
->flowi6_oif
&& fl6
->flowi6_oif
!= dst
->dev
->ifindex
))) {
937 static int ip6_dst_lookup_tail(struct net
*net
, const struct sock
*sk
,
938 struct dst_entry
**dst
, struct flowi6
*fl6
)
940 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
947 /* The correct way to handle this would be to do
948 * ip6_route_get_saddr, and then ip6_route_output; however,
949 * the route-specific preferred source forces the
950 * ip6_route_output call _before_ ip6_route_get_saddr.
952 * In source specific routing (no src=any default route),
953 * ip6_route_output will fail given src=any saddr, though, so
954 * that's why we try it again later.
956 if (ipv6_addr_any(&fl6
->saddr
) && (!*dst
|| !(*dst
)->error
)) {
957 struct fib6_info
*from
;
959 bool had_dst
= *dst
!= NULL
;
962 *dst
= ip6_route_output(net
, sk
, fl6
);
963 rt
= (*dst
)->error
? NULL
: (struct rt6_info
*)*dst
;
966 from
= rt
? rcu_dereference(rt
->from
) : NULL
;
967 err
= ip6_route_get_saddr(net
, from
, &fl6
->daddr
,
968 sk
? inet6_sk(sk
)->srcprefs
: 0,
973 goto out_err_release
;
975 /* If we had an erroneous initial result, pretend it
976 * never existed and let the SA-enabled version take
979 if (!had_dst
&& (*dst
)->error
) {
985 flags
|= RT6_LOOKUP_F_IFACE
;
989 *dst
= ip6_route_output_flags(net
, sk
, fl6
, flags
);
993 goto out_err_release
;
995 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
997 * Here if the dst entry we've looked up
998 * has a neighbour entry that is in the INCOMPLETE
999 * state and the src address from the flow is
1000 * marked as OPTIMISTIC, we release the found
1001 * dst entry and replace it instead with the
1002 * dst entry of the nexthop router
1004 rt
= (struct rt6_info
*) *dst
;
1006 n
= __ipv6_neigh_lookup_noref(rt
->dst
.dev
,
1007 rt6_nexthop(rt
, &fl6
->daddr
));
1008 err
= n
&& !(n
->nud_state
& NUD_VALID
) ? -EINVAL
: 0;
1009 rcu_read_unlock_bh();
1012 struct inet6_ifaddr
*ifp
;
1013 struct flowi6 fl_gw6
;
1016 ifp
= ipv6_get_ifaddr(net
, &fl6
->saddr
,
1019 redirect
= (ifp
&& ifp
->flags
& IFA_F_OPTIMISTIC
);
1025 * We need to get the dst entry for the
1026 * default router instead
1029 memcpy(&fl_gw6
, fl6
, sizeof(struct flowi6
));
1030 memset(&fl_gw6
.daddr
, 0, sizeof(struct in6_addr
));
1031 *dst
= ip6_route_output(net
, sk
, &fl_gw6
);
1032 err
= (*dst
)->error
;
1034 goto out_err_release
;
1038 if (ipv6_addr_v4mapped(&fl6
->saddr
) &&
1039 !(ipv6_addr_v4mapped(&fl6
->daddr
) || ipv6_addr_any(&fl6
->daddr
))) {
1040 err
= -EAFNOSUPPORT
;
1041 goto out_err_release
;
1050 if (err
== -ENETUNREACH
)
1051 IP6_INC_STATS(net
, NULL
, IPSTATS_MIB_OUTNOROUTES
);
1056 * ip6_dst_lookup - perform route lookup on flow
1057 * @sk: socket which provides route info
1058 * @dst: pointer to dst_entry * for result
1059 * @fl6: flow to lookup
1061 * This function performs a route lookup on the given flow.
1063 * It returns zero on success, or a standard errno code on error.
1065 int ip6_dst_lookup(struct net
*net
, struct sock
*sk
, struct dst_entry
**dst
,
1069 return ip6_dst_lookup_tail(net
, sk
, dst
, fl6
);
1071 EXPORT_SYMBOL_GPL(ip6_dst_lookup
);
1074 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1075 * @sk: socket which provides route info
1076 * @fl6: flow to lookup
1077 * @final_dst: final destination address for ipsec lookup
1079 * This function performs a route lookup on the given flow.
1081 * It returns a valid dst pointer on success, or a pointer encoded
1084 struct dst_entry
*ip6_dst_lookup_flow(const struct sock
*sk
, struct flowi6
*fl6
,
1085 const struct in6_addr
*final_dst
)
1087 struct dst_entry
*dst
= NULL
;
1090 err
= ip6_dst_lookup_tail(sock_net(sk
), sk
, &dst
, fl6
);
1092 return ERR_PTR(err
);
1094 fl6
->daddr
= *final_dst
;
1096 return xfrm_lookup_route(sock_net(sk
), dst
, flowi6_to_flowi(fl6
), sk
, 0);
1098 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow
);
1101 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1102 * @sk: socket which provides the dst cache and route info
1103 * @fl6: flow to lookup
1104 * @final_dst: final destination address for ipsec lookup
1105 * @connected: whether @sk is connected or not
1107 * This function performs a route lookup on the given flow with the
1108 * possibility of using the cached route in the socket if it is valid.
1109 * It will take the socket dst lock when operating on the dst cache.
1110 * As a result, this function can only be used in process context.
1112 * In addition, for a connected socket, cache the dst in the socket
1113 * if the current cache is not valid.
1115 * It returns a valid dst pointer on success, or a pointer encoded
1118 struct dst_entry
*ip6_sk_dst_lookup_flow(struct sock
*sk
, struct flowi6
*fl6
,
1119 const struct in6_addr
*final_dst
,
1122 struct dst_entry
*dst
= sk_dst_check(sk
, inet6_sk(sk
)->dst_cookie
);
1124 dst
= ip6_sk_dst_check(sk
, dst
, fl6
);
1128 dst
= ip6_dst_lookup_flow(sk
, fl6
, final_dst
);
1129 if (connected
&& !IS_ERR(dst
))
1130 ip6_sk_dst_store_flow(sk
, dst_clone(dst
), fl6
);
1134 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow
);
1136 static inline struct ipv6_opt_hdr
*ip6_opt_dup(struct ipv6_opt_hdr
*src
,
1139 return src
? kmemdup(src
, (src
->hdrlen
+ 1) * 8, gfp
) : NULL
;
1142 static inline struct ipv6_rt_hdr
*ip6_rthdr_dup(struct ipv6_rt_hdr
*src
,
1145 return src
? kmemdup(src
, (src
->hdrlen
+ 1) * 8, gfp
) : NULL
;
1148 static void ip6_append_data_mtu(unsigned int *mtu
,
1150 unsigned int fragheaderlen
,
1151 struct sk_buff
*skb
,
1152 struct rt6_info
*rt
,
1153 unsigned int orig_mtu
)
1155 if (!(rt
->dst
.flags
& DST_XFRM_TUNNEL
)) {
1157 /* first fragment, reserve header_len */
1158 *mtu
= orig_mtu
- rt
->dst
.header_len
;
1162 * this fragment is not first, the headers
1163 * space is regarded as data space.
1167 *maxfraglen
= ((*mtu
- fragheaderlen
) & ~7)
1168 + fragheaderlen
- sizeof(struct frag_hdr
);
1172 static int ip6_setup_cork(struct sock
*sk
, struct inet_cork_full
*cork
,
1173 struct inet6_cork
*v6_cork
, struct ipcm6_cookie
*ipc6
,
1174 struct rt6_info
*rt
, struct flowi6
*fl6
)
1176 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1178 struct ipv6_txoptions
*opt
= ipc6
->opt
;
1184 if (WARN_ON(v6_cork
->opt
))
1187 v6_cork
->opt
= kzalloc(sizeof(*opt
), sk
->sk_allocation
);
1188 if (unlikely(!v6_cork
->opt
))
1191 v6_cork
->opt
->tot_len
= sizeof(*opt
);
1192 v6_cork
->opt
->opt_flen
= opt
->opt_flen
;
1193 v6_cork
->opt
->opt_nflen
= opt
->opt_nflen
;
1195 v6_cork
->opt
->dst0opt
= ip6_opt_dup(opt
->dst0opt
,
1197 if (opt
->dst0opt
&& !v6_cork
->opt
->dst0opt
)
1200 v6_cork
->opt
->dst1opt
= ip6_opt_dup(opt
->dst1opt
,
1202 if (opt
->dst1opt
&& !v6_cork
->opt
->dst1opt
)
1205 v6_cork
->opt
->hopopt
= ip6_opt_dup(opt
->hopopt
,
1207 if (opt
->hopopt
&& !v6_cork
->opt
->hopopt
)
1210 v6_cork
->opt
->srcrt
= ip6_rthdr_dup(opt
->srcrt
,
1212 if (opt
->srcrt
&& !v6_cork
->opt
->srcrt
)
1215 /* need source address above miyazawa*/
1218 cork
->base
.dst
= &rt
->dst
;
1219 cork
->fl
.u
.ip6
= *fl6
;
1220 v6_cork
->hop_limit
= ipc6
->hlimit
;
1221 v6_cork
->tclass
= ipc6
->tclass
;
1222 if (rt
->dst
.flags
& DST_XFRM_TUNNEL
)
1223 mtu
= np
->pmtudisc
>= IPV6_PMTUDISC_PROBE
?
1224 READ_ONCE(rt
->dst
.dev
->mtu
) : dst_mtu(&rt
->dst
);
1226 mtu
= np
->pmtudisc
>= IPV6_PMTUDISC_PROBE
?
1227 READ_ONCE(rt
->dst
.dev
->mtu
) : dst_mtu(xfrm_dst_path(&rt
->dst
));
1228 if (np
->frag_size
< mtu
) {
1230 mtu
= np
->frag_size
;
1232 if (mtu
< IPV6_MIN_MTU
)
1234 cork
->base
.fragsize
= mtu
;
1235 cork
->base
.gso_size
= ipc6
->gso_size
;
1236 cork
->base
.tx_flags
= 0;
1237 sock_tx_timestamp(sk
, ipc6
->sockc
.tsflags
, &cork
->base
.tx_flags
);
1239 if (dst_allfrag(xfrm_dst_path(&rt
->dst
)))
1240 cork
->base
.flags
|= IPCORK_ALLFRAG
;
1241 cork
->base
.length
= 0;
1243 cork
->base
.transmit_time
= ipc6
->sockc
.transmit_time
;
1248 static int __ip6_append_data(struct sock
*sk
,
1250 struct sk_buff_head
*queue
,
1251 struct inet_cork
*cork
,
1252 struct inet6_cork
*v6_cork
,
1253 struct page_frag
*pfrag
,
1254 int getfrag(void *from
, char *to
, int offset
,
1255 int len
, int odd
, struct sk_buff
*skb
),
1256 void *from
, int length
, int transhdrlen
,
1257 unsigned int flags
, struct ipcm6_cookie
*ipc6
)
1259 struct sk_buff
*skb
, *skb_prev
= NULL
;
1260 unsigned int maxfraglen
, fragheaderlen
, mtu
, orig_mtu
, pmtu
;
1261 struct ubuf_info
*uarg
= NULL
;
1263 int dst_exthdrlen
= 0;
1269 struct rt6_info
*rt
= (struct rt6_info
*)cork
->dst
;
1270 struct ipv6_txoptions
*opt
= v6_cork
->opt
;
1271 int csummode
= CHECKSUM_NONE
;
1272 unsigned int maxnonfragsize
, headersize
;
1273 unsigned int wmem_alloc_delta
= 0;
1274 bool paged
, extra_uref
;
1276 skb
= skb_peek_tail(queue
);
1278 exthdrlen
= opt
? opt
->opt_flen
: 0;
1279 dst_exthdrlen
= rt
->dst
.header_len
- rt
->rt6i_nfheader_len
;
1282 paged
= !!cork
->gso_size
;
1283 mtu
= cork
->gso_size
? IP6_MAX_MTU
: cork
->fragsize
;
1286 if (cork
->tx_flags
& SKBTX_ANY_SW_TSTAMP
&&
1287 sk
->sk_tsflags
& SOF_TIMESTAMPING_OPT_ID
)
1288 tskey
= sk
->sk_tskey
++;
1290 hh_len
= LL_RESERVED_SPACE(rt
->dst
.dev
);
1292 fragheaderlen
= sizeof(struct ipv6hdr
) + rt
->rt6i_nfheader_len
+
1293 (opt
? opt
->opt_nflen
: 0);
1294 maxfraglen
= ((mtu
- fragheaderlen
) & ~7) + fragheaderlen
-
1295 sizeof(struct frag_hdr
);
1297 headersize
= sizeof(struct ipv6hdr
) +
1298 (opt
? opt
->opt_flen
+ opt
->opt_nflen
: 0) +
1299 (dst_allfrag(&rt
->dst
) ?
1300 sizeof(struct frag_hdr
) : 0) +
1301 rt
->rt6i_nfheader_len
;
1303 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1304 * the first fragment
1306 if (headersize
+ transhdrlen
> mtu
)
1309 if (cork
->length
+ length
> mtu
- headersize
&& ipc6
->dontfrag
&&
1310 (sk
->sk_protocol
== IPPROTO_UDP
||
1311 sk
->sk_protocol
== IPPROTO_RAW
)) {
1312 ipv6_local_rxpmtu(sk
, fl6
, mtu
- headersize
+
1313 sizeof(struct ipv6hdr
));
1317 if (ip6_sk_ignore_df(sk
))
1318 maxnonfragsize
= sizeof(struct ipv6hdr
) + IPV6_MAXPLEN
;
1320 maxnonfragsize
= mtu
;
1322 if (cork
->length
+ length
> maxnonfragsize
- headersize
) {
1324 pmtu
= max_t(int, mtu
- headersize
+ sizeof(struct ipv6hdr
), 0);
1325 ipv6_local_error(sk
, EMSGSIZE
, fl6
, pmtu
);
1329 /* CHECKSUM_PARTIAL only with no extension headers and when
1330 * we are not going to fragment
1332 if (transhdrlen
&& sk
->sk_protocol
== IPPROTO_UDP
&&
1333 headersize
== sizeof(struct ipv6hdr
) &&
1334 length
<= mtu
- headersize
&&
1335 (!(flags
& MSG_MORE
) || cork
->gso_size
) &&
1336 rt
->dst
.dev
->features
& (NETIF_F_IPV6_CSUM
| NETIF_F_HW_CSUM
))
1337 csummode
= CHECKSUM_PARTIAL
;
1339 if (flags
& MSG_ZEROCOPY
&& length
&& sock_flag(sk
, SOCK_ZEROCOPY
)) {
1340 uarg
= sock_zerocopy_realloc(sk
, length
, skb_zcopy(skb
));
1344 if (rt
->dst
.dev
->features
& NETIF_F_SG
&&
1345 csummode
== CHECKSUM_PARTIAL
) {
1349 skb_zcopy_set(skb
, uarg
, &extra_uref
);
1354 * Let's try using as much space as possible.
1355 * Use MTU if total length of the message fits into the MTU.
1356 * Otherwise, we need to reserve fragment header and
1357 * fragment alignment (= 8-15 octects, in total).
1359 * Note that we may need to "move" the data from the tail of
1360 * of the buffer to the new fragment when we split
1363 * FIXME: It may be fragmented into multiple chunks
1364 * at once if non-fragmentable extension headers
1369 cork
->length
+= length
;
1373 while (length
> 0) {
1374 /* Check if the remaining data fits into current packet. */
1375 copy
= (cork
->length
<= mtu
&& !(cork
->flags
& IPCORK_ALLFRAG
) ? mtu
: maxfraglen
) - skb
->len
;
1377 copy
= maxfraglen
- skb
->len
;
1381 unsigned int datalen
;
1382 unsigned int fraglen
;
1383 unsigned int fraggap
;
1384 unsigned int alloclen
;
1385 unsigned int pagedlen
;
1387 /* There's no room in the current skb */
1389 fraggap
= skb
->len
- maxfraglen
;
1392 /* update mtu and maxfraglen if necessary */
1393 if (!skb
|| !skb_prev
)
1394 ip6_append_data_mtu(&mtu
, &maxfraglen
,
1395 fragheaderlen
, skb
, rt
,
1401 * If remaining data exceeds the mtu,
1402 * we know we need more fragment(s).
1404 datalen
= length
+ fraggap
;
1406 if (datalen
> (cork
->length
<= mtu
&& !(cork
->flags
& IPCORK_ALLFRAG
) ? mtu
: maxfraglen
) - fragheaderlen
)
1407 datalen
= maxfraglen
- fragheaderlen
- rt
->dst
.trailer_len
;
1408 fraglen
= datalen
+ fragheaderlen
;
1411 if ((flags
& MSG_MORE
) &&
1412 !(rt
->dst
.dev
->features
&NETIF_F_SG
))
1417 alloclen
= min_t(int, fraglen
, MAX_HEADER
);
1418 pagedlen
= fraglen
- alloclen
;
1421 alloclen
+= dst_exthdrlen
;
1423 if (datalen
!= length
+ fraggap
) {
1425 * this is not the last fragment, the trailer
1426 * space is regarded as data space.
1428 datalen
+= rt
->dst
.trailer_len
;
1431 alloclen
+= rt
->dst
.trailer_len
;
1432 fraglen
= datalen
+ fragheaderlen
;
1435 * We just reserve space for fragment header.
1436 * Note: this may be overallocation if the message
1437 * (without MSG_MORE) fits into the MTU.
1439 alloclen
+= sizeof(struct frag_hdr
);
1441 copy
= datalen
- transhdrlen
- fraggap
- pagedlen
;
1447 skb
= sock_alloc_send_skb(sk
,
1449 (flags
& MSG_DONTWAIT
), &err
);
1452 if (refcount_read(&sk
->sk_wmem_alloc
) + wmem_alloc_delta
<=
1454 skb
= alloc_skb(alloclen
+ hh_len
,
1462 * Fill in the control structures
1464 skb
->protocol
= htons(ETH_P_IPV6
);
1465 skb
->ip_summed
= csummode
;
1467 /* reserve for fragmentation and ipsec header */
1468 skb_reserve(skb
, hh_len
+ sizeof(struct frag_hdr
) +
1472 * Find where to start putting bytes
1474 data
= skb_put(skb
, fraglen
- pagedlen
);
1475 skb_set_network_header(skb
, exthdrlen
);
1476 data
+= fragheaderlen
;
1477 skb
->transport_header
= (skb
->network_header
+
1480 skb
->csum
= skb_copy_and_csum_bits(
1481 skb_prev
, maxfraglen
,
1482 data
+ transhdrlen
, fraggap
, 0);
1483 skb_prev
->csum
= csum_sub(skb_prev
->csum
,
1486 pskb_trim_unique(skb_prev
, maxfraglen
);
1489 getfrag(from
, data
+ transhdrlen
, offset
,
1490 copy
, fraggap
, skb
) < 0) {
1497 length
-= copy
+ transhdrlen
;
1502 /* Only the initial fragment is time stamped */
1503 skb_shinfo(skb
)->tx_flags
= cork
->tx_flags
;
1505 skb_shinfo(skb
)->tskey
= tskey
;
1507 skb_zcopy_set(skb
, uarg
, &extra_uref
);
1509 if ((flags
& MSG_CONFIRM
) && !skb_prev
)
1510 skb_set_dst_pending_confirm(skb
, 1);
1513 * Put the packet on the pending queue
1515 if (!skb
->destructor
) {
1516 skb
->destructor
= sock_wfree
;
1518 wmem_alloc_delta
+= skb
->truesize
;
1520 __skb_queue_tail(queue
, skb
);
1527 if (!(rt
->dst
.dev
->features
&NETIF_F_SG
) &&
1528 skb_tailroom(skb
) >= copy
) {
1532 if (getfrag(from
, skb_put(skb
, copy
),
1533 offset
, copy
, off
, skb
) < 0) {
1534 __skb_trim(skb
, off
);
1538 } else if (!uarg
|| !uarg
->zerocopy
) {
1539 int i
= skb_shinfo(skb
)->nr_frags
;
1542 if (!sk_page_frag_refill(sk
, pfrag
))
1545 if (!skb_can_coalesce(skb
, i
, pfrag
->page
,
1548 if (i
== MAX_SKB_FRAGS
)
1551 __skb_fill_page_desc(skb
, i
, pfrag
->page
,
1553 skb_shinfo(skb
)->nr_frags
= ++i
;
1554 get_page(pfrag
->page
);
1556 copy
= min_t(int, copy
, pfrag
->size
- pfrag
->offset
);
1558 page_address(pfrag
->page
) + pfrag
->offset
,
1559 offset
, copy
, skb
->len
, skb
) < 0)
1562 pfrag
->offset
+= copy
;
1563 skb_frag_size_add(&skb_shinfo(skb
)->frags
[i
- 1], copy
);
1565 skb
->data_len
+= copy
;
1566 skb
->truesize
+= copy
;
1567 wmem_alloc_delta
+= copy
;
1569 err
= skb_zerocopy_iter_dgram(skb
, from
, copy
);
1577 if (wmem_alloc_delta
)
1578 refcount_add(wmem_alloc_delta
, &sk
->sk_wmem_alloc
);
1585 sock_zerocopy_put_abort(uarg
, extra_uref
);
1586 cork
->length
-= length
;
1587 IP6_INC_STATS(sock_net(sk
), rt
->rt6i_idev
, IPSTATS_MIB_OUTDISCARDS
);
1588 refcount_add(wmem_alloc_delta
, &sk
->sk_wmem_alloc
);
1592 int ip6_append_data(struct sock
*sk
,
1593 int getfrag(void *from
, char *to
, int offset
, int len
,
1594 int odd
, struct sk_buff
*skb
),
1595 void *from
, int length
, int transhdrlen
,
1596 struct ipcm6_cookie
*ipc6
, struct flowi6
*fl6
,
1597 struct rt6_info
*rt
, unsigned int flags
)
1599 struct inet_sock
*inet
= inet_sk(sk
);
1600 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1604 if (flags
&MSG_PROBE
)
1606 if (skb_queue_empty(&sk
->sk_write_queue
)) {
1610 err
= ip6_setup_cork(sk
, &inet
->cork
, &np
->cork
,
1615 exthdrlen
= (ipc6
->opt
? ipc6
->opt
->opt_flen
: 0);
1616 length
+= exthdrlen
;
1617 transhdrlen
+= exthdrlen
;
1619 fl6
= &inet
->cork
.fl
.u
.ip6
;
1623 return __ip6_append_data(sk
, fl6
, &sk
->sk_write_queue
, &inet
->cork
.base
,
1624 &np
->cork
, sk_page_frag(sk
), getfrag
,
1625 from
, length
, transhdrlen
, flags
, ipc6
);
1627 EXPORT_SYMBOL_GPL(ip6_append_data
);
1629 static void ip6_cork_release(struct inet_cork_full
*cork
,
1630 struct inet6_cork
*v6_cork
)
1633 kfree(v6_cork
->opt
->dst0opt
);
1634 kfree(v6_cork
->opt
->dst1opt
);
1635 kfree(v6_cork
->opt
->hopopt
);
1636 kfree(v6_cork
->opt
->srcrt
);
1637 kfree(v6_cork
->opt
);
1638 v6_cork
->opt
= NULL
;
1641 if (cork
->base
.dst
) {
1642 dst_release(cork
->base
.dst
);
1643 cork
->base
.dst
= NULL
;
1644 cork
->base
.flags
&= ~IPCORK_ALLFRAG
;
1646 memset(&cork
->fl
, 0, sizeof(cork
->fl
));
1649 struct sk_buff
*__ip6_make_skb(struct sock
*sk
,
1650 struct sk_buff_head
*queue
,
1651 struct inet_cork_full
*cork
,
1652 struct inet6_cork
*v6_cork
)
1654 struct sk_buff
*skb
, *tmp_skb
;
1655 struct sk_buff
**tail_skb
;
1656 struct in6_addr final_dst_buf
, *final_dst
= &final_dst_buf
;
1657 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1658 struct net
*net
= sock_net(sk
);
1659 struct ipv6hdr
*hdr
;
1660 struct ipv6_txoptions
*opt
= v6_cork
->opt
;
1661 struct rt6_info
*rt
= (struct rt6_info
*)cork
->base
.dst
;
1662 struct flowi6
*fl6
= &cork
->fl
.u
.ip6
;
1663 unsigned char proto
= fl6
->flowi6_proto
;
1665 skb
= __skb_dequeue(queue
);
1668 tail_skb
= &(skb_shinfo(skb
)->frag_list
);
1670 /* move skb->data to ip header from ext header */
1671 if (skb
->data
< skb_network_header(skb
))
1672 __skb_pull(skb
, skb_network_offset(skb
));
1673 while ((tmp_skb
= __skb_dequeue(queue
)) != NULL
) {
1674 __skb_pull(tmp_skb
, skb_network_header_len(skb
));
1675 *tail_skb
= tmp_skb
;
1676 tail_skb
= &(tmp_skb
->next
);
1677 skb
->len
+= tmp_skb
->len
;
1678 skb
->data_len
+= tmp_skb
->len
;
1679 skb
->truesize
+= tmp_skb
->truesize
;
1680 tmp_skb
->destructor
= NULL
;
1684 /* Allow local fragmentation. */
1685 skb
->ignore_df
= ip6_sk_ignore_df(sk
);
1687 *final_dst
= fl6
->daddr
;
1688 __skb_pull(skb
, skb_network_header_len(skb
));
1689 if (opt
&& opt
->opt_flen
)
1690 ipv6_push_frag_opts(skb
, opt
, &proto
);
1691 if (opt
&& opt
->opt_nflen
)
1692 ipv6_push_nfrag_opts(skb
, opt
, &proto
, &final_dst
, &fl6
->saddr
);
1694 skb_push(skb
, sizeof(struct ipv6hdr
));
1695 skb_reset_network_header(skb
);
1696 hdr
= ipv6_hdr(skb
);
1698 ip6_flow_hdr(hdr
, v6_cork
->tclass
,
1699 ip6_make_flowlabel(net
, skb
, fl6
->flowlabel
,
1700 ip6_autoflowlabel(net
, np
), fl6
));
1701 hdr
->hop_limit
= v6_cork
->hop_limit
;
1702 hdr
->nexthdr
= proto
;
1703 hdr
->saddr
= fl6
->saddr
;
1704 hdr
->daddr
= *final_dst
;
1706 skb
->priority
= sk
->sk_priority
;
1707 skb
->mark
= sk
->sk_mark
;
1709 skb
->tstamp
= cork
->base
.transmit_time
;
1711 skb_dst_set(skb
, dst_clone(&rt
->dst
));
1712 IP6_UPD_PO_STATS(net
, rt
->rt6i_idev
, IPSTATS_MIB_OUT
, skb
->len
);
1713 if (proto
== IPPROTO_ICMPV6
) {
1714 struct inet6_dev
*idev
= ip6_dst_idev(skb_dst(skb
));
1716 ICMP6MSGOUT_INC_STATS(net
, idev
, icmp6_hdr(skb
)->icmp6_type
);
1717 ICMP6_INC_STATS(net
, idev
, ICMP6_MIB_OUTMSGS
);
1720 ip6_cork_release(cork
, v6_cork
);
1725 int ip6_send_skb(struct sk_buff
*skb
)
1727 struct net
*net
= sock_net(skb
->sk
);
1728 struct rt6_info
*rt
= (struct rt6_info
*)skb_dst(skb
);
1731 err
= ip6_local_out(net
, skb
->sk
, skb
);
1734 err
= net_xmit_errno(err
);
1736 IP6_INC_STATS(net
, rt
->rt6i_idev
,
1737 IPSTATS_MIB_OUTDISCARDS
);
/*
 * ip6_push_pending_frames - finish and transmit the socket's corked data
 * @sk: socket whose write queue is flushed
 *
 * Returns 0 if there was nothing to send or on success, negative errno
 * on transmit failure. ip6_finish_skb() may return NULL (empty queue or
 * allocation failure), which must not be forwarded to ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1755 static void __ip6_flush_pending_frames(struct sock
*sk
,
1756 struct sk_buff_head
*queue
,
1757 struct inet_cork_full
*cork
,
1758 struct inet6_cork
*v6_cork
)
1760 struct sk_buff
*skb
;
1762 while ((skb
= __skb_dequeue_tail(queue
)) != NULL
) {
1764 IP6_INC_STATS(sock_net(sk
), ip6_dst_idev(skb_dst(skb
)),
1765 IPSTATS_MIB_OUTDISCARDS
);
1769 ip6_cork_release(cork
, v6_cork
);
1772 void ip6_flush_pending_frames(struct sock
*sk
)
1774 __ip6_flush_pending_frames(sk
, &sk
->sk_write_queue
,
1775 &inet_sk(sk
)->cork
, &inet6_sk(sk
)->cork
);
1777 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames
);
1779 struct sk_buff
*ip6_make_skb(struct sock
*sk
,
1780 int getfrag(void *from
, char *to
, int offset
,
1781 int len
, int odd
, struct sk_buff
*skb
),
1782 void *from
, int length
, int transhdrlen
,
1783 struct ipcm6_cookie
*ipc6
, struct flowi6
*fl6
,
1784 struct rt6_info
*rt
, unsigned int flags
,
1785 struct inet_cork_full
*cork
)
1787 struct inet6_cork v6_cork
;
1788 struct sk_buff_head queue
;
1789 int exthdrlen
= (ipc6
->opt
? ipc6
->opt
->opt_flen
: 0);
1792 if (flags
& MSG_PROBE
)
1795 __skb_queue_head_init(&queue
);
1797 cork
->base
.flags
= 0;
1798 cork
->base
.addr
= 0;
1799 cork
->base
.opt
= NULL
;
1800 cork
->base
.dst
= NULL
;
1802 err
= ip6_setup_cork(sk
, cork
, &v6_cork
, ipc6
, rt
, fl6
);
1804 ip6_cork_release(cork
, &v6_cork
);
1805 return ERR_PTR(err
);
1807 if (ipc6
->dontfrag
< 0)
1808 ipc6
->dontfrag
= inet6_sk(sk
)->dontfrag
;
1810 err
= __ip6_append_data(sk
, fl6
, &queue
, &cork
->base
, &v6_cork
,
1811 ¤t
->task_frag
, getfrag
, from
,
1812 length
+ exthdrlen
, transhdrlen
+ exthdrlen
,
1815 __ip6_flush_pending_frames(sk
, &queue
, cork
, &v6_cork
);
1816 return ERR_PTR(err
);
1819 return __ip6_make_skb(sk
, &queue
, cork
, &v6_cork
);