]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blob - net/ipv6/icmp.c
Merge tag 'selinux-pr-20201214' of git://git.kernel.org/pub/scm/linux/kernel/git...
[mirror_ubuntu-kernels.git] / net / ipv6 / icmp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Internet Control Message Protocol (ICMPv6)
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on net/ipv4/icmp.c
10 *
11 * RFC 1885
12 */
13
14 /*
15 * Changes:
16 *
17 * Andi Kleen : exception handling
18 * Andi Kleen add rate limits. never reply to a icmp.
19 * add more length checks and other fixes.
20 * yoshfuji : ensure to sent parameter problem for
21 * fragments.
22 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23 * Randy Dunlap and
24 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
26 */
27
28 #define pr_fmt(fmt) "IPv6: " fmt
29
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50
51 #include <net/ip.h>
52 #include <net/sock.h>
53
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68
69 #include <linux/uaccess.h>
70
71 /*
72 * The ICMP socket(s). This is the most convenient way to flow control
73 * our ICMP output as well as maintain a clean interface throughout
74 * all layers. All Socketless IP sends will soon be gone.
75 *
76 * On SMP we have one ICMP socket per-cpu.
77 */
78 static struct sock *icmpv6_sk(struct net *net)
79 {
80 return this_cpu_read(*net->ipv6.icmp_sk);
81 }
82
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 u8 type, u8 code, int offset, __be32 info)
85 {
86 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 struct net *net = dev_net(skb->dev);
89
90 if (type == ICMPV6_PKT_TOOBIG)
91 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 else if (type == NDISC_REDIRECT)
93 ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 sock_net_uid(net, NULL));
95
96 if (!(type & ICMPV6_INFOMSG_MASK))
97 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 ping_err(skb, offset, ntohl(info));
99
100 return 0;
101 }
102
103 static int icmpv6_rcv(struct sk_buff *skb);
104
105 static const struct inet6_protocol icmpv6_protocol = {
106 .handler = icmpv6_rcv,
107 .err_handler = icmpv6_err,
108 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 struct sock *sk;
115
116 sk = icmpv6_sk(net);
117 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 /* This can happen if the output path (f.e. SIT or
119 * ip6ip6 tunnel) signals dst_link_failure() for an
120 * outgoing ICMP6 packet.
121 */
122 return NULL;
123 }
124 return sk;
125 }
126
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 spin_unlock(&sk->sk_lock.slock);
130 }
131
132 /*
133 * Figure out, may we reply to this packet with icmp error.
134 *
135 * We do not reply, if:
136 * - it was icmp error message.
137 * - it is truncated, so that it is known, that protocol is ICMPV6
138 * (i.e. in the middle of some exthdr)
139 *
140 * --ANK (980726)
141 */
142
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 int len = skb->len - ptr;
147 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 __be16 frag_off;
149
150 if (len < 0)
151 return true;
152
153 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 if (ptr < 0)
155 return false;
156 if (nexthdr == IPPROTO_ICMPV6) {
157 u8 _type, *tp;
158 tp = skb_header_pointer(skb,
159 ptr+offsetof(struct icmp6hdr, icmp6_type),
160 sizeof(_type), &_type);
161
162 /* Based on RFC 8200, Section 4.5 Fragment Header, return
163 * false if this is a fragment packet with no icmp header info.
164 */
165 if (!tp && frag_off != 0)
166 return false;
167 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
168 return true;
169 }
170 return false;
171 }
172
173 static bool icmpv6_mask_allow(struct net *net, int type)
174 {
175 if (type > ICMPV6_MSG_MAX)
176 return true;
177
178 /* Limit if icmp type is set in ratemask. */
179 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
180 return true;
181
182 return false;
183 }
184
185 static bool icmpv6_global_allow(struct net *net, int type)
186 {
187 if (icmpv6_mask_allow(net, type))
188 return true;
189
190 if (icmp_global_allow())
191 return true;
192
193 return false;
194 }
195
196 /*
197 * Check the ICMP output rate limit
198 */
199 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
200 struct flowi6 *fl6)
201 {
202 struct net *net = sock_net(sk);
203 struct dst_entry *dst;
204 bool res = false;
205
206 if (icmpv6_mask_allow(net, type))
207 return true;
208
209 /*
210 * Look up the output route.
211 * XXX: perhaps the expire for routing entries cloned by
212 * this lookup should be more aggressive (not longer than timeout).
213 */
214 dst = ip6_route_output(net, sk, fl6);
215 if (dst->error) {
216 IP6_INC_STATS(net, ip6_dst_idev(dst),
217 IPSTATS_MIB_OUTNOROUTES);
218 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
219 res = true;
220 } else {
221 struct rt6_info *rt = (struct rt6_info *)dst;
222 int tmo = net->ipv6.sysctl.icmpv6_time;
223 struct inet_peer *peer;
224
225 /* Give more bandwidth to wider prefixes. */
226 if (rt->rt6i_dst.plen < 128)
227 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
228
229 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
230 res = inet_peer_xrlim_allow(peer, tmo);
231 if (peer)
232 inet_putpeer(peer);
233 }
234 dst_release(dst);
235 return res;
236 }
237
238 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
239 struct flowi6 *fl6)
240 {
241 struct net *net = sock_net(sk);
242 struct dst_entry *dst;
243 bool res = false;
244
245 dst = ip6_route_output(net, sk, fl6);
246 if (!dst->error) {
247 struct rt6_info *rt = (struct rt6_info *)dst;
248 struct in6_addr prefsrc;
249
250 rt6_get_prefsrc(rt, &prefsrc);
251 res = !ipv6_addr_any(&prefsrc);
252 }
253 dst_release(dst);
254 return res;
255 }
256
257 /*
258 * an inline helper for the "simple" if statement below
259 * checks if parameter problem report is caused by an
260 * unrecognized IPv6 option that has the Option Type
261 * highest-order two bits set to 10
262 */
263
264 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
265 {
266 u8 _optval, *op;
267
268 offset += skb_network_offset(skb);
269 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
270 if (!op)
271 return true;
272 return (*op & 0xC0) == 0x80;
273 }
274
275 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
276 struct icmp6hdr *thdr, int len)
277 {
278 struct sk_buff *skb;
279 struct icmp6hdr *icmp6h;
280
281 skb = skb_peek(&sk->sk_write_queue);
282 if (!skb)
283 return;
284
285 icmp6h = icmp6_hdr(skb);
286 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
287 icmp6h->icmp6_cksum = 0;
288
289 if (skb_queue_len(&sk->sk_write_queue) == 1) {
290 skb->csum = csum_partial(icmp6h,
291 sizeof(struct icmp6hdr), skb->csum);
292 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
293 &fl6->daddr,
294 len, fl6->flowi6_proto,
295 skb->csum);
296 } else {
297 __wsum tmp_csum = 0;
298
299 skb_queue_walk(&sk->sk_write_queue, skb) {
300 tmp_csum = csum_add(tmp_csum, skb->csum);
301 }
302
303 tmp_csum = csum_partial(icmp6h,
304 sizeof(struct icmp6hdr), tmp_csum);
305 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
306 &fl6->daddr,
307 len, fl6->flowi6_proto,
308 tmp_csum);
309 }
310 ip6_push_pending_frames(sk);
311 }
312
313 struct icmpv6_msg {
314 struct sk_buff *skb;
315 int offset;
316 uint8_t type;
317 };
318
319 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
320 {
321 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
322 struct sk_buff *org_skb = msg->skb;
323 __wsum csum;
324
325 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
326 to, len);
327 skb->csum = csum_block_add(skb->csum, csum, odd);
328 if (!(msg->type & ICMPV6_INFOMSG_MASK))
329 nf_ct_attach(skb, org_skb);
330 return 0;
331 }
332
333 #if IS_ENABLED(CONFIG_IPV6_MIP6)
334 static void mip6_addr_swap(struct sk_buff *skb)
335 {
336 struct ipv6hdr *iph = ipv6_hdr(skb);
337 struct inet6_skb_parm *opt = IP6CB(skb);
338 struct ipv6_destopt_hao *hao;
339 struct in6_addr tmp;
340 int off;
341
342 if (opt->dsthao) {
343 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
344 if (likely(off >= 0)) {
345 hao = (struct ipv6_destopt_hao *)
346 (skb_network_header(skb) + off);
347 tmp = iph->saddr;
348 iph->saddr = hao->addr;
349 hao->addr = tmp;
350 }
351 }
352 }
353 #else
354 static inline void mip6_addr_swap(struct sk_buff *skb) {}
355 #endif
356
357 static struct dst_entry *icmpv6_route_lookup(struct net *net,
358 struct sk_buff *skb,
359 struct sock *sk,
360 struct flowi6 *fl6)
361 {
362 struct dst_entry *dst, *dst2;
363 struct flowi6 fl2;
364 int err;
365
366 err = ip6_dst_lookup(net, sk, &dst, fl6);
367 if (err)
368 return ERR_PTR(err);
369
370 /*
371 * We won't send icmp if the destination is known
372 * anycast.
373 */
374 if (ipv6_anycast_destination(dst, &fl6->daddr)) {
375 net_dbg_ratelimited("icmp6_send: acast source\n");
376 dst_release(dst);
377 return ERR_PTR(-EINVAL);
378 }
379
380 /* No need to clone since we're just using its address. */
381 dst2 = dst;
382
383 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
384 if (!IS_ERR(dst)) {
385 if (dst != dst2)
386 return dst;
387 } else {
388 if (PTR_ERR(dst) == -EPERM)
389 dst = NULL;
390 else
391 return dst;
392 }
393
394 err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
395 if (err)
396 goto relookup_failed;
397
398 err = ip6_dst_lookup(net, sk, &dst2, &fl2);
399 if (err)
400 goto relookup_failed;
401
402 dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
403 if (!IS_ERR(dst2)) {
404 dst_release(dst);
405 dst = dst2;
406 } else {
407 err = PTR_ERR(dst2);
408 if (err == -EPERM) {
409 dst_release(dst);
410 return dst2;
411 } else
412 goto relookup_failed;
413 }
414
415 relookup_failed:
416 if (dst)
417 return dst;
418 return ERR_PTR(err);
419 }
420
421 static struct net_device *icmp6_dev(const struct sk_buff *skb)
422 {
423 struct net_device *dev = skb->dev;
424
425 /* for local traffic to local address, skb dev is the loopback
426 * device. Check if there is a dst attached to the skb and if so
427 * get the real device index. Same is needed for replies to a link
428 * local address on a device enslaved to an L3 master device
429 */
430 if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
431 const struct rt6_info *rt6 = skb_rt6_info(skb);
432
433 if (rt6)
434 dev = rt6->rt6i_idev->dev;
435 }
436
437 return dev;
438 }
439
440 static int icmp6_iif(const struct sk_buff *skb)
441 {
442 return icmp6_dev(skb)->ifindex;
443 }
444
445 /*
446 * Send an ICMP message in response to a packet in error
447 */
448 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
449 const struct in6_addr *force_saddr)
450 {
451 struct inet6_dev *idev = NULL;
452 struct ipv6hdr *hdr = ipv6_hdr(skb);
453 struct sock *sk;
454 struct net *net;
455 struct ipv6_pinfo *np;
456 const struct in6_addr *saddr = NULL;
457 struct dst_entry *dst;
458 struct icmp6hdr tmp_hdr;
459 struct flowi6 fl6;
460 struct icmpv6_msg msg;
461 struct ipcm6_cookie ipc6;
462 int iif = 0;
463 int addr_type = 0;
464 int len;
465 u32 mark;
466
467 if ((u8 *)hdr < skb->head ||
468 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
469 return;
470
471 if (!skb->dev)
472 return;
473 net = dev_net(skb->dev);
474 mark = IP6_REPLY_MARK(net, skb->mark);
475 /*
476 * Make sure we respect the rules
477 * i.e. RFC 1885 2.4(e)
478 * Rule (e.1) is enforced by not using icmp6_send
479 * in any code that processes icmp errors.
480 */
481 addr_type = ipv6_addr_type(&hdr->daddr);
482
483 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
484 ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
485 saddr = &hdr->daddr;
486
487 /*
488 * Dest addr check
489 */
490
491 if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
492 if (type != ICMPV6_PKT_TOOBIG &&
493 !(type == ICMPV6_PARAMPROB &&
494 code == ICMPV6_UNK_OPTION &&
495 (opt_unrec(skb, info))))
496 return;
497
498 saddr = NULL;
499 }
500
501 addr_type = ipv6_addr_type(&hdr->saddr);
502
503 /*
504 * Source addr check
505 */
506
507 if (__ipv6_addr_needs_scope_id(addr_type)) {
508 iif = icmp6_iif(skb);
509 } else {
510 /*
511 * The source device is used for looking up which routing table
512 * to use for sending an ICMP error.
513 */
514 iif = l3mdev_master_ifindex(skb->dev);
515 }
516
517 /*
518 * Must not send error if the source does not uniquely
519 * identify a single node (RFC2463 Section 2.4).
520 * We check unspecified / multicast addresses here,
521 * and anycast addresses will be checked later.
522 */
523 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
524 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
525 &hdr->saddr, &hdr->daddr);
526 return;
527 }
528
529 /*
530 * Never answer to a ICMP packet.
531 */
532 if (is_ineligible(skb)) {
533 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
534 &hdr->saddr, &hdr->daddr);
535 return;
536 }
537
538 /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
539 local_bh_disable();
540
541 /* Check global sysctl_icmp_msgs_per_sec ratelimit */
542 if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
543 goto out_bh_enable;
544
545 mip6_addr_swap(skb);
546
547 sk = icmpv6_xmit_lock(net);
548 if (!sk)
549 goto out_bh_enable;
550
551 memset(&fl6, 0, sizeof(fl6));
552 fl6.flowi6_proto = IPPROTO_ICMPV6;
553 fl6.daddr = hdr->saddr;
554 if (force_saddr)
555 saddr = force_saddr;
556 if (saddr) {
557 fl6.saddr = *saddr;
558 } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
559 /* select a more meaningful saddr from input if */
560 struct net_device *in_netdev;
561
562 in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
563 if (in_netdev) {
564 ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
565 inet6_sk(sk)->srcprefs,
566 &fl6.saddr);
567 dev_put(in_netdev);
568 }
569 }
570 fl6.flowi6_mark = mark;
571 fl6.flowi6_oif = iif;
572 fl6.fl6_icmp_type = type;
573 fl6.fl6_icmp_code = code;
574 fl6.flowi6_uid = sock_net_uid(net, NULL);
575 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
576 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
577
578 np = inet6_sk(sk);
579
580 if (!icmpv6_xrlim_allow(sk, type, &fl6))
581 goto out;
582
583 tmp_hdr.icmp6_type = type;
584 tmp_hdr.icmp6_code = code;
585 tmp_hdr.icmp6_cksum = 0;
586 tmp_hdr.icmp6_pointer = htonl(info);
587
588 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
589 fl6.flowi6_oif = np->mcast_oif;
590 else if (!fl6.flowi6_oif)
591 fl6.flowi6_oif = np->ucast_oif;
592
593 ipcm6_init_sk(&ipc6, np);
594 ipc6.sockc.mark = mark;
595 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
596
597 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
598 if (IS_ERR(dst))
599 goto out;
600
601 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
602
603 msg.skb = skb;
604 msg.offset = skb_network_offset(skb);
605 msg.type = type;
606
607 len = skb->len - msg.offset;
608 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
609 if (len < 0) {
610 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
611 &hdr->saddr, &hdr->daddr);
612 goto out_dst_release;
613 }
614
615 rcu_read_lock();
616 idev = __in6_dev_get(skb->dev);
617
618 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
619 len + sizeof(struct icmp6hdr),
620 sizeof(struct icmp6hdr),
621 &ipc6, &fl6, (struct rt6_info *)dst,
622 MSG_DONTWAIT)) {
623 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
624 ip6_flush_pending_frames(sk);
625 } else {
626 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
627 len + sizeof(struct icmp6hdr));
628 }
629 rcu_read_unlock();
630 out_dst_release:
631 dst_release(dst);
632 out:
633 icmpv6_xmit_unlock(sk);
634 out_bh_enable:
635 local_bh_enable();
636 }
637 EXPORT_SYMBOL(icmp6_send);
638
639 /* Slightly more convenient version of icmp6_send.
640 */
641 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
642 {
643 icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
644 kfree_skb(skb);
645 }
646
647 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
648 * if sufficient data bytes are available
649 * @nhs is the size of the tunnel header(s) :
650 * Either an IPv4 header for SIT encap
651 * an IPv4 header + GRE header for GRE encap
652 */
653 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
654 unsigned int data_len)
655 {
656 struct in6_addr temp_saddr;
657 struct rt6_info *rt;
658 struct sk_buff *skb2;
659 u32 info = 0;
660
661 if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
662 return 1;
663
664 /* RFC 4884 (partial) support for ICMP extensions */
665 if (data_len < 128 || (data_len & 7) || skb->len < data_len)
666 data_len = 0;
667
668 skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
669
670 if (!skb2)
671 return 1;
672
673 skb_dst_drop(skb2);
674 skb_pull(skb2, nhs);
675 skb_reset_network_header(skb2);
676
677 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
678 skb, 0);
679
680 if (rt && rt->dst.dev)
681 skb2->dev = rt->dst.dev;
682
683 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
684
685 if (data_len) {
686 /* RFC 4884 (partial) support :
687 * insert 0 padding at the end, before the extensions
688 */
689 __skb_push(skb2, nhs);
690 skb_reset_network_header(skb2);
691 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
692 memset(skb2->data + data_len - nhs, 0, nhs);
693 /* RFC 4884 4.5 : Length is measured in 64-bit words,
694 * and stored in reserved[0]
695 */
696 info = (data_len/8) << 24;
697 }
698 if (type == ICMP_TIME_EXCEEDED)
699 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
700 info, &temp_saddr);
701 else
702 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
703 info, &temp_saddr);
704 if (rt)
705 ip6_rt_put(rt);
706
707 kfree_skb(skb2);
708
709 return 0;
710 }
711 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
712
713 static void icmpv6_echo_reply(struct sk_buff *skb)
714 {
715 struct net *net = dev_net(skb->dev);
716 struct sock *sk;
717 struct inet6_dev *idev;
718 struct ipv6_pinfo *np;
719 const struct in6_addr *saddr = NULL;
720 struct icmp6hdr *icmph = icmp6_hdr(skb);
721 struct icmp6hdr tmp_hdr;
722 struct flowi6 fl6;
723 struct icmpv6_msg msg;
724 struct dst_entry *dst;
725 struct ipcm6_cookie ipc6;
726 u32 mark = IP6_REPLY_MARK(net, skb->mark);
727 bool acast;
728
729 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
730 net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
731 return;
732
733 saddr = &ipv6_hdr(skb)->daddr;
734
735 acast = ipv6_anycast_destination(skb_dst(skb), saddr);
736 if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
737 return;
738
739 if (!ipv6_unicast_destination(skb) &&
740 !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
741 saddr = NULL;
742
743 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
744 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
745
746 memset(&fl6, 0, sizeof(fl6));
747 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
748 fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
749
750 fl6.flowi6_proto = IPPROTO_ICMPV6;
751 fl6.daddr = ipv6_hdr(skb)->saddr;
752 if (saddr)
753 fl6.saddr = *saddr;
754 fl6.flowi6_oif = icmp6_iif(skb);
755 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
756 fl6.flowi6_mark = mark;
757 fl6.flowi6_uid = sock_net_uid(net, NULL);
758 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
759
760 local_bh_disable();
761 sk = icmpv6_xmit_lock(net);
762 if (!sk)
763 goto out_bh_enable;
764 np = inet6_sk(sk);
765
766 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
767 fl6.flowi6_oif = np->mcast_oif;
768 else if (!fl6.flowi6_oif)
769 fl6.flowi6_oif = np->ucast_oif;
770
771 if (ip6_dst_lookup(net, sk, &dst, &fl6))
772 goto out;
773 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
774 if (IS_ERR(dst))
775 goto out;
776
777 /* Check the ratelimit */
778 if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
779 !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
780 goto out_dst_release;
781
782 idev = __in6_dev_get(skb->dev);
783
784 msg.skb = skb;
785 msg.offset = 0;
786 msg.type = ICMPV6_ECHO_REPLY;
787
788 ipcm6_init_sk(&ipc6, np);
789 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
790 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
791 ipc6.sockc.mark = mark;
792
793 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
794 skb->len + sizeof(struct icmp6hdr),
795 sizeof(struct icmp6hdr), &ipc6, &fl6,
796 (struct rt6_info *)dst, MSG_DONTWAIT)) {
797 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
798 ip6_flush_pending_frames(sk);
799 } else {
800 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
801 skb->len + sizeof(struct icmp6hdr));
802 }
803 out_dst_release:
804 dst_release(dst);
805 out:
806 icmpv6_xmit_unlock(sk);
807 out_bh_enable:
808 local_bh_enable();
809 }
810
811 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
812 {
813 const struct inet6_protocol *ipprot;
814 int inner_offset;
815 __be16 frag_off;
816 u8 nexthdr;
817 struct net *net = dev_net(skb->dev);
818
819 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
820 goto out;
821
822 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
823 if (ipv6_ext_hdr(nexthdr)) {
824 /* now skip over extension headers */
825 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
826 &nexthdr, &frag_off);
827 if (inner_offset < 0)
828 goto out;
829 } else {
830 inner_offset = sizeof(struct ipv6hdr);
831 }
832
833 /* Checkin header including 8 bytes of inner protocol header. */
834 if (!pskb_may_pull(skb, inner_offset+8))
835 goto out;
836
837 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
838 Without this we will not able f.e. to make source routed
839 pmtu discovery.
840 Corresponding argument (opt) to notifiers is already added.
841 --ANK (980726)
842 */
843
844 ipprot = rcu_dereference(inet6_protos[nexthdr]);
845 if (ipprot && ipprot->err_handler)
846 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
847
848 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
849 return;
850
851 out:
852 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
853 }
854
855 /*
856 * Handle icmp messages
857 */
858
859 static int icmpv6_rcv(struct sk_buff *skb)
860 {
861 struct net *net = dev_net(skb->dev);
862 struct net_device *dev = icmp6_dev(skb);
863 struct inet6_dev *idev = __in6_dev_get(dev);
864 const struct in6_addr *saddr, *daddr;
865 struct icmp6hdr *hdr;
866 u8 type;
867 bool success = false;
868
869 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
870 struct sec_path *sp = skb_sec_path(skb);
871 int nh;
872
873 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
874 XFRM_STATE_ICMP))
875 goto drop_no_count;
876
877 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
878 goto drop_no_count;
879
880 nh = skb_network_offset(skb);
881 skb_set_network_header(skb, sizeof(*hdr));
882
883 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
884 goto drop_no_count;
885
886 skb_set_network_header(skb, nh);
887 }
888
889 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
890
891 saddr = &ipv6_hdr(skb)->saddr;
892 daddr = &ipv6_hdr(skb)->daddr;
893
894 if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
895 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
896 saddr, daddr);
897 goto csum_error;
898 }
899
900 if (!pskb_pull(skb, sizeof(*hdr)))
901 goto discard_it;
902
903 hdr = icmp6_hdr(skb);
904
905 type = hdr->icmp6_type;
906
907 ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
908
909 switch (type) {
910 case ICMPV6_ECHO_REQUEST:
911 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
912 icmpv6_echo_reply(skb);
913 break;
914
915 case ICMPV6_ECHO_REPLY:
916 success = ping_rcv(skb);
917 break;
918
919 case ICMPV6_PKT_TOOBIG:
920 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
921 standard destination cache. Seems, only "advanced"
922 destination cache will allow to solve this problem
923 --ANK (980726)
924 */
925 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
926 goto discard_it;
927 hdr = icmp6_hdr(skb);
928
929 /* to notify */
930 fallthrough;
931 case ICMPV6_DEST_UNREACH:
932 case ICMPV6_TIME_EXCEED:
933 case ICMPV6_PARAMPROB:
934 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
935 break;
936
937 case NDISC_ROUTER_SOLICITATION:
938 case NDISC_ROUTER_ADVERTISEMENT:
939 case NDISC_NEIGHBOUR_SOLICITATION:
940 case NDISC_NEIGHBOUR_ADVERTISEMENT:
941 case NDISC_REDIRECT:
942 ndisc_rcv(skb);
943 break;
944
945 case ICMPV6_MGM_QUERY:
946 igmp6_event_query(skb);
947 break;
948
949 case ICMPV6_MGM_REPORT:
950 igmp6_event_report(skb);
951 break;
952
953 case ICMPV6_MGM_REDUCTION:
954 case ICMPV6_NI_QUERY:
955 case ICMPV6_NI_REPLY:
956 case ICMPV6_MLD2_REPORT:
957 case ICMPV6_DHAAD_REQUEST:
958 case ICMPV6_DHAAD_REPLY:
959 case ICMPV6_MOBILE_PREFIX_SOL:
960 case ICMPV6_MOBILE_PREFIX_ADV:
961 break;
962
963 default:
964 /* informational */
965 if (type & ICMPV6_INFOMSG_MASK)
966 break;
967
968 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
969 saddr, daddr);
970
971 /*
972 * error of unknown type.
973 * must pass to upper level
974 */
975
976 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
977 }
978
979 /* until the v6 path can be better sorted assume failure and
980 * preserve the status quo behaviour for the rest of the paths to here
981 */
982 if (success)
983 consume_skb(skb);
984 else
985 kfree_skb(skb);
986
987 return 0;
988
989 csum_error:
990 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
991 discard_it:
992 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
993 drop_no_count:
994 kfree_skb(skb);
995 return 0;
996 }
997
998 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
999 u8 type,
1000 const struct in6_addr *saddr,
1001 const struct in6_addr *daddr,
1002 int oif)
1003 {
1004 memset(fl6, 0, sizeof(*fl6));
1005 fl6->saddr = *saddr;
1006 fl6->daddr = *daddr;
1007 fl6->flowi6_proto = IPPROTO_ICMPV6;
1008 fl6->fl6_icmp_type = type;
1009 fl6->fl6_icmp_code = 0;
1010 fl6->flowi6_oif = oif;
1011 security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1012 }
1013
1014 static void __net_exit icmpv6_sk_exit(struct net *net)
1015 {
1016 int i;
1017
1018 for_each_possible_cpu(i)
1019 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1020 free_percpu(net->ipv6.icmp_sk);
1021 }
1022
1023 static int __net_init icmpv6_sk_init(struct net *net)
1024 {
1025 struct sock *sk;
1026 int err, i;
1027
1028 net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1029 if (!net->ipv6.icmp_sk)
1030 return -ENOMEM;
1031
1032 for_each_possible_cpu(i) {
1033 err = inet_ctl_sock_create(&sk, PF_INET6,
1034 SOCK_RAW, IPPROTO_ICMPV6, net);
1035 if (err < 0) {
1036 pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1037 err);
1038 goto fail;
1039 }
1040
1041 *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1042
1043 /* Enough space for 2 64K ICMP packets, including
1044 * sk_buff struct overhead.
1045 */
1046 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1047 }
1048 return 0;
1049
1050 fail:
1051 icmpv6_sk_exit(net);
1052 return err;
1053 }
1054
1055 static struct pernet_operations icmpv6_sk_ops = {
1056 .init = icmpv6_sk_init,
1057 .exit = icmpv6_sk_exit,
1058 };
1059
1060 int __init icmpv6_init(void)
1061 {
1062 int err;
1063
1064 err = register_pernet_subsys(&icmpv6_sk_ops);
1065 if (err < 0)
1066 return err;
1067
1068 err = -EAGAIN;
1069 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1070 goto fail;
1071
1072 err = inet6_register_icmp_sender(icmp6_send);
1073 if (err)
1074 goto sender_reg_err;
1075 return 0;
1076
1077 sender_reg_err:
1078 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1079 fail:
1080 pr_err("Failed to register ICMP6 protocol\n");
1081 unregister_pernet_subsys(&icmpv6_sk_ops);
1082 return err;
1083 }
1084
1085 void icmpv6_cleanup(void)
1086 {
1087 inet6_unregister_icmp_sender(icmp6_send);
1088 unregister_pernet_subsys(&icmpv6_sk_ops);
1089 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1090 }
1091
1092
/*
 * Destination Unreachable codes, indexed by ICMPv6 code: the errno to
 * report and whether icmpv6_err_convert() flags the error as fatal.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1126
1127 int icmpv6_err_convert(u8 type, u8 code, int *err)
1128 {
1129 int fatal = 0;
1130
1131 *err = EPROTO;
1132
1133 switch (type) {
1134 case ICMPV6_DEST_UNREACH:
1135 fatal = 1;
1136 if (code < ARRAY_SIZE(tab_unreach)) {
1137 *err = tab_unreach[code].err;
1138 fatal = tab_unreach[code].fatal;
1139 }
1140 break;
1141
1142 case ICMPV6_PKT_TOOBIG:
1143 *err = EMSGSIZE;
1144 break;
1145
1146 case ICMPV6_PARAMPROB:
1147 *err = EPROTO;
1148 fatal = 1;
1149 break;
1150
1151 case ICMPV6_TIME_EXCEED:
1152 *err = EHOSTUNREACH;
1153 break;
1154 }
1155
1156 return fatal;
1157 }
1158 EXPORT_SYMBOL(icmpv6_err_convert);
1159
1160 #ifdef CONFIG_SYSCTL
1161 static struct ctl_table ipv6_icmp_table_template[] = {
1162 {
1163 .procname = "ratelimit",
1164 .data = &init_net.ipv6.sysctl.icmpv6_time,
1165 .maxlen = sizeof(int),
1166 .mode = 0644,
1167 .proc_handler = proc_dointvec_ms_jiffies,
1168 },
1169 {
1170 .procname = "echo_ignore_all",
1171 .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1172 .maxlen = sizeof(int),
1173 .mode = 0644,
1174 .proc_handler = proc_dointvec,
1175 },
1176 {
1177 .procname = "echo_ignore_multicast",
1178 .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1179 .maxlen = sizeof(int),
1180 .mode = 0644,
1181 .proc_handler = proc_dointvec,
1182 },
1183 {
1184 .procname = "echo_ignore_anycast",
1185 .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1186 .maxlen = sizeof(int),
1187 .mode = 0644,
1188 .proc_handler = proc_dointvec,
1189 },
1190 {
1191 .procname = "ratemask",
1192 .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1193 .maxlen = ICMPV6_MSG_MAX + 1,
1194 .mode = 0644,
1195 .proc_handler = proc_do_large_bitmap,
1196 },
1197 { },
1198 };
1199
1200 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1201 {
1202 struct ctl_table *table;
1203
1204 table = kmemdup(ipv6_icmp_table_template,
1205 sizeof(ipv6_icmp_table_template),
1206 GFP_KERNEL);
1207
1208 if (table) {
1209 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1210 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1211 table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1212 table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1213 table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1214 }
1215 return table;
1216 }
1217 #endif