]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - net/ipv6/icmp.c
net/ipv6: Fix linklocal to global address with VRF
[mirror_ubuntu-jammy-kernel.git] / net / ipv6 / icmp.c
1 /*
2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on net/ipv4/icmp.c
9 *
10 * RFC 1885
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 /*
19 * Changes:
20 *
21 * Andi Kleen : exception handling
22 * Andi Kleen add rate limits. never reply to a icmp.
23 * add more length checks and other fixes.
24 * yoshfuji : ensure to sent parameter problem for
25 * fragments.
26 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
27 * Randy Dunlap and
28 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
30 */
31
32 #define pr_fmt(fmt) "IPv6: " fmt
33
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54
55 #include <net/ip.h>
56 #include <net/sock.h>
57
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72
73 #include <linux/uaccess.h>
74
75 /*
76 * The ICMP socket(s). This is the most convenient way to flow control
77 * our ICMP output as well as maintain a clean interface throughout
78 * all layers. All Socketless IP sends will soon be gone.
79 *
80 * On SMP we have one ICMP socket per-cpu.
81 */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 u8 type, u8 code, int offset, __be32 info)
89 {
90 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 struct net *net = dev_net(skb->dev);
93
94 if (type == ICMPV6_PKT_TOOBIG)
95 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 else if (type == NDISC_REDIRECT)
97 ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 sock_net_uid(net, NULL));
99
100 if (!(type & ICMPV6_INFOMSG_MASK))
101 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 ping_err(skb, offset, ntohl(info));
103 }
104
105 static int icmpv6_rcv(struct sk_buff *skb);
106
107 static const struct inet6_protocol icmpv6_protocol = {
108 .handler = icmpv6_rcv,
109 .err_handler = icmpv6_err,
110 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
111 };
112
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 struct sock *sk;
117
118 sk = icmpv6_sk(net);
119 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 /* This can happen if the output path (f.e. SIT or
121 * ip6ip6 tunnel) signals dst_link_failure() for an
122 * outgoing ICMP6 packet.
123 */
124 return NULL;
125 }
126 return sk;
127 }
128
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 spin_unlock(&sk->sk_lock.slock);
132 }
133
134 /*
135 * Figure out, may we reply to this packet with icmp error.
136 *
137 * We do not reply, if:
138 * - it was icmp error message.
139 * - it is truncated, so that it is known, that protocol is ICMPV6
140 * (i.e. in the middle of some exthdr)
141 *
142 * --ANK (980726)
143 */
144
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 int len = skb->len - ptr;
149 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 __be16 frag_off;
151
152 if (len < 0)
153 return true;
154
155 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 if (ptr < 0)
157 return false;
158 if (nexthdr == IPPROTO_ICMPV6) {
159 u8 _type, *tp;
160 tp = skb_header_pointer(skb,
161 ptr+offsetof(struct icmp6hdr, icmp6_type),
162 sizeof(_type), &_type);
163 if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 return true;
165 }
166 return false;
167 }
168
169 static bool icmpv6_mask_allow(int type)
170 {
171 /* Informational messages are not limited. */
172 if (type & ICMPV6_INFOMSG_MASK)
173 return true;
174
175 /* Do not limit pmtu discovery, it would break it. */
176 if (type == ICMPV6_PKT_TOOBIG)
177 return true;
178
179 return false;
180 }
181
182 static bool icmpv6_global_allow(int type)
183 {
184 if (icmpv6_mask_allow(type))
185 return true;
186
187 if (icmp_global_allow())
188 return true;
189
190 return false;
191 }
192
193 /*
194 * Check the ICMP output rate limit
195 */
196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
197 struct flowi6 *fl6)
198 {
199 struct net *net = sock_net(sk);
200 struct dst_entry *dst;
201 bool res = false;
202
203 if (icmpv6_mask_allow(type))
204 return true;
205
206 /*
207 * Look up the output route.
208 * XXX: perhaps the expire for routing entries cloned by
209 * this lookup should be more aggressive (not longer than timeout).
210 */
211 dst = ip6_route_output(net, sk, fl6);
212 if (dst->error) {
213 IP6_INC_STATS(net, ip6_dst_idev(dst),
214 IPSTATS_MIB_OUTNOROUTES);
215 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
216 res = true;
217 } else {
218 struct rt6_info *rt = (struct rt6_info *)dst;
219 int tmo = net->ipv6.sysctl.icmpv6_time;
220 struct inet_peer *peer;
221
222 /* Give more bandwidth to wider prefixes. */
223 if (rt->rt6i_dst.plen < 128)
224 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
225
226 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
227 res = inet_peer_xrlim_allow(peer, tmo);
228 if (peer)
229 inet_putpeer(peer);
230 }
231 dst_release(dst);
232 return res;
233 }
234
235 /*
236 * an inline helper for the "simple" if statement below
237 * checks if parameter problem report is caused by an
238 * unrecognized IPv6 option that has the Option Type
239 * highest-order two bits set to 10
240 */
241
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 u8 _optval, *op;
245
246 offset += skb_network_offset(skb);
247 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 if (!op)
249 return true;
250 return (*op & 0xC0) == 0x80;
251 }
252
253 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 struct icmp6hdr *thdr, int len)
255 {
256 struct sk_buff *skb;
257 struct icmp6hdr *icmp6h;
258
259 skb = skb_peek(&sk->sk_write_queue);
260 if (!skb)
261 return;
262
263 icmp6h = icmp6_hdr(skb);
264 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
265 icmp6h->icmp6_cksum = 0;
266
267 if (skb_queue_len(&sk->sk_write_queue) == 1) {
268 skb->csum = csum_partial(icmp6h,
269 sizeof(struct icmp6hdr), skb->csum);
270 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
271 &fl6->daddr,
272 len, fl6->flowi6_proto,
273 skb->csum);
274 } else {
275 __wsum tmp_csum = 0;
276
277 skb_queue_walk(&sk->sk_write_queue, skb) {
278 tmp_csum = csum_add(tmp_csum, skb->csum);
279 }
280
281 tmp_csum = csum_partial(icmp6h,
282 sizeof(struct icmp6hdr), tmp_csum);
283 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
284 &fl6->daddr,
285 len, fl6->flowi6_proto,
286 tmp_csum);
287 }
288 ip6_push_pending_frames(sk);
289 }
290
291 struct icmpv6_msg {
292 struct sk_buff *skb;
293 int offset;
294 uint8_t type;
295 };
296
297 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
298 {
299 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
300 struct sk_buff *org_skb = msg->skb;
301 __wsum csum = 0;
302
303 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
304 to, len, csum);
305 skb->csum = csum_block_add(skb->csum, csum, odd);
306 if (!(msg->type & ICMPV6_INFOMSG_MASK))
307 nf_ct_attach(skb, org_skb);
308 return 0;
309 }
310
311 #if IS_ENABLED(CONFIG_IPV6_MIP6)
312 static void mip6_addr_swap(struct sk_buff *skb)
313 {
314 struct ipv6hdr *iph = ipv6_hdr(skb);
315 struct inet6_skb_parm *opt = IP6CB(skb);
316 struct ipv6_destopt_hao *hao;
317 struct in6_addr tmp;
318 int off;
319
320 if (opt->dsthao) {
321 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
322 if (likely(off >= 0)) {
323 hao = (struct ipv6_destopt_hao *)
324 (skb_network_header(skb) + off);
325 tmp = iph->saddr;
326 iph->saddr = hao->addr;
327 hao->addr = tmp;
328 }
329 }
330 }
331 #else
332 static inline void mip6_addr_swap(struct sk_buff *skb) {}
333 #endif
334
335 static struct dst_entry *icmpv6_route_lookup(struct net *net,
336 struct sk_buff *skb,
337 struct sock *sk,
338 struct flowi6 *fl6)
339 {
340 struct dst_entry *dst, *dst2;
341 struct flowi6 fl2;
342 int err;
343
344 err = ip6_dst_lookup(net, sk, &dst, fl6);
345 if (err)
346 return ERR_PTR(err);
347
348 /*
349 * We won't send icmp if the destination is known
350 * anycast.
351 */
352 if (ipv6_anycast_destination(dst, &fl6->daddr)) {
353 net_dbg_ratelimited("icmp6_send: acast source\n");
354 dst_release(dst);
355 return ERR_PTR(-EINVAL);
356 }
357
358 /* No need to clone since we're just using its address. */
359 dst2 = dst;
360
361 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
362 if (!IS_ERR(dst)) {
363 if (dst != dst2)
364 return dst;
365 } else {
366 if (PTR_ERR(dst) == -EPERM)
367 dst = NULL;
368 else
369 return dst;
370 }
371
372 err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
373 if (err)
374 goto relookup_failed;
375
376 err = ip6_dst_lookup(net, sk, &dst2, &fl2);
377 if (err)
378 goto relookup_failed;
379
380 dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
381 if (!IS_ERR(dst2)) {
382 dst_release(dst);
383 dst = dst2;
384 } else {
385 err = PTR_ERR(dst2);
386 if (err == -EPERM) {
387 dst_release(dst);
388 return dst2;
389 } else
390 goto relookup_failed;
391 }
392
393 relookup_failed:
394 if (dst)
395 return dst;
396 return ERR_PTR(err);
397 }
398
399 static int icmp6_iif(const struct sk_buff *skb)
400 {
401 int iif = skb->dev->ifindex;
402
403 /* for local traffic to local address, skb dev is the loopback
404 * device. Check if there is a dst attached to the skb and if so
405 * get the real device index. Same is needed for replies to a link
406 * local address on a device enslaved to an L3 master device
407 */
408 if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
409 const struct rt6_info *rt6 = skb_rt6_info(skb);
410
411 if (rt6)
412 iif = rt6->rt6i_idev->dev->ifindex;
413 }
414
415 return iif;
416 }
417
418 /*
419 * Send an ICMP message in response to a packet in error
420 */
421 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
422 const struct in6_addr *force_saddr)
423 {
424 struct net *net = dev_net(skb->dev);
425 struct inet6_dev *idev = NULL;
426 struct ipv6hdr *hdr = ipv6_hdr(skb);
427 struct sock *sk;
428 struct ipv6_pinfo *np;
429 const struct in6_addr *saddr = NULL;
430 struct dst_entry *dst;
431 struct icmp6hdr tmp_hdr;
432 struct flowi6 fl6;
433 struct icmpv6_msg msg;
434 struct sockcm_cookie sockc_unused = {0};
435 struct ipcm6_cookie ipc6;
436 int iif = 0;
437 int addr_type = 0;
438 int len;
439 u32 mark = IP6_REPLY_MARK(net, skb->mark);
440
441 if ((u8 *)hdr < skb->head ||
442 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
443 return;
444
445 /*
446 * Make sure we respect the rules
447 * i.e. RFC 1885 2.4(e)
448 * Rule (e.1) is enforced by not using icmp6_send
449 * in any code that processes icmp errors.
450 */
451 addr_type = ipv6_addr_type(&hdr->daddr);
452
453 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
454 ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
455 saddr = &hdr->daddr;
456
457 /*
458 * Dest addr check
459 */
460
461 if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
462 if (type != ICMPV6_PKT_TOOBIG &&
463 !(type == ICMPV6_PARAMPROB &&
464 code == ICMPV6_UNK_OPTION &&
465 (opt_unrec(skb, info))))
466 return;
467
468 saddr = NULL;
469 }
470
471 addr_type = ipv6_addr_type(&hdr->saddr);
472
473 /*
474 * Source addr check
475 */
476
477 if (__ipv6_addr_needs_scope_id(addr_type)) {
478 iif = icmp6_iif(skb);
479 } else {
480 dst = skb_dst(skb);
481 iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
482 }
483
484 /*
485 * Must not send error if the source does not uniquely
486 * identify a single node (RFC2463 Section 2.4).
487 * We check unspecified / multicast addresses here,
488 * and anycast addresses will be checked later.
489 */
490 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
491 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
492 &hdr->saddr, &hdr->daddr);
493 return;
494 }
495
496 /*
497 * Never answer to a ICMP packet.
498 */
499 if (is_ineligible(skb)) {
500 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
501 &hdr->saddr, &hdr->daddr);
502 return;
503 }
504
505 /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
506 local_bh_disable();
507
508 /* Check global sysctl_icmp_msgs_per_sec ratelimit */
509 if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
510 goto out_bh_enable;
511
512 mip6_addr_swap(skb);
513
514 memset(&fl6, 0, sizeof(fl6));
515 fl6.flowi6_proto = IPPROTO_ICMPV6;
516 fl6.daddr = hdr->saddr;
517 if (force_saddr)
518 saddr = force_saddr;
519 if (saddr)
520 fl6.saddr = *saddr;
521 fl6.flowi6_mark = mark;
522 fl6.flowi6_oif = iif;
523 fl6.fl6_icmp_type = type;
524 fl6.fl6_icmp_code = code;
525 fl6.flowi6_uid = sock_net_uid(net, NULL);
526 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
527 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
528
529 sk = icmpv6_xmit_lock(net);
530 if (!sk)
531 goto out_bh_enable;
532
533 sk->sk_mark = mark;
534 np = inet6_sk(sk);
535
536 if (!icmpv6_xrlim_allow(sk, type, &fl6))
537 goto out;
538
539 tmp_hdr.icmp6_type = type;
540 tmp_hdr.icmp6_code = code;
541 tmp_hdr.icmp6_cksum = 0;
542 tmp_hdr.icmp6_pointer = htonl(info);
543
544 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
545 fl6.flowi6_oif = np->mcast_oif;
546 else if (!fl6.flowi6_oif)
547 fl6.flowi6_oif = np->ucast_oif;
548
549 ipc6.tclass = np->tclass;
550 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
551
552 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
553 if (IS_ERR(dst))
554 goto out;
555
556 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
557 ipc6.dontfrag = np->dontfrag;
558 ipc6.opt = NULL;
559
560 msg.skb = skb;
561 msg.offset = skb_network_offset(skb);
562 msg.type = type;
563
564 len = skb->len - msg.offset;
565 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
566 if (len < 0) {
567 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
568 &hdr->saddr, &hdr->daddr);
569 goto out_dst_release;
570 }
571
572 rcu_read_lock();
573 idev = __in6_dev_get(skb->dev);
574
575 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
576 len + sizeof(struct icmp6hdr),
577 sizeof(struct icmp6hdr),
578 &ipc6, &fl6, (struct rt6_info *)dst,
579 MSG_DONTWAIT, &sockc_unused)) {
580 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
581 ip6_flush_pending_frames(sk);
582 } else {
583 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
584 len + sizeof(struct icmp6hdr));
585 }
586 rcu_read_unlock();
587 out_dst_release:
588 dst_release(dst);
589 out:
590 icmpv6_xmit_unlock(sk);
591 out_bh_enable:
592 local_bh_enable();
593 }
594
595 /* Slightly more convenient version of icmp6_send.
596 */
597 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
598 {
599 icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
600 kfree_skb(skb);
601 }
602
603 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
604 * if sufficient data bytes are available
605 * @nhs is the size of the tunnel header(s) :
606 * Either an IPv4 header for SIT encap
607 * an IPv4 header + GRE header for GRE encap
608 */
609 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
610 unsigned int data_len)
611 {
612 struct in6_addr temp_saddr;
613 struct rt6_info *rt;
614 struct sk_buff *skb2;
615 u32 info = 0;
616
617 if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
618 return 1;
619
620 /* RFC 4884 (partial) support for ICMP extensions */
621 if (data_len < 128 || (data_len & 7) || skb->len < data_len)
622 data_len = 0;
623
624 skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
625
626 if (!skb2)
627 return 1;
628
629 skb_dst_drop(skb2);
630 skb_pull(skb2, nhs);
631 skb_reset_network_header(skb2);
632
633 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
634 skb, 0);
635
636 if (rt && rt->dst.dev)
637 skb2->dev = rt->dst.dev;
638
639 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
640
641 if (data_len) {
642 /* RFC 4884 (partial) support :
643 * insert 0 padding at the end, before the extensions
644 */
645 __skb_push(skb2, nhs);
646 skb_reset_network_header(skb2);
647 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
648 memset(skb2->data + data_len - nhs, 0, nhs);
649 /* RFC 4884 4.5 : Length is measured in 64-bit words,
650 * and stored in reserved[0]
651 */
652 info = (data_len/8) << 24;
653 }
654 if (type == ICMP_TIME_EXCEEDED)
655 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
656 info, &temp_saddr);
657 else
658 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
659 info, &temp_saddr);
660 if (rt)
661 ip6_rt_put(rt);
662
663 kfree_skb(skb2);
664
665 return 0;
666 }
667 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
668
669 static void icmpv6_echo_reply(struct sk_buff *skb)
670 {
671 struct net *net = dev_net(skb->dev);
672 struct sock *sk;
673 struct inet6_dev *idev;
674 struct ipv6_pinfo *np;
675 const struct in6_addr *saddr = NULL;
676 struct icmp6hdr *icmph = icmp6_hdr(skb);
677 struct icmp6hdr tmp_hdr;
678 struct flowi6 fl6;
679 struct icmpv6_msg msg;
680 struct dst_entry *dst;
681 struct ipcm6_cookie ipc6;
682 u32 mark = IP6_REPLY_MARK(net, skb->mark);
683 struct sockcm_cookie sockc_unused = {0};
684
685 saddr = &ipv6_hdr(skb)->daddr;
686
687 if (!ipv6_unicast_destination(skb) &&
688 !(net->ipv6.sysctl.anycast_src_echo_reply &&
689 ipv6_anycast_destination(skb_dst(skb), saddr)))
690 saddr = NULL;
691
692 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
693 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
694
695 memset(&fl6, 0, sizeof(fl6));
696 fl6.flowi6_proto = IPPROTO_ICMPV6;
697 fl6.daddr = ipv6_hdr(skb)->saddr;
698 if (saddr)
699 fl6.saddr = *saddr;
700 fl6.flowi6_oif = icmp6_iif(skb);
701 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
702 fl6.flowi6_mark = mark;
703 fl6.flowi6_uid = sock_net_uid(net, NULL);
704 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
705
706 local_bh_disable();
707 sk = icmpv6_xmit_lock(net);
708 if (!sk)
709 goto out_bh_enable;
710 sk->sk_mark = mark;
711 np = inet6_sk(sk);
712
713 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
714 fl6.flowi6_oif = np->mcast_oif;
715 else if (!fl6.flowi6_oif)
716 fl6.flowi6_oif = np->ucast_oif;
717
718 if (ip6_dst_lookup(net, sk, &dst, &fl6))
719 goto out;
720 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
721 if (IS_ERR(dst))
722 goto out;
723
724 idev = __in6_dev_get(skb->dev);
725
726 msg.skb = skb;
727 msg.offset = 0;
728 msg.type = ICMPV6_ECHO_REPLY;
729
730 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
731 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
732 ipc6.dontfrag = np->dontfrag;
733 ipc6.opt = NULL;
734
735 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
736 skb->len + sizeof(struct icmp6hdr),
737 sizeof(struct icmp6hdr), &ipc6, &fl6,
738 (struct rt6_info *)dst, MSG_DONTWAIT,
739 &sockc_unused)) {
740 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
741 ip6_flush_pending_frames(sk);
742 } else {
743 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
744 skb->len + sizeof(struct icmp6hdr));
745 }
746 dst_release(dst);
747 out:
748 icmpv6_xmit_unlock(sk);
749 out_bh_enable:
750 local_bh_enable();
751 }
752
753 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
754 {
755 const struct inet6_protocol *ipprot;
756 int inner_offset;
757 __be16 frag_off;
758 u8 nexthdr;
759 struct net *net = dev_net(skb->dev);
760
761 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
762 goto out;
763
764 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
765 if (ipv6_ext_hdr(nexthdr)) {
766 /* now skip over extension headers */
767 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
768 &nexthdr, &frag_off);
769 if (inner_offset < 0)
770 goto out;
771 } else {
772 inner_offset = sizeof(struct ipv6hdr);
773 }
774
775 /* Checkin header including 8 bytes of inner protocol header. */
776 if (!pskb_may_pull(skb, inner_offset+8))
777 goto out;
778
779 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
780 Without this we will not able f.e. to make source routed
781 pmtu discovery.
782 Corresponding argument (opt) to notifiers is already added.
783 --ANK (980726)
784 */
785
786 ipprot = rcu_dereference(inet6_protos[nexthdr]);
787 if (ipprot && ipprot->err_handler)
788 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
789
790 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
791 return;
792
793 out:
794 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
795 }
796
797 /*
798 * Handle icmp messages
799 */
800
801 static int icmpv6_rcv(struct sk_buff *skb)
802 {
803 struct net_device *dev = skb->dev;
804 struct inet6_dev *idev = __in6_dev_get(dev);
805 const struct in6_addr *saddr, *daddr;
806 struct icmp6hdr *hdr;
807 u8 type;
808 bool success = false;
809
810 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
811 struct sec_path *sp = skb_sec_path(skb);
812 int nh;
813
814 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
815 XFRM_STATE_ICMP))
816 goto drop_no_count;
817
818 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
819 goto drop_no_count;
820
821 nh = skb_network_offset(skb);
822 skb_set_network_header(skb, sizeof(*hdr));
823
824 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
825 goto drop_no_count;
826
827 skb_set_network_header(skb, nh);
828 }
829
830 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
831
832 saddr = &ipv6_hdr(skb)->saddr;
833 daddr = &ipv6_hdr(skb)->daddr;
834
835 if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
836 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
837 saddr, daddr);
838 goto csum_error;
839 }
840
841 if (!pskb_pull(skb, sizeof(*hdr)))
842 goto discard_it;
843
844 hdr = icmp6_hdr(skb);
845
846 type = hdr->icmp6_type;
847
848 ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
849
850 switch (type) {
851 case ICMPV6_ECHO_REQUEST:
852 icmpv6_echo_reply(skb);
853 break;
854
855 case ICMPV6_ECHO_REPLY:
856 success = ping_rcv(skb);
857 break;
858
859 case ICMPV6_PKT_TOOBIG:
860 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
861 standard destination cache. Seems, only "advanced"
862 destination cache will allow to solve this problem
863 --ANK (980726)
864 */
865 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
866 goto discard_it;
867 hdr = icmp6_hdr(skb);
868
869 /* to notify */
870 /* fall through */
871 case ICMPV6_DEST_UNREACH:
872 case ICMPV6_TIME_EXCEED:
873 case ICMPV6_PARAMPROB:
874 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
875 break;
876
877 case NDISC_ROUTER_SOLICITATION:
878 case NDISC_ROUTER_ADVERTISEMENT:
879 case NDISC_NEIGHBOUR_SOLICITATION:
880 case NDISC_NEIGHBOUR_ADVERTISEMENT:
881 case NDISC_REDIRECT:
882 ndisc_rcv(skb);
883 break;
884
885 case ICMPV6_MGM_QUERY:
886 igmp6_event_query(skb);
887 break;
888
889 case ICMPV6_MGM_REPORT:
890 igmp6_event_report(skb);
891 break;
892
893 case ICMPV6_MGM_REDUCTION:
894 case ICMPV6_NI_QUERY:
895 case ICMPV6_NI_REPLY:
896 case ICMPV6_MLD2_REPORT:
897 case ICMPV6_DHAAD_REQUEST:
898 case ICMPV6_DHAAD_REPLY:
899 case ICMPV6_MOBILE_PREFIX_SOL:
900 case ICMPV6_MOBILE_PREFIX_ADV:
901 break;
902
903 default:
904 /* informational */
905 if (type & ICMPV6_INFOMSG_MASK)
906 break;
907
908 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
909 saddr, daddr);
910
911 /*
912 * error of unknown type.
913 * must pass to upper level
914 */
915
916 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
917 }
918
919 /* until the v6 path can be better sorted assume failure and
920 * preserve the status quo behaviour for the rest of the paths to here
921 */
922 if (success)
923 consume_skb(skb);
924 else
925 kfree_skb(skb);
926
927 return 0;
928
929 csum_error:
930 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
931 discard_it:
932 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
933 drop_no_count:
934 kfree_skb(skb);
935 return 0;
936 }
937
938 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
939 u8 type,
940 const struct in6_addr *saddr,
941 const struct in6_addr *daddr,
942 int oif)
943 {
944 memset(fl6, 0, sizeof(*fl6));
945 fl6->saddr = *saddr;
946 fl6->daddr = *daddr;
947 fl6->flowi6_proto = IPPROTO_ICMPV6;
948 fl6->fl6_icmp_type = type;
949 fl6->fl6_icmp_code = 0;
950 fl6->flowi6_oif = oif;
951 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
952 }
953
954 static int __net_init icmpv6_sk_init(struct net *net)
955 {
956 struct sock *sk;
957 int err, i, j;
958
959 net->ipv6.icmp_sk =
960 kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
961 if (!net->ipv6.icmp_sk)
962 return -ENOMEM;
963
964 for_each_possible_cpu(i) {
965 err = inet_ctl_sock_create(&sk, PF_INET6,
966 SOCK_RAW, IPPROTO_ICMPV6, net);
967 if (err < 0) {
968 pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
969 err);
970 goto fail;
971 }
972
973 net->ipv6.icmp_sk[i] = sk;
974
975 /* Enough space for 2 64K ICMP packets, including
976 * sk_buff struct overhead.
977 */
978 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
979 }
980 return 0;
981
982 fail:
983 for (j = 0; j < i; j++)
984 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
985 kfree(net->ipv6.icmp_sk);
986 return err;
987 }
988
989 static void __net_exit icmpv6_sk_exit(struct net *net)
990 {
991 int i;
992
993 for_each_possible_cpu(i) {
994 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
995 }
996 kfree(net->ipv6.icmp_sk);
997 }
998
999 static struct pernet_operations icmpv6_sk_ops = {
1000 .init = icmpv6_sk_init,
1001 .exit = icmpv6_sk_exit,
1002 };
1003
1004 int __init icmpv6_init(void)
1005 {
1006 int err;
1007
1008 err = register_pernet_subsys(&icmpv6_sk_ops);
1009 if (err < 0)
1010 return err;
1011
1012 err = -EAGAIN;
1013 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1014 goto fail;
1015
1016 err = inet6_register_icmp_sender(icmp6_send);
1017 if (err)
1018 goto sender_reg_err;
1019 return 0;
1020
1021 sender_reg_err:
1022 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1023 fail:
1024 pr_err("Failed to register ICMP6 protocol\n");
1025 unregister_pernet_subsys(&icmpv6_sk_ops);
1026 return err;
1027 }
1028
1029 void icmpv6_cleanup(void)
1030 {
1031 inet6_unregister_icmp_sender(icmp6_send);
1032 unregister_pernet_subsys(&icmpv6_sk_ops);
1033 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1034 }
1035
1036
/*
 * errno / fatality for each Destination Unreachable code; the array is
 * indexed by the ICMPv6 code (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1070
1071 int icmpv6_err_convert(u8 type, u8 code, int *err)
1072 {
1073 int fatal = 0;
1074
1075 *err = EPROTO;
1076
1077 switch (type) {
1078 case ICMPV6_DEST_UNREACH:
1079 fatal = 1;
1080 if (code < ARRAY_SIZE(tab_unreach)) {
1081 *err = tab_unreach[code].err;
1082 fatal = tab_unreach[code].fatal;
1083 }
1084 break;
1085
1086 case ICMPV6_PKT_TOOBIG:
1087 *err = EMSGSIZE;
1088 break;
1089
1090 case ICMPV6_PARAMPROB:
1091 *err = EPROTO;
1092 fatal = 1;
1093 break;
1094
1095 case ICMPV6_TIME_EXCEED:
1096 *err = EHOSTUNREACH;
1097 break;
1098 }
1099
1100 return fatal;
1101 }
1102 EXPORT_SYMBOL(icmpv6_err_convert);
1103
1104 #ifdef CONFIG_SYSCTL
1105 static struct ctl_table ipv6_icmp_table_template[] = {
1106 {
1107 .procname = "ratelimit",
1108 .data = &init_net.ipv6.sysctl.icmpv6_time,
1109 .maxlen = sizeof(int),
1110 .mode = 0644,
1111 .proc_handler = proc_dointvec_ms_jiffies,
1112 },
1113 { },
1114 };
1115
1116 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1117 {
1118 struct ctl_table *table;
1119
1120 table = kmemdup(ipv6_icmp_table_template,
1121 sizeof(ipv6_icmp_table_template),
1122 GFP_KERNEL);
1123
1124 if (table)
1125 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1126
1127 return table;
1128 }
1129 #endif