]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv6/icmp.c
net: reduce cycles spend on ICMP replies that gets rate limited
[mirror_ubuntu-bionic-kernel.git] / net / ipv6 / icmp.c
CommitLineData
1da177e4
LT
1/*
2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
1da177e4
LT
8 * Based on net/ipv4/icmp.c
9 *
10 * RFC 1885
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18/*
19 * Changes:
20 *
21 * Andi Kleen : exception handling
22 * Andi Kleen add rate limits. never reply to a icmp.
23 * add more length checks and other fixes.
24 * yoshfuji : ensure to sent parameter problem for
25 * fragments.
26 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
27 * Randy Dunlap and
28 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
30 */
31
f3213831
JP
32#define pr_fmt(fmt) "IPv6: " fmt
33
1da177e4
LT
34#include <linux/module.h>
35#include <linux/errno.h>
36#include <linux/types.h>
37#include <linux/socket.h>
38#include <linux/in.h>
39#include <linux/kernel.h>
1da177e4
LT
40#include <linux/sockios.h>
41#include <linux/net.h>
42#include <linux/skbuff.h>
43#include <linux/init.h>
763ecff1 44#include <linux/netfilter.h>
5a0e3ad6 45#include <linux/slab.h>
1da177e4
LT
46
47#ifdef CONFIG_SYSCTL
48#include <linux/sysctl.h>
49#endif
50
51#include <linux/inet.h>
52#include <linux/netdevice.h>
53#include <linux/icmpv6.h>
54
55#include <net/ip.h>
56#include <net/sock.h>
57
58#include <net/ipv6.h>
59#include <net/ip6_checksum.h>
6d0bfe22 60#include <net/ping.h>
1da177e4
LT
61#include <net/protocol.h>
62#include <net/raw.h>
63#include <net/rawv6.h>
64#include <net/transp_v6.h>
65#include <net/ip6_route.h>
66#include <net/addrconf.h>
67#include <net/icmp.h>
8b7817f3 68#include <net/xfrm.h>
1ed8516f 69#include <net/inet_common.h>
825edac4 70#include <net/dsfield.h>
ca254490 71#include <net/l3mdev.h>
1da177e4 72
7c0f6ba6 73#include <linux/uaccess.h>
1da177e4 74
1da177e4
LT
75/*
76 * The ICMP socket(s). This is the most convenient way to flow control
77 * our ICMP output as well as maintain a clean interface throughout
78 * all layers. All Socketless IP sends will soon be gone.
79 *
80 * On SMP we have one ICMP socket per-cpu.
81 */
98c6d1b2
DL
82static inline struct sock *icmpv6_sk(struct net *net)
83{
84 return net->ipv6.icmp_sk[smp_processor_id()];
85}
1da177e4 86
6f809da2
SK
87static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 u8 type, u8 code, int offset, __be32 info)
89{
6d0bfe22
LC
90 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
6f809da2
SK
92 struct net *net = dev_net(skb->dev);
93
94 if (type == ICMPV6_PKT_TOOBIG)
e2d118a1 95 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
6f809da2 96 else if (type == NDISC_REDIRECT)
e2d118a1
LC
97 ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 sock_net_uid(net, NULL));
6d0bfe22
LC
99
100 if (!(type & ICMPV6_INFOMSG_MASK))
101 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
dcb94b88 102 ping_err(skb, offset, ntohl(info));
6f809da2
SK
103}
104
e5bbef20 105static int icmpv6_rcv(struct sk_buff *skb);
1da177e4 106
41135cc8 107static const struct inet6_protocol icmpv6_protocol = {
1da177e4 108 .handler = icmpv6_rcv,
6f809da2 109 .err_handler = icmpv6_err,
8b7817f3 110 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1da177e4
LT
111};
112
fdc0bde9 113static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
1da177e4 114{
fdc0bde9
DL
115 struct sock *sk;
116
1da177e4
LT
117 local_bh_disable();
118
fdc0bde9 119 sk = icmpv6_sk(net);
405666db 120 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
1da177e4
LT
121 /* This can happen if the output path (f.e. SIT or
122 * ip6ip6 tunnel) signals dst_link_failure() for an
123 * outgoing ICMP6 packet.
124 */
125 local_bh_enable();
fdc0bde9 126 return NULL;
1da177e4 127 }
fdc0bde9 128 return sk;
1da177e4
LT
129}
130
405666db 131static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
1da177e4 132{
405666db 133 spin_unlock_bh(&sk->sk_lock.slock);
1da177e4
LT
134}
135
1da177e4
LT
136/*
137 * Figure out, may we reply to this packet with icmp error.
138 *
139 * We do not reply, if:
140 * - it was icmp error message.
141 * - it is truncated, so that it is known, that protocol is ICMPV6
142 * (i.e. in the middle of some exthdr)
143 *
144 * --ANK (980726)
145 */
146
a50feda5 147static bool is_ineligible(const struct sk_buff *skb)
1da177e4 148{
0660e03f 149 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
1da177e4 150 int len = skb->len - ptr;
0660e03f 151 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
75f2811c 152 __be16 frag_off;
1da177e4
LT
153
154 if (len < 0)
a50feda5 155 return true;
1da177e4 156
75f2811c 157 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
1da177e4 158 if (ptr < 0)
a50feda5 159 return false;
1da177e4
LT
160 if (nexthdr == IPPROTO_ICMPV6) {
161 u8 _type, *tp;
162 tp = skb_header_pointer(skb,
163 ptr+offsetof(struct icmp6hdr, icmp6_type),
164 sizeof(_type), &_type);
63159f29 165 if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
a50feda5 166 return true;
1da177e4 167 }
a50feda5 168 return false;
1da177e4
LT
169}
170
c0303efe
JDB
/*
 * Returns true for ICMPv6 types that are exempt from rate limiting:
 * informational messages, and packet-too-big (limiting it would break
 * path MTU discovery).
 */
static bool icmpv6_mask_allow(int type)
{
	bool informational = type & ICMPV6_INFOMSG_MASK;

	return informational || type == ICMPV6_PKT_TOOBIG;
}
183
184static bool icmpv6_global_allow(int type)
185{
186 if (icmpv6_mask_allow(type))
187 return true;
188
189 if (icmp_global_allow())
190 return true;
191
192 return false;
193}
194
1ab1457c
YH
195/*
196 * Check the ICMP output rate limit
1da177e4 197 */
4cdf507d
ED
198static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
199 struct flowi6 *fl6)
1da177e4 200{
3b1e0a65 201 struct net *net = sock_net(sk);
4cdf507d 202 struct dst_entry *dst;
92d86829 203 bool res = false;
1da177e4 204
c0303efe 205 if (icmpv6_mask_allow(type))
92d86829 206 return true;
1da177e4 207
1ab1457c 208 /*
1da177e4
LT
209 * Look up the output route.
210 * XXX: perhaps the expire for routing entries cloned by
211 * this lookup should be more aggressive (not longer than timeout).
212 */
4c9483b2 213 dst = ip6_route_output(net, sk, fl6);
1da177e4 214 if (dst->error) {
3bd653c8 215 IP6_INC_STATS(net, ip6_dst_idev(dst),
a11d206d 216 IPSTATS_MIB_OUTNOROUTES);
1da177e4 217 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
92d86829 218 res = true;
1da177e4
LT
219 } else {
220 struct rt6_info *rt = (struct rt6_info *)dst;
9a43b709 221 int tmo = net->ipv6.sysctl.icmpv6_time;
c0303efe 222 struct inet_peer *peer;
1da177e4
LT
223
224 /* Give more bandwidth to wider prefixes. */
225 if (rt->rt6i_dst.plen < 128)
226 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
227
c0303efe
JDB
228 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
229 res = inet_peer_xrlim_allow(peer, tmo);
230 if (peer)
231 inet_putpeer(peer);
1da177e4
LT
232 }
233 dst_release(dst);
234 return res;
235}
236
237/*
238 * an inline helper for the "simple" if statement below
239 * checks if parameter problem report is caused by an
1ab1457c 240 * unrecognized IPv6 option that has the Option Type
1da177e4
LT
241 * highest-order two bits set to 10
242 */
243
a50feda5 244static bool opt_unrec(struct sk_buff *skb, __u32 offset)
1da177e4
LT
245{
246 u8 _optval, *op;
247
bbe735e4 248 offset += skb_network_offset(skb);
1da177e4 249 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
63159f29 250 if (!op)
a50feda5 251 return true;
1da177e4
LT
252 return (*op & 0xC0) == 0x80;
253}
254
6d0bfe22
LC
255int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
256 struct icmp6hdr *thdr, int len)
1da177e4
LT
257{
258 struct sk_buff *skb;
259 struct icmp6hdr *icmp6h;
260 int err = 0;
261
e5d08d71 262 skb = skb_peek(&sk->sk_write_queue);
63159f29 263 if (!skb)
1da177e4
LT
264 goto out;
265
cc70ab26 266 icmp6h = icmp6_hdr(skb);
1da177e4
LT
267 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
268 icmp6h->icmp6_cksum = 0;
269
270 if (skb_queue_len(&sk->sk_write_queue) == 1) {
07f0757a 271 skb->csum = csum_partial(icmp6h,
1da177e4 272 sizeof(struct icmp6hdr), skb->csum);
4c9483b2
DM
273 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
274 &fl6->daddr,
275 len, fl6->flowi6_proto,
1da177e4
LT
276 skb->csum);
277 } else {
868c86bc 278 __wsum tmp_csum = 0;
1da177e4
LT
279
280 skb_queue_walk(&sk->sk_write_queue, skb) {
281 tmp_csum = csum_add(tmp_csum, skb->csum);
282 }
283
07f0757a 284 tmp_csum = csum_partial(icmp6h,
1da177e4 285 sizeof(struct icmp6hdr), tmp_csum);
4c9483b2
DM
286 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
287 &fl6->daddr,
288 len, fl6->flowi6_proto,
868c86bc 289 tmp_csum);
1da177e4 290 }
1da177e4
LT
291 ip6_push_pending_frames(sk);
292out:
293 return err;
294}
295
296struct icmpv6_msg {
297 struct sk_buff *skb;
298 int offset;
763ecff1 299 uint8_t type;
1da177e4
LT
300};
301
302static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
303{
304 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
305 struct sk_buff *org_skb = msg->skb;
5f92a738 306 __wsum csum = 0;
1da177e4
LT
307
308 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
309 to, len, csum);
310 skb->csum = csum_block_add(skb->csum, csum, odd);
763ecff1
YK
311 if (!(msg->type & ICMPV6_INFOMSG_MASK))
312 nf_ct_attach(skb, org_skb);
1da177e4
LT
313 return 0;
314}
315
07a93626 316#if IS_ENABLED(CONFIG_IPV6_MIP6)
79383236
MN
317static void mip6_addr_swap(struct sk_buff *skb)
318{
0660e03f 319 struct ipv6hdr *iph = ipv6_hdr(skb);
79383236
MN
320 struct inet6_skb_parm *opt = IP6CB(skb);
321 struct ipv6_destopt_hao *hao;
322 struct in6_addr tmp;
323 int off;
324
325 if (opt->dsthao) {
326 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
327 if (likely(off >= 0)) {
d56f90a7
ACM
328 hao = (struct ipv6_destopt_hao *)
329 (skb_network_header(skb) + off);
4e3fd7a0
AD
330 tmp = iph->saddr;
331 iph->saddr = hao->addr;
332 hao->addr = tmp;
79383236
MN
333 }
334 }
335}
336#else
337static inline void mip6_addr_swap(struct sk_buff *skb) {}
338#endif
339
e8243534 340static struct dst_entry *icmpv6_route_lookup(struct net *net,
341 struct sk_buff *skb,
342 struct sock *sk,
343 struct flowi6 *fl6)
b42835db
DM
344{
345 struct dst_entry *dst, *dst2;
4c9483b2 346 struct flowi6 fl2;
b42835db
DM
347 int err;
348
343d60aa 349 err = ip6_dst_lookup(net, sk, &dst, fl6);
b42835db
DM
350 if (err)
351 return ERR_PTR(err);
352
353 /*
354 * We won't send icmp if the destination is known
355 * anycast.
356 */
2647a9b0 357 if (ipv6_anycast_destination(dst, &fl6->daddr)) {
ba7a46f1 358 net_dbg_ratelimited("icmp6_send: acast source\n");
b42835db
DM
359 dst_release(dst);
360 return ERR_PTR(-EINVAL);
361 }
362
363 /* No need to clone since we're just using its address. */
364 dst2 = dst;
365
4c9483b2 366 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
452edd59 367 if (!IS_ERR(dst)) {
b42835db
DM
368 if (dst != dst2)
369 return dst;
452edd59
DM
370 } else {
371 if (PTR_ERR(dst) == -EPERM)
372 dst = NULL;
373 else
374 return dst;
b42835db
DM
375 }
376
4c9483b2 377 err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
b42835db
DM
378 if (err)
379 goto relookup_failed;
380
343d60aa 381 err = ip6_dst_lookup(net, sk, &dst2, &fl2);
b42835db
DM
382 if (err)
383 goto relookup_failed;
384
4c9483b2 385 dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
452edd59 386 if (!IS_ERR(dst2)) {
b42835db
DM
387 dst_release(dst);
388 dst = dst2;
452edd59
DM
389 } else {
390 err = PTR_ERR(dst2);
391 if (err == -EPERM) {
392 dst_release(dst);
393 return dst2;
394 } else
395 goto relookup_failed;
b42835db
DM
396 }
397
398relookup_failed:
399 if (dst)
400 return dst;
401 return ERR_PTR(err);
402}
403
1da177e4
LT
404/*
405 * Send an ICMP message in response to a packet in error
406 */
b1cadc1a
ED
407static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
408 const struct in6_addr *force_saddr)
1da177e4 409{
c346dca1 410 struct net *net = dev_net(skb->dev);
1da177e4 411 struct inet6_dev *idev = NULL;
0660e03f 412 struct ipv6hdr *hdr = ipv6_hdr(skb);
84427d53
YH
413 struct sock *sk;
414 struct ipv6_pinfo *np;
b71d1d42 415 const struct in6_addr *saddr = NULL;
1da177e4
LT
416 struct dst_entry *dst;
417 struct icmp6hdr tmp_hdr;
4c9483b2 418 struct flowi6 fl6;
1da177e4 419 struct icmpv6_msg msg;
c14ac945 420 struct sockcm_cookie sockc_unused = {0};
26879da5 421 struct ipcm6_cookie ipc6;
1da177e4
LT
422 int iif = 0;
423 int addr_type = 0;
424 int len;
1da177e4 425 int err = 0;
e110861f 426 u32 mark = IP6_REPLY_MARK(net, skb->mark);
1da177e4 427
27a884dc 428 if ((u8 *)hdr < skb->head ||
29a3cad5 429 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
1da177e4
LT
430 return;
431
432 /*
1ab1457c 433 * Make sure we respect the rules
1da177e4 434 * i.e. RFC 1885 2.4(e)
5f5624cf 435 * Rule (e.1) is enforced by not using icmp6_send
1da177e4
LT
436 * in any code that processes icmp errors.
437 */
438 addr_type = ipv6_addr_type(&hdr->daddr);
439
446fab59 440 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
d94c1f92 441 ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
1da177e4
LT
442 saddr = &hdr->daddr;
443
444 /*
445 * Dest addr check
446 */
447
9a6b4b39 448 if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
1da177e4 449 if (type != ICMPV6_PKT_TOOBIG &&
1ab1457c
YH
450 !(type == ICMPV6_PARAMPROB &&
451 code == ICMPV6_UNK_OPTION &&
1da177e4
LT
452 (opt_unrec(skb, info))))
453 return;
454
455 saddr = NULL;
456 }
457
458 addr_type = ipv6_addr_type(&hdr->saddr);
459
460 /*
461 * Source addr check
462 */
463
842df073 464 if (__ipv6_addr_needs_scope_id(addr_type))
1da177e4 465 iif = skb->dev->ifindex;
79dc7e3f
DA
466 else {
467 dst = skb_dst(skb);
468 iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
469 }
1da177e4
LT
470
471 /*
8de3351e
YH
472 * Must not send error if the source does not uniquely
473 * identify a single node (RFC2463 Section 2.4).
474 * We check unspecified / multicast addresses here,
475 * and anycast addresses will be checked later.
1da177e4
LT
476 */
477 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
4b3418fb
BM
478 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
479 &hdr->saddr, &hdr->daddr);
1da177e4
LT
480 return;
481 }
482
1ab1457c 483 /*
1da177e4
LT
484 * Never answer to a ICMP packet.
485 */
486 if (is_ineligible(skb)) {
4b3418fb
BM
487 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
488 &hdr->saddr, &hdr->daddr);
1da177e4
LT
489 return;
490 }
491
79383236
MN
492 mip6_addr_swap(skb);
493
4c9483b2
DM
494 memset(&fl6, 0, sizeof(fl6));
495 fl6.flowi6_proto = IPPROTO_ICMPV6;
4e3fd7a0 496 fl6.daddr = hdr->saddr;
b1cadc1a
ED
497 if (force_saddr)
498 saddr = force_saddr;
1da177e4 499 if (saddr)
4e3fd7a0 500 fl6.saddr = *saddr;
e110861f 501 fl6.flowi6_mark = mark;
4c9483b2 502 fl6.flowi6_oif = iif;
1958b856
DM
503 fl6.fl6_icmp_type = type;
504 fl6.fl6_icmp_code = code;
e2d118a1 505 fl6.flowi6_uid = sock_net_uid(net, NULL);
4c9483b2 506 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
1da177e4 507
fdc0bde9 508 sk = icmpv6_xmit_lock(net);
63159f29 509 if (!sk)
405666db 510 return;
c0303efe
JDB
511
512 if (!icmpv6_global_allow(type))
513 goto out;
514
e110861f 515 sk->sk_mark = mark;
fdc0bde9 516 np = inet6_sk(sk);
405666db 517
4c9483b2 518 if (!icmpv6_xrlim_allow(sk, type, &fl6))
1da177e4
LT
519 goto out;
520
521 tmp_hdr.icmp6_type = type;
522 tmp_hdr.icmp6_code = code;
523 tmp_hdr.icmp6_cksum = 0;
524 tmp_hdr.icmp6_pointer = htonl(info);
525
4c9483b2
DM
526 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
527 fl6.flowi6_oif = np->mcast_oif;
c4062dfc
EH
528 else if (!fl6.flowi6_oif)
529 fl6.flowi6_oif = np->ucast_oif;
1da177e4 530
38b7097b
HFS
531 ipc6.tclass = np->tclass;
532 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
533
4c9483b2 534 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
b42835db 535 if (IS_ERR(dst))
1da177e4 536 goto out;
8de3351e 537
26879da5 538 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
26879da5
WW
539 ipc6.dontfrag = np->dontfrag;
540 ipc6.opt = NULL;
1da177e4
LT
541
542 msg.skb = skb;
bbe735e4 543 msg.offset = skb_network_offset(skb);
763ecff1 544 msg.type = type;
1da177e4
LT
545
546 len = skb->len - msg.offset;
67ba4152 547 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
1da177e4 548 if (len < 0) {
4b3418fb
BM
549 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
550 &hdr->saddr, &hdr->daddr);
1da177e4
LT
551 goto out_dst_release;
552 }
553
cfdf7647
ED
554 rcu_read_lock();
555 idev = __in6_dev_get(skb->dev);
1da177e4
LT
556
557 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
558 len + sizeof(struct icmp6hdr),
26879da5
WW
559 sizeof(struct icmp6hdr),
560 &ipc6, &fl6, (struct rt6_info *)dst,
561 MSG_DONTWAIT, &sockc_unused);
1da177e4 562 if (err) {
43a43b60 563 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
1da177e4 564 ip6_flush_pending_frames(sk);
cfdf7647
ED
565 } else {
566 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
567 len + sizeof(struct icmp6hdr));
1da177e4 568 }
cfdf7647 569 rcu_read_unlock();
1da177e4
LT
570out_dst_release:
571 dst_release(dst);
572out:
405666db 573 icmpv6_xmit_unlock(sk);
1da177e4 574}
5f5624cf
PS
575
576/* Slightly more convenient version of icmp6_send.
577 */
578void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
579{
b1cadc1a 580 icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
5f5624cf
PS
581 kfree_skb(skb);
582}
7159039a 583
5fbba8ac
ED
584/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
585 * if sufficient data bytes are available
586 * @nhs is the size of the tunnel header(s) :
587 * Either an IPv4 header for SIT encap
588 * an IPv4 header + GRE header for GRE encap
589 */
20e1954f
ED
590int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
591 unsigned int data_len)
5fbba8ac 592{
2d7a3b27 593 struct in6_addr temp_saddr;
5fbba8ac
ED
594 struct rt6_info *rt;
595 struct sk_buff *skb2;
20e1954f 596 u32 info = 0;
5fbba8ac
ED
597
598 if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
599 return 1;
600
20e1954f
ED
601 /* RFC 4884 (partial) support for ICMP extensions */
602 if (data_len < 128 || (data_len & 7) || skb->len < data_len)
603 data_len = 0;
604
605 skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
5fbba8ac
ED
606
607 if (!skb2)
608 return 1;
609
610 skb_dst_drop(skb2);
611 skb_pull(skb2, nhs);
612 skb_reset_network_header(skb2);
613
614 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
615
616 if (rt && rt->dst.dev)
617 skb2->dev = rt->dst.dev;
618
2d7a3b27 619 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
20e1954f
ED
620
621 if (data_len) {
622 /* RFC 4884 (partial) support :
623 * insert 0 padding at the end, before the extensions
624 */
625 __skb_push(skb2, nhs);
626 skb_reset_network_header(skb2);
627 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
628 memset(skb2->data + data_len - nhs, 0, nhs);
629 /* RFC 4884 4.5 : Length is measured in 64-bit words,
630 * and stored in reserved[0]
631 */
632 info = (data_len/8) << 24;
633 }
2d7a3b27
ED
634 if (type == ICMP_TIME_EXCEEDED)
635 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
20e1954f 636 info, &temp_saddr);
2d7a3b27
ED
637 else
638 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
20e1954f 639 info, &temp_saddr);
5fbba8ac
ED
640 if (rt)
641 ip6_rt_put(rt);
642
643 kfree_skb(skb2);
644
645 return 0;
646}
647EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
648
1da177e4
LT
649static void icmpv6_echo_reply(struct sk_buff *skb)
650{
c346dca1 651 struct net *net = dev_net(skb->dev);
84427d53 652 struct sock *sk;
1da177e4 653 struct inet6_dev *idev;
84427d53 654 struct ipv6_pinfo *np;
b71d1d42 655 const struct in6_addr *saddr = NULL;
cc70ab26 656 struct icmp6hdr *icmph = icmp6_hdr(skb);
1da177e4 657 struct icmp6hdr tmp_hdr;
4c9483b2 658 struct flowi6 fl6;
1da177e4
LT
659 struct icmpv6_msg msg;
660 struct dst_entry *dst;
26879da5 661 struct ipcm6_cookie ipc6;
1da177e4 662 int err = 0;
e110861f 663 u32 mark = IP6_REPLY_MARK(net, skb->mark);
c14ac945 664 struct sockcm_cookie sockc_unused = {0};
1da177e4 665
0660e03f 666 saddr = &ipv6_hdr(skb)->daddr;
1da177e4 667
509aba3b 668 if (!ipv6_unicast_destination(skb) &&
ec35b61e 669 !(net->ipv6.sysctl.anycast_src_echo_reply &&
2647a9b0 670 ipv6_anycast_destination(skb_dst(skb), saddr)))
1da177e4
LT
671 saddr = NULL;
672
673 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
674 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
675
4c9483b2
DM
676 memset(&fl6, 0, sizeof(fl6));
677 fl6.flowi6_proto = IPPROTO_ICMPV6;
4e3fd7a0 678 fl6.daddr = ipv6_hdr(skb)->saddr;
1da177e4 679 if (saddr)
4e3fd7a0 680 fl6.saddr = *saddr;
9ff74384 681 fl6.flowi6_oif = skb->dev->ifindex;
1958b856 682 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
e110861f 683 fl6.flowi6_mark = mark;
e2d118a1 684 fl6.flowi6_uid = sock_net_uid(net, NULL);
4c9483b2 685 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
1da177e4 686
fdc0bde9 687 sk = icmpv6_xmit_lock(net);
63159f29 688 if (!sk)
405666db 689 return;
e110861f 690 sk->sk_mark = mark;
fdc0bde9 691 np = inet6_sk(sk);
405666db 692
4c9483b2
DM
693 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
694 fl6.flowi6_oif = np->mcast_oif;
c4062dfc
EH
695 else if (!fl6.flowi6_oif)
696 fl6.flowi6_oif = np->ucast_oif;
1da177e4 697
343d60aa 698 err = ip6_dst_lookup(net, sk, &dst, &fl6);
1da177e4
LT
699 if (err)
700 goto out;
4c9483b2 701 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
452edd59 702 if (IS_ERR(dst))
e104411b 703 goto out;
1da177e4 704
cfdf7647 705 idev = __in6_dev_get(skb->dev);
1da177e4
LT
706
707 msg.skb = skb;
708 msg.offset = 0;
763ecff1 709 msg.type = ICMPV6_ECHO_REPLY;
1da177e4 710
26879da5
WW
711 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
712 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
713 ipc6.dontfrag = np->dontfrag;
714 ipc6.opt = NULL;
715
1da177e4 716 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
26879da5 717 sizeof(struct icmp6hdr), &ipc6, &fl6,
a2d91a09 718 (struct rt6_info *)dst, MSG_DONTWAIT,
26879da5 719 &sockc_unused);
1da177e4
LT
720
721 if (err) {
a16292a0 722 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
1da177e4 723 ip6_flush_pending_frames(sk);
cfdf7647
ED
724 } else {
725 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
726 skb->len + sizeof(struct icmp6hdr));
1da177e4 727 }
1da177e4 728 dst_release(dst);
1ab1457c 729out:
405666db 730 icmpv6_xmit_unlock(sk);
1da177e4
LT
731}
732
b94f1c09 733void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
1da177e4 734{
41135cc8 735 const struct inet6_protocol *ipprot;
1da177e4 736 int inner_offset;
75f2811c 737 __be16 frag_off;
f9242b6b 738 u8 nexthdr;
7304fe46 739 struct net *net = dev_net(skb->dev);
1da177e4
LT
740
741 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
7304fe46 742 goto out;
1da177e4
LT
743
744 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
745 if (ipv6_ext_hdr(nexthdr)) {
746 /* now skip over extension headers */
75f2811c
JG
747 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
748 &nexthdr, &frag_off);
67ba4152 749 if (inner_offset < 0)
7304fe46 750 goto out;
1da177e4
LT
751 } else {
752 inner_offset = sizeof(struct ipv6hdr);
753 }
754
755 /* Checkin header including 8 bytes of inner protocol header. */
756 if (!pskb_may_pull(skb, inner_offset+8))
7304fe46 757 goto out;
1da177e4 758
1da177e4
LT
759 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
760 Without this we will not able f.e. to make source routed
761 pmtu discovery.
762 Corresponding argument (opt) to notifiers is already added.
763 --ANK (980726)
764 */
765
f9242b6b 766 ipprot = rcu_dereference(inet6_protos[nexthdr]);
1da177e4
LT
767 if (ipprot && ipprot->err_handler)
768 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
1da177e4 769
69d6da0b 770 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
7304fe46
DJ
771 return;
772
773out:
a16292a0 774 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
1da177e4 775}
1ab1457c 776
1da177e4
LT
777/*
778 * Handle icmp messages
779 */
780
e5bbef20 781static int icmpv6_rcv(struct sk_buff *skb)
1da177e4 782{
1da177e4
LT
783 struct net_device *dev = skb->dev;
784 struct inet6_dev *idev = __in6_dev_get(dev);
b71d1d42 785 const struct in6_addr *saddr, *daddr;
1da177e4 786 struct icmp6hdr *hdr;
d5fdd6ba 787 u8 type;
e3e32170 788 bool success = false;
1da177e4 789
aebcf82c 790 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
def8b4fa 791 struct sec_path *sp = skb_sec_path(skb);
8b7817f3
HX
792 int nh;
793
def8b4fa 794 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
aebcf82c
HX
795 XFRM_STATE_ICMP))
796 goto drop_no_count;
797
81aded24 798 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
8b7817f3
HX
799 goto drop_no_count;
800
801 nh = skb_network_offset(skb);
802 skb_set_network_header(skb, sizeof(*hdr));
803
804 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
805 goto drop_no_count;
806
807 skb_set_network_header(skb, nh);
808 }
809
a16292a0 810 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
1da177e4 811
0660e03f
ACM
812 saddr = &ipv6_hdr(skb)->saddr;
813 daddr = &ipv6_hdr(skb)->daddr;
1da177e4 814
39471ac8 815 if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
ba7a46f1
JP
816 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
817 saddr, daddr);
39471ac8 818 goto csum_error;
1da177e4
LT
819 }
820
8cf22943
HX
821 if (!pskb_pull(skb, sizeof(*hdr)))
822 goto discard_it;
1da177e4 823
cc70ab26 824 hdr = icmp6_hdr(skb);
1da177e4
LT
825
826 type = hdr->icmp6_type;
827
f3832ed2 828 ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
1da177e4
LT
829
830 switch (type) {
831 case ICMPV6_ECHO_REQUEST:
832 icmpv6_echo_reply(skb);
833 break;
834
835 case ICMPV6_ECHO_REPLY:
e3e32170 836 success = ping_rcv(skb);
1da177e4
LT
837 break;
838
839 case ICMPV6_PKT_TOOBIG:
840 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
841 standard destination cache. Seems, only "advanced"
842 destination cache will allow to solve this problem
843 --ANK (980726)
844 */
845 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
846 goto discard_it;
cc70ab26 847 hdr = icmp6_hdr(skb);
1da177e4
LT
848
849 /*
850 * Drop through to notify
851 */
852
853 case ICMPV6_DEST_UNREACH:
854 case ICMPV6_TIME_EXCEED:
855 case ICMPV6_PARAMPROB:
856 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
857 break;
858
859 case NDISC_ROUTER_SOLICITATION:
860 case NDISC_ROUTER_ADVERTISEMENT:
861 case NDISC_NEIGHBOUR_SOLICITATION:
862 case NDISC_NEIGHBOUR_ADVERTISEMENT:
863 case NDISC_REDIRECT:
864 ndisc_rcv(skb);
865 break;
866
867 case ICMPV6_MGM_QUERY:
868 igmp6_event_query(skb);
869 break;
870
871 case ICMPV6_MGM_REPORT:
872 igmp6_event_report(skb);
873 break;
874
875 case ICMPV6_MGM_REDUCTION:
876 case ICMPV6_NI_QUERY:
877 case ICMPV6_NI_REPLY:
878 case ICMPV6_MLD2_REPORT:
879 case ICMPV6_DHAAD_REQUEST:
880 case ICMPV6_DHAAD_REPLY:
881 case ICMPV6_MOBILE_PREFIX_SOL:
882 case ICMPV6_MOBILE_PREFIX_ADV:
883 break;
884
885 default:
1da177e4
LT
886 /* informational */
887 if (type & ICMPV6_INFOMSG_MASK)
888 break;
889
4b3418fb
BM
890 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
891 saddr, daddr);
ea85a0a2 892
1ab1457c
YH
893 /*
894 * error of unknown type.
895 * must pass to upper level
1da177e4
LT
896 */
897
898 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
3ff50b79
SH
899 }
900
e3e32170
RJ
901 /* until the v6 path can be better sorted assume failure and
902 * preserve the status quo behaviour for the rest of the paths to here
903 */
904 if (success)
905 consume_skb(skb);
906 else
907 kfree_skb(skb);
908
1da177e4
LT
909 return 0;
910
6a5dc9e5 911csum_error:
a16292a0 912 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1da177e4 913discard_it:
a16292a0 914 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
8b7817f3 915drop_no_count:
1da177e4
LT
916 kfree_skb(skb);
917 return 0;
918}
919
4c9483b2 920void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
95e41e93
YH
921 u8 type,
922 const struct in6_addr *saddr,
923 const struct in6_addr *daddr,
924 int oif)
925{
4c9483b2 926 memset(fl6, 0, sizeof(*fl6));
4e3fd7a0
AD
927 fl6->saddr = *saddr;
928 fl6->daddr = *daddr;
67ba4152 929 fl6->flowi6_proto = IPPROTO_ICMPV6;
1958b856
DM
930 fl6->fl6_icmp_type = type;
931 fl6->fl6_icmp_code = 0;
4c9483b2
DM
932 fl6->flowi6_oif = oif;
933 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
95e41e93
YH
934}
935
98c6d1b2 936static int __net_init icmpv6_sk_init(struct net *net)
1da177e4
LT
937{
938 struct sock *sk;
939 int err, i, j;
940
98c6d1b2
DL
941 net->ipv6.icmp_sk =
942 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
63159f29 943 if (!net->ipv6.icmp_sk)
79c91159
DL
944 return -ENOMEM;
945
6f912042 946 for_each_possible_cpu(i) {
1ed8516f
DL
947 err = inet_ctl_sock_create(&sk, PF_INET6,
948 SOCK_RAW, IPPROTO_ICMPV6, net);
1da177e4 949 if (err < 0) {
f3213831 950 pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1da177e4
LT
951 err);
952 goto fail;
953 }
954
1ed8516f 955 net->ipv6.icmp_sk[i] = sk;
5c8cafd6 956
1da177e4
LT
957 /* Enough space for 2 64K ICMP packets, including
958 * sk_buff struct overhead.
959 */
87fb4b7b 960 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1da177e4 961 }
1da177e4
LT
962 return 0;
963
964 fail:
5c8cafd6 965 for (j = 0; j < i; j++)
1ed8516f 966 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
98c6d1b2 967 kfree(net->ipv6.icmp_sk);
1da177e4
LT
968 return err;
969}
970
98c6d1b2 971static void __net_exit icmpv6_sk_exit(struct net *net)
1da177e4
LT
972{
973 int i;
974
6f912042 975 for_each_possible_cpu(i) {
1ed8516f 976 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
1da177e4 977 }
98c6d1b2
DL
978 kfree(net->ipv6.icmp_sk);
979}
980
8ed7edce 981static struct pernet_operations icmpv6_sk_ops = {
67ba4152
IM
982 .init = icmpv6_sk_init,
983 .exit = icmpv6_sk_exit,
98c6d1b2
DL
984};
985
986int __init icmpv6_init(void)
987{
988 int err;
989
990 err = register_pernet_subsys(&icmpv6_sk_ops);
991 if (err < 0)
992 return err;
993
994 err = -EAGAIN;
995 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
996 goto fail;
5f5624cf
PS
997
998 err = inet6_register_icmp_sender(icmp6_send);
999 if (err)
1000 goto sender_reg_err;
98c6d1b2
DL
1001 return 0;
1002
5f5624cf
PS
1003sender_reg_err:
1004 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
98c6d1b2 1005fail:
f3213831 1006 pr_err("Failed to register ICMP6 protocol\n");
98c6d1b2
DL
1007 unregister_pernet_subsys(&icmpv6_sk_ops);
1008 return err;
1009}
1010
8ed7edce 1011void icmpv6_cleanup(void)
98c6d1b2 1012{
5f5624cf 1013 inet6_unregister_icmp_sender(icmp6_send);
98c6d1b2 1014 unregister_pernet_subsys(&icmpv6_sk_ops);
1da177e4
LT
1015 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1016}
1017
98c6d1b2 1018
/*
 * Errno and "fatal" flag for each Destination Unreachable code, indexed
 * by the ICMPv6 code value.  Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1052
d5fdd6ba 1053int icmpv6_err_convert(u8 type, u8 code, int *err)
1da177e4
LT
1054{
1055 int fatal = 0;
1056
1057 *err = EPROTO;
1058
1059 switch (type) {
1060 case ICMPV6_DEST_UNREACH:
1061 fatal = 1;
61e76b17 1062 if (code < ARRAY_SIZE(tab_unreach)) {
1da177e4
LT
1063 *err = tab_unreach[code].err;
1064 fatal = tab_unreach[code].fatal;
1065 }
1066 break;
1067
1068 case ICMPV6_PKT_TOOBIG:
1069 *err = EMSGSIZE;
1070 break;
1ab1457c 1071
1da177e4
LT
1072 case ICMPV6_PARAMPROB:
1073 *err = EPROTO;
1074 fatal = 1;
1075 break;
1076
1077 case ICMPV6_TIME_EXCEED:
1078 *err = EHOSTUNREACH;
1079 break;
3ff50b79 1080 }
1da177e4
LT
1081
1082 return fatal;
1083}
7159039a
YH
1084EXPORT_SYMBOL(icmpv6_err_convert);
1085
1da177e4 1086#ifdef CONFIG_SYSCTL
e8243534 1087static struct ctl_table ipv6_icmp_table_template[] = {
1da177e4 1088 {
1da177e4 1089 .procname = "ratelimit",
41a76906 1090 .data = &init_net.ipv6.sysctl.icmpv6_time,
1da177e4
LT
1091 .maxlen = sizeof(int),
1092 .mode = 0644,
6d9f239a 1093 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 1094 },
f8572d8f 1095 { },
1da177e4 1096};
760f2d01 1097
2c8c1e72 1098struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
760f2d01
DL
1099{
1100 struct ctl_table *table;
1101
1102 table = kmemdup(ipv6_icmp_table_template,
1103 sizeof(ipv6_icmp_table_template),
1104 GFP_KERNEL);
5ee09105 1105
c027aab4 1106 if (table)
5ee09105
YH
1107 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1108
760f2d01
DL
1109 return table;
1110}
1da177e4 1111#endif