net/ipv6/icmp.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      Internet Control Message Protocol (ICMPv6)
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on net/ipv4/icmp.c
  10  *
  11  *      RFC 1885
  12  */
  13
/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure parameter problem is sent for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */
  27
  28 #define pr_fmt(fmt) "IPv6: " fmt
  29
  30 #include <linux/module.h>
  31 #include <linux/errno.h>
  32 #include <linux/types.h>
  33 #include <linux/socket.h>
  34 #include <linux/in.h>
  35 #include <linux/kernel.h>
  36 #include <linux/sockios.h>
  37 #include <linux/net.h>
  38 #include <linux/skbuff.h>
  39 #include <linux/init.h>
  40 #include <linux/netfilter.h>
  41 #include <linux/slab.h>
  42
  43 #ifdef CONFIG_SYSCTL
  44 #include <linux/sysctl.h>
  45 #endif
  46
  47 #include <linux/inet.h>
  48 #include <linux/netdevice.h>
  49 #include <linux/icmpv6.h>
  50
  51 #include <net/ip.h>
  52 #include <net/sock.h>
  53
  54 #include <net/ipv6.h>
  55 #include <net/ip6_checksum.h>
  56 #include <net/ping.h>
  57 #include <net/protocol.h>
  58 #include <net/raw.h>
  59 #include <net/rawv6.h>
  60 #include <net/transp_v6.h>
  61 #include <net/ip6_route.h>
  62 #include <net/addrconf.h>
  63 #include <net/icmp.h>
  64 #include <net/xfrm.h>
  65 #include <net/inet_common.h>
  66 #include <net/dsfield.h>
  67 #include <net/l3mdev.h>
  68
  69 #include <linux/uaccess.h>
  70
  71 /*
  72  *      The ICMP socket(s). This is the most convenient way to flow control
  73  *      our ICMP output as well as maintain a clean interface throughout
  74  *      all layers. All Socketless IP sends will soon be gone.
  75  *
  76  *      On SMP we have one ICMP socket per-cpu.
  77  */
  78 static struct sock *icmpv6_sk(struct net *net)
  79 {
  80         return this_cpu_read(*net->ipv6.icmp_sk);
  81 }
  82
  83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  84                        u8 type, u8 code, int offset, __be32 info)
  85 {
  86         /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  87         struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  88         struct net *net = dev_net(skb->dev);
  89
  90         if (type == ICMPV6_PKT_TOOBIG)
  91                 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  92         else if (type == NDISC_REDIRECT)
  93                 ip6_redirect(skb, net, skb->dev->ifindex, 0,
  94                              sock_net_uid(net, NULL));
  95
  96         if (!(type & ICMPV6_INFOMSG_MASK))
  97                 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
  98                         ping_err(skb, offset, ntohl(info));
  99
 100         return 0;
 101 }
 102
 103 static int icmpv6_rcv(struct sk_buff *skb);
 104
 105 static const struct inet6_protocol icmpv6_protocol = {
 106         .handler        =       icmpv6_rcv,
 107         .err_handler    =       icmpv6_err,
 108         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 109 };
 110
 111 /* Called with BH disabled */
 112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 113 {
 114         struct sock *sk;
 115
 116         sk = icmpv6_sk(net);
 117         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 118                 /* This can happen if the output path (f.e. SIT or
 119                  * ip6ip6 tunnel) signals dst_link_failure() for an
 120                  * outgoing ICMP6 packet.
 121                  */
 122                 return NULL;
 123         }
 124         return sk;
 125 }
 126
 127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 128 {
 129         spin_unlock(&sk->sk_lock.slock);
 130 }
 131
 132 /*
 133  * Figure out, may we reply to this packet with icmp error.
 134  *
 135  * We do not reply, if:
 136  *      - it was icmp error message.
 137  *      - it is truncated, so that it is known, that protocol is ICMPV6
 138  *        (i.e. in the middle of some exthdr)
 139  *
 140  *      --ANK (980726)
 141  */
 142
 143 static bool is_ineligible(const struct sk_buff *skb)
 144 {
 145         int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 146         int len = skb->len - ptr;
 147         __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 148         __be16 frag_off;
 149
 150         if (len < 0)
 151                 return true;
 152
 153         ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 154         if (ptr < 0)
 155                 return false;
 156         if (nexthdr == IPPROTO_ICMPV6) {
 157                 u8 _type, *tp;
 158                 tp = skb_header_pointer(skb,
 159                         ptr+offsetof(struct icmp6hdr, icmp6_type),
 160                         sizeof(_type), &_type);
 161
 162                 /* Based on RFC 8200, Section 4.5 Fragment Header, return
 163                  * false if this is a fragment packet with no icmp header info.
 164                  */
 165                 if (!tp && frag_off != 0)
 166                         return false;
 167                 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 168                         return true;
 169         }
 170         return false;
 171 }
 172
 173 static bool icmpv6_mask_allow(struct net *net, int type)
 174 {
 175         if (type > ICMPV6_MSG_MAX)
 176                 return true;
 177
 178         /* Limit if icmp type is set in ratemask. */
 179         if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
 180                 return true;
 181
 182         return false;
 183 }
 184
 185 static bool icmpv6_global_allow(struct net *net, int type)
 186 {
 187         if (icmpv6_mask_allow(net, type))
 188                 return true;
 189
 190         if (icmp_global_allow())
 191                 return true;
 192
 193         return false;
 194 }
 195
 196 /*
 197  * Check the ICMP output rate limit
 198  */
 199 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 200                                struct flowi6 *fl6)
 201 {
 202         struct net *net = sock_net(sk);
 203         struct dst_entry *dst;
 204         bool res = false;
 205
 206         if (icmpv6_mask_allow(net, type))
 207                 return true;
 208
 209         /*
 210          * Look up the output route.
 211          * XXX: perhaps the expire for routing entries cloned by
 212          * this lookup should be more aggressive (not longer than timeout).
 213          */
 214         dst = ip6_route_output(net, sk, fl6);
 215         if (dst->error) {
 216                 IP6_INC_STATS(net, ip6_dst_idev(dst),
 217                               IPSTATS_MIB_OUTNOROUTES);
 218         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 219                 res = true;
 220         } else {
 221                 struct rt6_info *rt = (struct rt6_info *)dst;
 222                 int tmo = net->ipv6.sysctl.icmpv6_time;
 223                 struct inet_peer *peer;
 224
 225                 /* Give more bandwidth to wider prefixes. */
 226                 if (rt->rt6i_dst.plen < 128)
 227                         tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 228
 229                 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 230                 res = inet_peer_xrlim_allow(peer, tmo);
 231                 if (peer)
 232                         inet_putpeer(peer);
 233         }
 234         dst_release(dst);
 235         return res;
 236 }
 237
 238 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
 239                                   struct flowi6 *fl6)
 240 {
 241         struct net *net = sock_net(sk);
 242         struct dst_entry *dst;
 243         bool res = false;
 244
 245         dst = ip6_route_output(net, sk, fl6);
 246         if (!dst->error) {
 247                 struct rt6_info *rt = (struct rt6_info *)dst;
 248                 struct in6_addr prefsrc;
 249
 250                 rt6_get_prefsrc(rt, &prefsrc);
 251                 res = !ipv6_addr_any(&prefsrc);
 252         }
 253         dst_release(dst);
 254         return res;
 255 }
 256
 257 /*
 258  *      an inline helper for the "simple" if statement below
 259  *      checks if parameter problem report is caused by an
 260  *      unrecognized IPv6 option that has the Option Type
 261  *      highest-order two bits set to 10
 262  */
 263
 264 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 265 {
 266         u8 _optval, *op;
 267
 268         offset += skb_network_offset(skb);
 269         op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 270         if (!op)
 271                 return true;
 272         return (*op & 0xC0) == 0x80;
 273 }
 274
 275 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 276                                 struct icmp6hdr *thdr, int len)
 277 {
 278         struct sk_buff *skb;
 279         struct icmp6hdr *icmp6h;
 280
 281         skb = skb_peek(&sk->sk_write_queue);
 282         if (!skb)
 283                 return;
 284
 285         icmp6h = icmp6_hdr(skb);
 286         memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 287         icmp6h->icmp6_cksum = 0;
 288
 289         if (skb_queue_len(&sk->sk_write_queue) == 1) {
 290                 skb->csum = csum_partial(icmp6h,
 291                                         sizeof(struct icmp6hdr), skb->csum);
 292                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 293                                                       &fl6->daddr,
 294                                                       len, fl6->flowi6_proto,
 295                                                       skb->csum);
 296         } else {
 297                 __wsum tmp_csum = 0;
 298
 299                 skb_queue_walk(&sk->sk_write_queue, skb) {
 300                         tmp_csum = csum_add(tmp_csum, skb->csum);
 301                 }
 302
 303                 tmp_csum = csum_partial(icmp6h,
 304                                         sizeof(struct icmp6hdr), tmp_csum);
 305                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 306                                                       &fl6->daddr,
 307                                                       len, fl6->flowi6_proto,
 308                                                       tmp_csum);
 309         }
 310         ip6_push_pending_frames(sk);
 311 }
 312
 313 struct icmpv6_msg {
 314         struct sk_buff  *skb;
 315         int             offset;
 316         uint8_t         type;
 317 };
 318
 319 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 320 {
 321         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 322         struct sk_buff *org_skb = msg->skb;
 323         __wsum csum;
 324
 325         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 326                                       to, len);
 327         skb->csum = csum_block_add(skb->csum, csum, odd);
 328         if (!(msg->type & ICMPV6_INFOMSG_MASK))
 329                 nf_ct_attach(skb, org_skb);
 330         return 0;
 331 }
 332
 333 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 334 static void mip6_addr_swap(struct sk_buff *skb)
 335 {
 336         struct ipv6hdr *iph = ipv6_hdr(skb);
 337         struct inet6_skb_parm *opt = IP6CB(skb);
 338         struct ipv6_destopt_hao *hao;
 339         struct in6_addr tmp;
 340         int off;
 341
 342         if (opt->dsthao) {
 343                 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 344                 if (likely(off >= 0)) {
 345                         hao = (struct ipv6_destopt_hao *)
 346                                         (skb_network_header(skb) + off);
 347                         tmp = iph->saddr;
 348                         iph->saddr = hao->addr;
 349                         hao->addr = tmp;
 350                 }
 351         }
 352 }
 353 #else
 354 static inline void mip6_addr_swap(struct sk_buff *skb) {}
 355 #endif
 356
 357 static struct dst_entry *icmpv6_route_lookup(struct net *net,
 358                                              struct sk_buff *skb,
 359                                              struct sock *sk,
 360                                              struct flowi6 *fl6)
 361 {
 362         struct dst_entry *dst, *dst2;
 363         struct flowi6 fl2;
 364         int err;
 365
 366         err = ip6_dst_lookup(net, sk, &dst, fl6);
 367         if (err)
 368                 return ERR_PTR(err);
 369
 370         /*
 371          * We won't send icmp if the destination is known
 372          * anycast.
 373          */
 374         if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 375                 net_dbg_ratelimited("icmp6_send: acast source\n");
 376                 dst_release(dst);
 377                 return ERR_PTR(-EINVAL);
 378         }
 379
 380         /* No need to clone since we're just using its address. */
 381         dst2 = dst;
 382
 383         dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
 384         if (!IS_ERR(dst)) {
 385                 if (dst != dst2)
 386                         return dst;
 387         } else {
 388                 if (PTR_ERR(dst) == -EPERM)
 389                         dst = NULL;
 390                 else
 391                         return dst;
 392         }
 393
 394         err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
 395         if (err)
 396                 goto relookup_failed;
 397
 398         err = ip6_dst_lookup(net, sk, &dst2, &fl2);
 399         if (err)
 400                 goto relookup_failed;
 401
 402         dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
 403         if (!IS_ERR(dst2)) {
 404                 dst_release(dst);
 405                 dst = dst2;
 406         } else {
 407                 err = PTR_ERR(dst2);
 408                 if (err == -EPERM) {
 409                         dst_release(dst);
 410                         return dst2;
 411                 } else
 412                         goto relookup_failed;
 413         }
 414
 415 relookup_failed:
 416         if (dst)
 417                 return dst;
 418         return ERR_PTR(err);
 419 }
 420
 421 static struct net_device *icmp6_dev(const struct sk_buff *skb)
 422 {
 423         struct net_device *dev = skb->dev;
 424
 425         /* for local traffic to local address, skb dev is the loopback
 426          * device. Check if there is a dst attached to the skb and if so
 427          * get the real device index. Same is needed for replies to a link
 428          * local address on a device enslaved to an L3 master device
 429          */
 430         if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 431                 const struct rt6_info *rt6 = skb_rt6_info(skb);
 432
 433                 if (rt6)
 434                         dev = rt6->rt6i_idev->dev;
 435         }
 436
 437         return dev;
 438 }
 439
 440 static int icmp6_iif(const struct sk_buff *skb)
 441 {
 442         return icmp6_dev(skb)->ifindex;
 443 }
 444
 445 /*
 446  *      Send an ICMP message in response to a packet in error
 447  */
 448 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 449                 const struct in6_addr *force_saddr)
 450 {
 451         struct inet6_dev *idev = NULL;
 452         struct ipv6hdr *hdr = ipv6_hdr(skb);
 453         struct sock *sk;
 454         struct net *net;
 455         struct ipv6_pinfo *np;
 456         const struct in6_addr *saddr = NULL;
 457         struct dst_entry *dst;
 458         struct icmp6hdr tmp_hdr;
 459         struct flowi6 fl6;
 460         struct icmpv6_msg msg;
 461         struct ipcm6_cookie ipc6;
 462         int iif = 0;
 463         int addr_type = 0;
 464         int len;
 465         u32 mark;
 466
 467         if ((u8 *)hdr < skb->head ||
 468             (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 469                 return;
 470
 471         if (!skb->dev)
 472                 return;
 473         net = dev_net(skb->dev);
 474         mark = IP6_REPLY_MARK(net, skb->mark);
 475         /*
 476          *      Make sure we respect the rules
 477          *      i.e. RFC 1885 2.4(e)
 478          *      Rule (e.1) is enforced by not using icmp6_send
 479          *      in any code that processes icmp errors.
 480          */
 481         addr_type = ipv6_addr_type(&hdr->daddr);
 482
 483         if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 484             ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 485                 saddr = &hdr->daddr;
 486
 487         /*
 488          *      Dest addr check
 489          */
 490
 491         if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 492                 if (type != ICMPV6_PKT_TOOBIG &&
 493                     !(type == ICMPV6_PARAMPROB &&
 494                       code == ICMPV6_UNK_OPTION &&
 495                       (opt_unrec(skb, info))))
 496                         return;
 497
 498                 saddr = NULL;
 499         }
 500
 501         addr_type = ipv6_addr_type(&hdr->saddr);
 502
 503         /*
 504          *      Source addr check
 505          */
 506
 507         if (__ipv6_addr_needs_scope_id(addr_type)) {
 508                 iif = icmp6_iif(skb);
 509         } else {
 510                 /*
 511                  * The source device is used for looking up which routing table
 512                  * to use for sending an ICMP error.
 513                  */
 514                 iif = l3mdev_master_ifindex(skb->dev);
 515         }
 516
 517         /*
 518          *      Must not send error if the source does not uniquely
 519          *      identify a single node (RFC2463 Section 2.4).
 520          *      We check unspecified / multicast addresses here,
 521          *      and anycast addresses will be checked later.
 522          */
 523         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 524                 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 525                                     &hdr->saddr, &hdr->daddr);
 526                 return;
 527         }
 528
 529         /*
 530          *      Never answer to a ICMP packet.
 531          */
 532         if (is_ineligible(skb)) {
 533                 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 534                                     &hdr->saddr, &hdr->daddr);
 535                 return;
 536         }
 537
 538         /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 539         local_bh_disable();
 540
 541         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 542         if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
 543                 goto out_bh_enable;
 544
 545         mip6_addr_swap(skb);
 546
 547         sk = icmpv6_xmit_lock(net);
 548         if (!sk)
 549                 goto out_bh_enable;
 550
 551         memset(&fl6, 0, sizeof(fl6));
 552         fl6.flowi6_proto = IPPROTO_ICMPV6;
 553         fl6.daddr = hdr->saddr;
 554         if (force_saddr)
 555                 saddr = force_saddr;
 556         if (saddr) {
 557                 fl6.saddr = *saddr;
 558         } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
 559                 /* select a more meaningful saddr from input if */
 560                 struct net_device *in_netdev;
 561
 562                 in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
 563                 if (in_netdev) {
 564                         ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
 565                                            inet6_sk(sk)->srcprefs,
 566                                            &fl6.saddr);
 567                         dev_put(in_netdev);
 568                 }
 569         }
 570         fl6.flowi6_mark = mark;
 571         fl6.flowi6_oif = iif;
 572         fl6.fl6_icmp_type = type;
 573         fl6.fl6_icmp_code = code;
 574         fl6.flowi6_uid = sock_net_uid(net, NULL);
 575         fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 576         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
 577
 578         np = inet6_sk(sk);
 579
 580         if (!icmpv6_xrlim_allow(sk, type, &fl6))
 581                 goto out;
 582
 583         tmp_hdr.icmp6_type = type;
 584         tmp_hdr.icmp6_code = code;
 585         tmp_hdr.icmp6_cksum = 0;
 586         tmp_hdr.icmp6_pointer = htonl(info);
 587
 588         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 589                 fl6.flowi6_oif = np->mcast_oif;
 590         else if (!fl6.flowi6_oif)
 591                 fl6.flowi6_oif = np->ucast_oif;
 592
 593         ipcm6_init_sk(&ipc6, np);
 594         ipc6.sockc.mark = mark;
 595         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 596
 597         dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 598         if (IS_ERR(dst))
 599                 goto out;
 600
 601         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 602
 603         msg.skb = skb;
 604         msg.offset = skb_network_offset(skb);
 605         msg.type = type;
 606
 607         len = skb->len - msg.offset;
 608         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 609         if (len < 0) {
 610                 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 611                                     &hdr->saddr, &hdr->daddr);
 612                 goto out_dst_release;
 613         }
 614
 615         rcu_read_lock();
 616         idev = __in6_dev_get(skb->dev);
 617
 618         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 619                             len + sizeof(struct icmp6hdr),
 620                             sizeof(struct icmp6hdr),
 621                             &ipc6, &fl6, (struct rt6_info *)dst,
 622                             MSG_DONTWAIT)) {
 623                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 624                 ip6_flush_pending_frames(sk);
 625         } else {
 626                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 627                                            len + sizeof(struct icmp6hdr));
 628         }
 629         rcu_read_unlock();
 630 out_dst_release:
 631         dst_release(dst);
 632 out:
 633         icmpv6_xmit_unlock(sk);
 634 out_bh_enable:
 635         local_bh_enable();
 636 }
 637 EXPORT_SYMBOL(icmp6_send);
 638
 639 /* Slightly more convenient version of icmp6_send.
 640  */
 641 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 642 {
 643         icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
 644         kfree_skb(skb);
 645 }
 646
 647 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 648  * if sufficient data bytes are available
 649  * @nhs is the size of the tunnel header(s) :
 650  *  Either an IPv4 header for SIT encap
 651  *         an IPv4 header + GRE header for GRE encap
 652  */
 653 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 654                                unsigned int data_len)
 655 {
 656         struct in6_addr temp_saddr;
 657         struct rt6_info *rt;
 658         struct sk_buff *skb2;
 659         u32 info = 0;
 660
 661         if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
 662                 return 1;
 663
 664         /* RFC 4884 (partial) support for ICMP extensions */
 665         if (data_len < 128 || (data_len & 7) || skb->len < data_len)
 666                 data_len = 0;
 667
 668         skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
 669
 670         if (!skb2)
 671                 return 1;
 672
 673         skb_dst_drop(skb2);
 674         skb_pull(skb2, nhs);
 675         skb_reset_network_header(skb2);
 676
 677         rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
 678                         skb, 0);
 679
 680         if (rt && rt->dst.dev)
 681                 skb2->dev = rt->dst.dev;
 682
 683         ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
 684
 685         if (data_len) {
 686                 /* RFC 4884 (partial) support :
 687                  * insert 0 padding at the end, before the extensions
 688                  */
 689                 __skb_push(skb2, nhs);
 690                 skb_reset_network_header(skb2);
 691                 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
 692                 memset(skb2->data + data_len - nhs, 0, nhs);
 693                 /* RFC 4884 4.5 : Length is measured in 64-bit words,
 694                  * and stored in reserved[0]
 695                  */
 696                 info = (data_len/8) << 24;
 697         }
 698         if (type == ICMP_TIME_EXCEEDED)
 699                 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 700                            info, &temp_saddr);
 701         else
 702                 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
 703                            info, &temp_saddr);
 704         if (rt)
 705                 ip6_rt_put(rt);
 706
 707         kfree_skb(skb2);
 708
 709         return 0;
 710 }
 711 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 712
 713 static void icmpv6_echo_reply(struct sk_buff *skb)
 714 {
 715         struct net *net = dev_net(skb->dev);
 716         struct sock *sk;
 717         struct inet6_dev *idev;
 718         struct ipv6_pinfo *np;
 719         const struct in6_addr *saddr = NULL;
 720         struct icmp6hdr *icmph = icmp6_hdr(skb);
 721         struct icmp6hdr tmp_hdr;
 722         struct flowi6 fl6;
 723         struct icmpv6_msg msg;
 724         struct dst_entry *dst;
 725         struct ipcm6_cookie ipc6;
 726         u32 mark = IP6_REPLY_MARK(net, skb->mark);
 727         bool acast;
 728
 729         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
 730             net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
 731                 return;
 732
 733         saddr = &ipv6_hdr(skb)->daddr;
 734
 735         acast = ipv6_anycast_destination(skb_dst(skb), saddr);
 736         if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
 737                 return;
 738
 739         if (!ipv6_unicast_destination(skb) &&
 740             !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
 741                 saddr = NULL;
 742
 743         memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 744         tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 745
 746         memset(&fl6, 0, sizeof(fl6));
 747         if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
 748                 fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
 749
 750         fl6.flowi6_proto = IPPROTO_ICMPV6;
 751         fl6.daddr = ipv6_hdr(skb)->saddr;
 752         if (saddr)
 753                 fl6.saddr = *saddr;
 754         fl6.flowi6_oif = icmp6_iif(skb);
 755         fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 756         fl6.flowi6_mark = mark;
 757         fl6.flowi6_uid = sock_net_uid(net, NULL);
 758         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
 759
 760         local_bh_disable();
 761         sk = icmpv6_xmit_lock(net);
 762         if (!sk)
 763                 goto out_bh_enable;
 764         np = inet6_sk(sk);
 765
 766         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 767                 fl6.flowi6_oif = np->mcast_oif;
 768         else if (!fl6.flowi6_oif)
 769                 fl6.flowi6_oif = np->ucast_oif;
 770
 771         if (ip6_dst_lookup(net, sk, &dst, &fl6))
 772                 goto out;
 773         dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 774         if (IS_ERR(dst))
 775                 goto out;
 776
 777         /* Check the ratelimit */
 778         if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
 779             !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
 780                 goto out_dst_release;
 781
 782         idev = __in6_dev_get(skb->dev);
 783
 784         msg.skb = skb;
 785         msg.offset = 0;
 786         msg.type = ICMPV6_ECHO_REPLY;
 787
 788         ipcm6_init_sk(&ipc6, np);
 789         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 790         ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 791         ipc6.sockc.mark = mark;
 792
 793         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 794                             skb->len + sizeof(struct icmp6hdr),
 795                             sizeof(struct icmp6hdr), &ipc6, &fl6,
 796                             (struct rt6_info *)dst, MSG_DONTWAIT)) {
 797                 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 798                 ip6_flush_pending_frames(sk);
 799         } else {
 800                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 801                                            skb->len + sizeof(struct icmp6hdr));
 802         }
 803 out_dst_release:
 804         dst_release(dst);
 805 out:
 806         icmpv6_xmit_unlock(sk);
 807 out_bh_enable:
 808         local_bh_enable();
 809 }
 810
 811 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 812 {
 813         const struct inet6_protocol *ipprot;
 814         int inner_offset;
 815         __be16 frag_off;
 816         u8 nexthdr;
 817         struct net *net = dev_net(skb->dev);
 818
 819         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 820                 goto out;
 821
 822         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 823         if (ipv6_ext_hdr(nexthdr)) {
 824                 /* now skip over extension headers */
 825                 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 826                                                 &nexthdr, &frag_off);
 827                 if (inner_offset < 0)
 828                         goto out;
 829         } else {
 830                 inner_offset = sizeof(struct ipv6hdr);
 831         }
 832
 833         /* Checkin header including 8 bytes of inner protocol header. */
 834         if (!pskb_may_pull(skb, inner_offset+8))
 835                 goto out;
 836
 837         /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 838            Without this we will not able f.e. to make source routed
 839            pmtu discovery.
 840            Corresponding argument (opt) to notifiers is already added.
 841            --ANK (980726)
 842          */
 843
 844         ipprot = rcu_dereference(inet6_protos[nexthdr]);
 845         if (ipprot && ipprot->err_handler)
 846                 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 847
 848         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 849         return;
 850
 851 out:
 852         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 853 }
 854
 855 /*
 856  *      Handle icmp messages
 857  */
 858
 859 static int icmpv6_rcv(struct sk_buff *skb)
 860 {
 861         struct net *net = dev_net(skb->dev);
 862         struct net_device *dev = icmp6_dev(skb);
 863         struct inet6_dev *idev = __in6_dev_get(dev);
 864         const struct in6_addr *saddr, *daddr;
 865         struct icmp6hdr *hdr;
 866         u8 type;
 867         bool success = false;
 868
 869         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 870                 struct sec_path *sp = skb_sec_path(skb);
 871                 int nh;
 872
 873                 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 874                                  XFRM_STATE_ICMP))
 875                         goto drop_no_count;
 876
 877                 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 878                         goto drop_no_count;
 879
 880                 nh = skb_network_offset(skb);
 881                 skb_set_network_header(skb, sizeof(*hdr));
 882
 883                 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 884                         goto drop_no_count;
 885
 886                 skb_set_network_header(skb, nh);
 887         }
 888
 889         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 890
 891         saddr = &ipv6_hdr(skb)->saddr;
 892         daddr = &ipv6_hdr(skb)->daddr;
 893
 894         if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 895                 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 896                                     saddr, daddr);
 897                 goto csum_error;
 898         }
 899
 900         if (!pskb_pull(skb, sizeof(*hdr)))
 901                 goto discard_it;
 902
 903         hdr = icmp6_hdr(skb);
 904
 905         type = hdr->icmp6_type;
 906
 907         ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 908
 909         switch (type) {
 910         case ICMPV6_ECHO_REQUEST:
 911                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 912                         icmpv6_echo_reply(skb);
 913                 break;
 914
 915         case ICMPV6_ECHO_REPLY:
 916                 success = ping_rcv(skb);
 917                 break;
 918
 919         case ICMPV6_PKT_TOOBIG:
 920                 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 921                    standard destination cache. Seems, only "advanced"
 922                    destination cache will allow to solve this problem
 923                    --ANK (980726)
 924                  */
 925                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 926                         goto discard_it;
 927                 hdr = icmp6_hdr(skb);
 928
 929                 /* to notify */
 930                 fallthrough;
 931         case ICMPV6_DEST_UNREACH:
 932         case ICMPV6_TIME_EXCEED:
 933         case ICMPV6_PARAMPROB:
 934                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 935                 break;
 936
 937         case NDISC_ROUTER_SOLICITATION:
 938         case NDISC_ROUTER_ADVERTISEMENT:
 939         case NDISC_NEIGHBOUR_SOLICITATION:
 940         case NDISC_NEIGHBOUR_ADVERTISEMENT:
 941         case NDISC_REDIRECT:
 942                 ndisc_rcv(skb);
 943                 break;
 944
 945         case ICMPV6_MGM_QUERY:
 946                 igmp6_event_query(skb);
 947                 break;
 948
 949         case ICMPV6_MGM_REPORT:
 950                 igmp6_event_report(skb);
 951                 break;
 952
 953         case ICMPV6_MGM_REDUCTION:
 954         case ICMPV6_NI_QUERY:
 955         case ICMPV6_NI_REPLY:
 956         case ICMPV6_MLD2_REPORT:
 957         case ICMPV6_DHAAD_REQUEST:
 958         case ICMPV6_DHAAD_REPLY:
 959         case ICMPV6_MOBILE_PREFIX_SOL:
 960         case ICMPV6_MOBILE_PREFIX_ADV:
 961                 break;
 962
 963         default:
 964                 /* informational */
 965                 if (type & ICMPV6_INFOMSG_MASK)
 966                         break;
 967
 968                 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 969                                     saddr, daddr);
 970
 971                 /*
 972                  * error of unknown type.
 973                  * must pass to upper level
 974                  */
 975
 976                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 977         }
 978
 979         /* until the v6 path can be better sorted assume failure and
 980          * preserve the status quo behaviour for the rest of the paths to here
 981          */
 982         if (success)
 983                 consume_skb(skb);
 984         else
 985                 kfree_skb(skb);
 986
 987         return 0;
 988
 989 csum_error:
 990         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 991 discard_it:
 992         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 993 drop_no_count:
 994         kfree_skb(skb);
 995         return 0;
 996 }
 997
 998 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 999                       u8 type,
1000                       const struct in6_addr *saddr,
1001                       const struct in6_addr *daddr,
1002                       int oif)
1003 {
1004         memset(fl6, 0, sizeof(*fl6));
1005         fl6->saddr = *saddr;
1006         fl6->daddr = *daddr;
1007         fl6->flowi6_proto       = IPPROTO_ICMPV6;
1008         fl6->fl6_icmp_type      = type;
1009         fl6->fl6_icmp_code      = 0;
1010         fl6->flowi6_oif         = oif;
1011         security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1012 }
1013
1014 static void __net_exit icmpv6_sk_exit(struct net *net)
1015 {
1016         int i;
1017
1018         for_each_possible_cpu(i)
1019                 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1020         free_percpu(net->ipv6.icmp_sk);
1021 }
1022
1023 static int __net_init icmpv6_sk_init(struct net *net)
1024 {
1025         struct sock *sk;
1026         int err, i;
1027
1028         net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1029         if (!net->ipv6.icmp_sk)
1030                 return -ENOMEM;
1031
1032         for_each_possible_cpu(i) {
1033                 err = inet_ctl_sock_create(&sk, PF_INET6,
1034                                            SOCK_RAW, IPPROTO_ICMPV6, net);
1035                 if (err < 0) {
1036                         pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1037                                err);
1038                         goto fail;
1039                 }
1040
1041                 *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1042
1043                 /* Enough space for 2 64K ICMP packets, including
1044                  * sk_buff struct overhead.
1045                  */
1046                 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1047         }
1048         return 0;
1049
1050  fail:
1051         icmpv6_sk_exit(net);
1052         return err;
1053 }
1054
1055 static struct pernet_operations icmpv6_sk_ops = {
1056         .init = icmpv6_sk_init,
1057         .exit = icmpv6_sk_exit,
1058 };
1059
1060 int __init icmpv6_init(void)
1061 {
1062         int err;
1063
1064         err = register_pernet_subsys(&icmpv6_sk_ops);
1065         if (err < 0)
1066                 return err;
1067
1068         err = -EAGAIN;
1069         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1070                 goto fail;
1071
1072         err = inet6_register_icmp_sender(icmp6_send);
1073         if (err)
1074                 goto sender_reg_err;
1075         return 0;
1076
1077 sender_reg_err:
1078         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1079 fail:
1080         pr_err("Failed to register ICMP6 protocol\n");
1081         unregister_pernet_subsys(&icmpv6_sk_ops);
1082         return err;
1083 }
1084
1085 void icmpv6_cleanup(void)
1086 {
1087         inet6_unregister_icmp_sender(icmp6_send);
1088         unregister_pernet_subsys(&icmpv6_sk_ops);
1089         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1090 }
1091
1092
/*
 * errno value and "fatal" flag for each ICMPV6_DEST_UNREACH code.
 * icmpv6_err_convert() indexes this table directly with the code value,
 * so the position of every entry matters.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	[0] = { .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	[1] = { .err = EACCES,		.fatal = 1 },	/* ADM_PROHIBITED */
	[2] = { .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	[3] = { .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	[4] = { .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	[5] = { .err = EACCES,		.fatal = 1 },	/* POLICY_FAIL */
	[6] = { .err = EACCES,		.fatal = 1 },	/* REJECT_ROUTE */
};
1126
1127 int icmpv6_err_convert(u8 type, u8 code, int *err)
1128 {
1129         int fatal = 0;
1130
1131         *err = EPROTO;
1132
1133         switch (type) {
1134         case ICMPV6_DEST_UNREACH:
1135                 fatal = 1;
1136                 if (code < ARRAY_SIZE(tab_unreach)) {
1137                         *err  = tab_unreach[code].err;
1138                         fatal = tab_unreach[code].fatal;
1139                 }
1140                 break;
1141
1142         case ICMPV6_PKT_TOOBIG:
1143                 *err = EMSGSIZE;
1144                 break;
1145
1146         case ICMPV6_PARAMPROB:
1147                 *err = EPROTO;
1148                 fatal = 1;
1149                 break;
1150
1151         case ICMPV6_TIME_EXCEED:
1152                 *err = EHOSTUNREACH;
1153                 break;
1154         }
1155
1156         return fatal;
1157 }
1158 EXPORT_SYMBOL(icmpv6_err_convert);
1159
1160 #ifdef CONFIG_SYSCTL
1161 static struct ctl_table ipv6_icmp_table_template[] = {
1162         {
1163                 .procname       = "ratelimit",
1164                 .data           = &init_net.ipv6.sysctl.icmpv6_time,
1165                 .maxlen         = sizeof(int),
1166                 .mode           = 0644,
1167                 .proc_handler   = proc_dointvec_ms_jiffies,
1168         },
1169         {
1170                 .procname       = "echo_ignore_all",
1171                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1172                 .maxlen         = sizeof(int),
1173                 .mode           = 0644,
1174                 .proc_handler = proc_dointvec,
1175         },
1176         {
1177                 .procname       = "echo_ignore_multicast",
1178                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1179                 .maxlen         = sizeof(int),
1180                 .mode           = 0644,
1181                 .proc_handler = proc_dointvec,
1182         },
1183         {
1184                 .procname       = "echo_ignore_anycast",
1185                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1186                 .maxlen         = sizeof(int),
1187                 .mode           = 0644,
1188                 .proc_handler = proc_dointvec,
1189         },
1190         {
1191                 .procname       = "ratemask",
1192                 .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1193                 .maxlen         = ICMPV6_MSG_MAX + 1,
1194                 .mode           = 0644,
1195                 .proc_handler = proc_do_large_bitmap,
1196         },
1197         { },
1198 };
1199
1200 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1201 {
1202         struct ctl_table *table;
1203
1204         table = kmemdup(ipv6_icmp_table_template,
1205                         sizeof(ipv6_icmp_table_template),
1206                         GFP_KERNEL);
1207
1208         if (table) {
1209                 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1210                 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1211                 table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1212                 table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1213                 table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1214         }
1215         return table;
1216 }
1217 #endif