]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blobdiff - net/ipv6/addrconf.c
net: Optimize snmp stat aggregation by walking all the percpu data at once
[mirror_ubuntu-jammy-kernel.git] / net / ipv6 / addrconf.c
index 32153c248959c1768810d8d86266f04174b0c025..99c0f2b843f01de8ae86190b720d58b57bcf3e57 100644 (file)
@@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .max_addresses          = IPV6_MAX_ADDRESSES,
        .accept_ra_defrtr       = 1,
        .accept_ra_from_local   = 0,
+       .accept_ra_min_hop_limit= 1,
        .accept_ra_pinfo        = 1,
 #ifdef CONFIG_IPV6_ROUTER_PREF
        .accept_ra_rtr_pref     = 1,
@@ -211,7 +212,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .accept_ra_mtu          = 1,
        .stable_secret          = {
                .initialized = false,
-       }
+       },
+       .use_oif_addrs_only     = 0,
+       .ignore_routes_with_linkdown = 0,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -236,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .max_addresses          = IPV6_MAX_ADDRESSES,
        .accept_ra_defrtr       = 1,
        .accept_ra_from_local   = 0,
+       .accept_ra_min_hop_limit= 1,
        .accept_ra_pinfo        = 1,
 #ifdef CONFIG_IPV6_ROUTER_PREF
        .accept_ra_rtr_pref     = 1,
@@ -253,6 +257,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .stable_secret          = {
                .initialized = false,
        },
+       .use_oif_addrs_only     = 0,
+       .ignore_routes_with_linkdown = 0,
 };
 
 /* Check if a valid qdisc is available */
@@ -468,6 +474,9 @@ static int inet6_netconf_msgsize_devconf(int type)
        if (type == -1 || type == NETCONFA_PROXY_NEIGH)
                size += nla_total_size(4);
 
+       if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+               size += nla_total_size(4);
+
        return size;
 }
 
@@ -504,6 +513,11 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
            nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
                goto nla_put_failure;
 
+       if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+           nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+                       devconf->ignore_routes_with_linkdown) < 0)
+               goto nla_put_failure;
+
        nlmsg_end(skb, nlh);
        return 0;
 
@@ -540,6 +554,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
+       [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
 };
 
 static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
@@ -762,6 +777,63 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
                rt6_purge_dflt_routers(net);
        return 1;
 }
+
+static void addrconf_linkdown_change(struct net *net, __s32 newf)
+{
+       struct net_device *dev;
+       struct inet6_dev *idev;
+
+       for_each_netdev(net, dev) {
+               idev = __in6_dev_get(dev);
+               if (idev) {
+                       int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
+
+                       idev->cnf.ignore_routes_with_linkdown = newf;
+                       if (changed)
+                               inet6_netconf_notify_devconf(dev_net(dev),
+                                                            NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+                                                            dev->ifindex,
+                                                            &idev->cnf);
+               }
+       }
+}
+
+static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+{
+       struct net *net;
+       int old;
+
+       if (!rtnl_trylock())
+               return restart_syscall();
+
+       net = (struct net *)table->extra2;
+       old = *p;
+       *p = newf;
+
+       if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
+               if ((!newf) ^ (!old))
+                       inet6_netconf_notify_devconf(net,
+                                                    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+                                                    NETCONFA_IFINDEX_DEFAULT,
+                                                    net->ipv6.devconf_dflt);
+               rtnl_unlock();
+               return 0;
+       }
+
+       if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
+               net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+               addrconf_linkdown_change(net, newf);
+               if ((!newf) ^ (!old))
+                       inet6_netconf_notify_devconf(net,
+                                                    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+                                                    NETCONFA_IFINDEX_ALL,
+                                                    net->ipv6.devconf_all);
+       }
+       rtnl_unlock();
+
+       return 1;
+}
+
 #endif
 
 /* Nobody refers to this ifaddr, destroy it */
@@ -1472,11 +1544,16 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
         *    include addresses assigned to interfaces
         *    belonging to the same site as the outgoing
         *    interface.)
+        *  - "It is RECOMMENDED that the candidate source addresses
+        *    be the set of unicast addresses assigned to the
+        *    interface that will be used to send to the destination
+        *    (the 'outgoing' interface)." (RFC 6724)
         */
        if (dst_dev) {
+               idev = __in6_dev_get(dst_dev);
                if ((dst_type & IPV6_ADDR_MULTICAST) ||
-                   dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) {
-                       idev = __in6_dev_get(dst_dev);
+                   dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
+                   (idev && idev->cnf.use_oif_addrs_only)) {
                        use_oif_addr = true;
                }
        }
@@ -3579,7 +3656,7 @@ static void addrconf_dad_work(struct work_struct *w)
 
        /* send a neighbour solicitation for our addr */
        addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-       ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
+       ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
 out:
        in6_ifa_put(ifp);
        rtnl_unlock();
@@ -4581,6 +4658,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
        array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
        array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+       array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
        array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
 #ifdef CONFIG_IPV6_ROUTER_PREF
        array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
@@ -4606,7 +4684,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
        array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
        array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
+       array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
        /* we omit DEVCONF_STABLE_SECRET for now */
+       array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -4626,6 +4706,7 @@ static inline size_t inet6_if_nlmsg_size(void)
               + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
               + nla_total_size(4) /* IFLA_MTU */
               + nla_total_size(4) /* IFLA_LINK */
+              + nla_total_size(1) /* IFLA_OPERSTATE */
               + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
 }
 
@@ -4645,18 +4726,24 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
 }
 
 static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
-                                     int items, int bytes, size_t syncpoff)
+                                       int bytes, size_t syncpoff)
 {
-       int i;
-       int pad = bytes - sizeof(u64) * items;
+       int i, c;
+       u64 buff[IPSTATS_MIB_MAX];
+       int pad = bytes - sizeof(u64) * IPSTATS_MIB_MAX;
+
        BUG_ON(pad < 0);
 
-       /* Use put_unaligned() because stats may not be aligned for u64. */
-       put_unaligned(items, &stats[0]);
-       for (i = 1; i < items; i++)
-               put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+       memset(buff, 0, sizeof(buff));
+       buff[0] = IPSTATS_MIB_MAX;
 
-       memset(&stats[items], 0, pad);
+       for_each_possible_cpu(c) {
+               for (i = 1; i < IPSTATS_MIB_MAX; i++)
+                       buff[i] += snmp_get_cpu_field64(mib, c, i, syncpoff);
+       }
+
+       memcpy(stats, buff, IPSTATS_MIB_MAX * sizeof(u64));
+       memset(&stats[IPSTATS_MIB_MAX], 0, pad);
 }
 
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
@@ -4664,8 +4751,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 {
        switch (attrtype) {
        case IFLA_INET6_STATS:
-               __snmp6_fill_stats64(stats, idev->stats.ipv6,
-                                    IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
+               __snmp6_fill_stats64(stats, idev->stats.ipv6, bytes,
+                                    offsetof(struct ipstats_mib, syncp));
                break;
        case IFLA_INET6_ICMP6STATS:
                __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
@@ -4882,7 +4969,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
             nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
            nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
            (dev->ifindex != dev_get_iflink(dev) &&
-            nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
+            nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+           nla_put_u8(skb, IFLA_OPERSTATE,
+                      netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
                goto nla_put_failure;
        protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
        if (!protoinfo)
@@ -5327,6 +5416,34 @@ out:
        return err;
 }
 
+static
+int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
+                                               int write,
+                                               void __user *buffer,
+                                               size_t *lenp,
+                                               loff_t *ppos)
+{
+       int *valp = ctl->data;
+       int val = *valp;
+       loff_t pos = *ppos;
+       struct ctl_table lctl;
+       int ret;
+
+       /* ctl->data points to idev->cnf.ignore_routes_when_linkdown
+        * we should not modify it until we get the rtnl lock.
+        */
+       lctl = *ctl;
+       lctl.data = &val;
+
+       ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+       if (write)
+               ret = addrconf_fixup_linkdown(ctl, valp, val);
+       if (ret)
+               *ppos = pos;
+       return ret;
+}
+
 static struct addrconf_sysctl_table
 {
        struct ctl_table_header *sysctl_header;
@@ -5476,6 +5593,13 @@ static struct addrconf_sysctl_table
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
+               {
+                       .procname       = "accept_ra_min_hop_limit",
+                       .data           = &ipv6_devconf.accept_ra_min_hop_limit,
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = proc_dointvec,
+               },
                {
                        .procname       = "accept_ra_pinfo",
                        .data           = &ipv6_devconf.accept_ra_pinfo,
@@ -5605,6 +5729,20 @@ static struct addrconf_sysctl_table
                        .mode           = 0600,
                        .proc_handler   = addrconf_sysctl_stable_secret,
                },
+               {
+                       .procname       = "use_oif_addrs_only",
+                       .data           = &ipv6_devconf.use_oif_addrs_only,
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = proc_dointvec,
+               },
+               {
+                       .procname       = "ignore_routes_with_linkdown",
+                       .data           = &ipv6_devconf.ignore_routes_with_linkdown,
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = addrconf_sysctl_ignore_routes_with_linkdown,
+               },
                {
                        /* sentinel */
                }