git.proxmox.com Git - mirror_zfs.git/commitdiff
Use percpu_counter for obj_alloc counter of Linux-backed caches
author Serapheim Dimitropoulos <serapheim@delphix.com>
Sat, 27 Jun 2020 01:06:50 +0000 (18:06 -0700)
committer GitHub <noreply@github.com>
Sat, 27 Jun 2020 01:06:50 +0000 (18:06 -0700)
A previous commit enabled the tracking of object allocations
in Linux-backed caches from the SPL layer for debuggability.
The commit is: 9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16

Unfortunately, it also introduced minor performance regressions
that were highlighted by the ZFS perf test-suite. Within Delphix
we found that performance dropped by anywhere from 1% to as much
as 8% for some workloads.

This commit brings performance back up to par by creating a
separate counter for those caches and making it a percpu counter
in order to avoid lock contention.

The initial performance testing was done by myself, and the
final round was conducted by @tonynguien who was also the one
that discovered the regression and highlighted the culprit.

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #10397

config/kernel-percpu.m4 [new file with mode: 0644]
config/kernel.m4
include/os/linux/kernel/linux/Makefile.am
include/os/linux/kernel/linux/percpu_compat.h [new file with mode: 0644]
include/os/linux/spl/sys/kmem_cache.h
module/os/linux/spl/spl-kmem-cache.c
module/os/linux/spl/spl-proc.c

diff --git a/config/kernel-percpu.m4 b/config/kernel-percpu.m4
new file mode 100644 (file)
index 0000000..e9654a6
--- /dev/null
@@ -0,0 +1,34 @@
+dnl #
+dnl # 3.18 API change, percpu_counter_init() now must be passed a GFP mask.
+dnl # HAVE_PERCPU_COUNTER_INIT_WITH_GFP is defined when the check succeeds.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [
+       ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [
+               #include <linux/gfp.h>
+               #include <linux/percpu_counter.h>
+       ],[
+               struct percpu_counter counter;
+               int error;
+
+               error = percpu_counter_init(&counter, 0, GFP_KERNEL);
+       ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [
+       AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t])
+       ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [
+               AC_MSG_RESULT(yes)
+               AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1,
+                   [percpu_counter_init() wants gfp_t])
+       ],[
+               AC_MSG_RESULT(no)
+       ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [
+       ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [
+       ZFS_AC_KERNEL_PERCPU_COUNTER_INIT
+])
index 78b0ce4d3aa963b51ee442fbd73f6c8fe36b302b..ec52f014a7a35cfbfbaa984b3d9a461be3f883ae 100644 (file)
@@ -121,6 +121,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
        ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC
        ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
        ZFS_AC_KERNEL_SRC_KSTRTOUL
+       ZFS_AC_KERNEL_SRC_PERCPU
 
        AC_MSG_CHECKING([for available kernel interfaces])
        ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -216,6 +217,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
        ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC
        ZFS_AC_KERNEL_TOTALHIGH_PAGES
        ZFS_AC_KERNEL_KSTRTOUL
+       ZFS_AC_KERNEL_PERCPU
 ])
 
 dnl #
index c142aac331e9225e4bd549286650abe0a474f3b8..86b253304d5c4715572c6b4f68c7591a62c7e4fe 100644 (file)
@@ -5,6 +5,7 @@ KERNEL_H = \
        blkdev_compat.h \
        utsname_compat.h \
        kmap_compat.h \
+       percpu_compat.h \
        simd.h \
        simd_x86.h \
        simd_aarch64.h \
diff --git a/include/os/linux/kernel/linux/percpu_compat.h b/include/os/linux/kernel/linux/percpu_compat.h
new file mode 100644 (file)
index 0000000..e7a4242
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#ifndef _ZFS_PERCPU_H
+#define        _ZFS_PERCPU_H
+
+#include <linux/percpu_counter.h>
+
+/*
+ * 3.18 API change: percpu_counter_init() must now be passed a gfp mask
+ * for the dynamic allocation of the actual counter. Without
+ * HAVE_PERCPU_COUNTER_INIT_WITH_GFP the wrapper below discards gfp.
+ */
+#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP
+#define        percpu_counter_init_common(counter, n, gfp) \
+       percpu_counter_init(counter, n, gfp)
+#else
+#define        percpu_counter_init_common(counter, n, gfp) \
+       percpu_counter_init(counter, n)
+#endif
+
+#endif /* _ZFS_PERCPU_H */
index 5667382f7161ea6265c6da1b7dfdcb847acbc2be..ed63f400029aae080f80371245cba834ca8c8335 100644 (file)
@@ -202,6 +202,7 @@ typedef struct spl_kmem_cache {
        uint64_t                skc_slab_max;   /* Slab max historic  */
        uint64_t                skc_obj_total;  /* Obj total current */
        uint64_t                skc_obj_alloc;  /* Obj alloc current */
+       struct percpu_counter   skc_linux_alloc;   /* Linux-backed Obj alloc  */
        uint64_t                skc_obj_max;    /* Obj max historic */
        uint64_t                skc_obj_deadlock;  /* Obj emergency deadlocks */
        uint64_t                skc_obj_emergency; /* Obj emergency current */
index 4e8ce90931880ea9ca537ef3b66cd5de49602b8b..3fab184c893e0a1a20dbe9ba787cb70cd018a23a 100644 (file)
@@ -31,6 +31,7 @@
 #include <sys/wait.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
+#include <linux/percpu_compat.h>
 #include <linux/prefetch.h>
 
 /*
@@ -948,6 +949,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
        skc->skc_obj_emergency = 0;
        skc->skc_obj_emergency_max = 0;
 
+       rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
+           GFP_KERNEL);
+       if (rc != 0) {
+               kfree(skc);
+               return (NULL);
+       }
+
        /*
         * Verify the requested alignment restriction is sane.
         */
@@ -1047,6 +1055,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
        return (skc);
 out:
        kfree(skc->skc_name);
+       percpu_counter_destroy(&skc->skc_linux_alloc);
        kfree(skc);
        return (NULL);
 }
@@ -1117,6 +1126,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
        ASSERT3U(skc->skc_obj_emergency, ==, 0);
        ASSERT(list_empty(&skc->skc_complete_list));
 
+       ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
+       percpu_counter_destroy(&skc->skc_linux_alloc);
+
        spin_unlock(&skc->skc_lock);
 
        kfree(skc->skc_name);
@@ -1473,9 +1485,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
                         * how many objects we've allocated in it for
                         * better debuggability.
                         */
-                       spin_lock(&skc->skc_lock);
-                       skc->skc_obj_alloc++;
-                       spin_unlock(&skc->skc_lock);
+                       percpu_counter_inc(&skc->skc_linux_alloc);
                }
                goto ret;
        }
@@ -1550,9 +1560,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
         */
        if (skc->skc_flags & KMC_SLAB) {
                kmem_cache_free(skc->skc_linux_cache, obj);
-               spin_lock(&skc->skc_lock);
-               skc->skc_obj_alloc--;
-               spin_unlock(&skc->skc_lock);
+               percpu_counter_dec(&skc->skc_linux_alloc);
                return;
        }
 
index f68f9b5220d216c1c87cb8f390b0c0d15376b733..1d777d234f1014d80dc7375feee93dd34526ac64 100644 (file)
@@ -446,16 +446,18 @@ slab_seq_show(struct seq_file *f, void *p)
                 * the underlying Linux cache please refer to /proc/slabinfo.
                 */
                spin_lock(&skc->skc_lock);
+               uint64_t objs_allocated =
+                   percpu_counter_sum(&skc->skc_linux_alloc);
                seq_printf(f, "%-36s  ", skc->skc_name);
                seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
                    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
                    (long unsigned)skc->skc_flags,
                    "-",
-                   (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
+                   (long unsigned)(skc->skc_obj_size * objs_allocated),
                    "-",
                    (unsigned)skc->skc_obj_size,
                    "-", "-", "-", "-",
-                   (long unsigned)skc->skc_obj_alloc,
+                   (long unsigned)objs_allocated,
                    "-", "-", "-", "-");
                spin_unlock(&skc->skc_lock);
                return (0);