A previous commit enabled the tracking of object allocations
in Linux-backed caches from the SPL layer for debuggability.
The commit is:
9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16
Unfortunately, it also introduced minor performance regressions
that were highlighted by the ZFS perf test-suite. Within Delphix
we found that the regression ranged from -1% all the way up to
-8%, depending on the workload.
This commit brings performance back up to par by creating a
separate counter for those caches and making it a percpu counter
in order to avoid lock contention.
The initial performance testing was done by me, and the final
round was conducted by @tonynguien, who also discovered the
regression and identified the culprit.
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #10397
--- /dev/null
+dnl #
+dnl # 3.18 API change:
+dnl # percpu_counter_init() now must be passed a GFP mask as its third
+dnl # argument.
+dnl #
+dnl # Test source: call percpu_counter_init() with three arguments
+dnl # (counter, initial value, GFP_KERNEL). The test is named
+dnl # percpu_counter_init_with_gfp; its result is reported by
+dnl # ZFS_AC_KERNEL_PERCPU_COUNTER_INIT.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [
+ ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [
+ #include <linux/gfp.h>
+ #include <linux/percpu_counter.h>
+ ],[
+ struct percpu_counter counter;
+ int error;
+
+ error = percpu_counter_init(&counter, 0, GFP_KERNEL);
+ ])
+])
+
+dnl #
+dnl # Report the result of the percpu_counter_init_with_gfp test.
+dnl # On "yes", HAVE_PERCPU_COUNTER_INIT_WITH_GFP is defined to 1;
+dnl # on "no", nothing is defined.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [
+ AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t])
+ ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1,
+ [percpu_counter_init() wants gfp_t])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
+dnl #
+dnl # Groups the test sources for the percpu-related checks; currently
+dnl # only the percpu_counter_init() test.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [
+ ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT
+])
+
+dnl #
+dnl # Groups the result checks for the percpu-related tests; currently
+dnl # only the percpu_counter_init() check.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [
+ ZFS_AC_KERNEL_PERCPU_COUNTER_INIT
+])
ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC
ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
ZFS_AC_KERNEL_SRC_KSTRTOUL
+ ZFS_AC_KERNEL_SRC_PERCPU
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC
ZFS_AC_KERNEL_TOTALHIGH_PAGES
ZFS_AC_KERNEL_KSTRTOUL
+ ZFS_AC_KERNEL_PERCPU
])
dnl #
blkdev_compat.h \
utsname_compat.h \
kmap_compat.h \
+ percpu_compat.h \
simd.h \
simd_x86.h \
simd_aarch64.h \
--- /dev/null
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2020 by Delphix. All rights reserved.
+ */
+
+#ifndef _ZFS_PERCPU_H
+#define _ZFS_PERCPU_H
+
+#include <linux/percpu_counter.h>
+
+/*
+ * 3.18 API change,
+ * percpu_counter_init() now must be passed a gfp mask which will be
+ * used for the dynamic allocation of the actual counter.
+ *
+ * percpu_counter_init_common() always takes the gfp argument so that
+ * callers can be written once: when HAVE_PERCPU_COUNTER_INIT_WITH_GFP
+ * is defined the mask is forwarded to percpu_counter_init(), otherwise
+ * it is dropped and the two-argument form is called.
+ */
+#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP
+#define percpu_counter_init_common(counter, n, gfp) \
+ percpu_counter_init(counter, n, gfp)
+#else
+#define percpu_counter_init_common(counter, n, gfp) \
+ percpu_counter_init(counter, n)
+#endif
+
+#endif /* _ZFS_PERCPU_H */
uint64_t skc_slab_max; /* Slab max historic */
uint64_t skc_obj_total; /* Obj total current */
uint64_t skc_obj_alloc; /* Obj alloc current */
+ struct percpu_counter skc_linux_alloc; /* Linux-backed Obj alloc */
uint64_t skc_obj_max; /* Obj max historic */
uint64_t skc_obj_deadlock; /* Obj emergency deadlocks */
uint64_t skc_obj_emergency; /* Obj emergency current */
#include <sys/wait.h>
#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/percpu_compat.h>
#include <linux/prefetch.h>
/*
skc->skc_obj_emergency = 0;
skc->skc_obj_emergency_max = 0;
+ rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
+ GFP_KERNEL);
+ if (rc != 0) {
+ kfree(skc);
+ return (NULL);
+ }
+
/*
* Verify the requested alignment restriction is sane.
*/
return (skc);
out:
kfree(skc->skc_name);
+ percpu_counter_destroy(&skc->skc_linux_alloc);
kfree(skc);
return (NULL);
}
ASSERT3U(skc->skc_obj_emergency, ==, 0);
ASSERT(list_empty(&skc->skc_complete_list));
+ ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
+ percpu_counter_destroy(&skc->skc_linux_alloc);
+
spin_unlock(&skc->skc_lock);
kfree(skc->skc_name);
* how many objects we've allocated in it for
* better debuggability.
*/
- spin_lock(&skc->skc_lock);
- skc->skc_obj_alloc++;
- spin_unlock(&skc->skc_lock);
+ percpu_counter_inc(&skc->skc_linux_alloc);
}
goto ret;
}
*/
if (skc->skc_flags & KMC_SLAB) {
kmem_cache_free(skc->skc_linux_cache, obj);
- spin_lock(&skc->skc_lock);
- skc->skc_obj_alloc--;
- spin_unlock(&skc->skc_lock);
+ percpu_counter_dec(&skc->skc_linux_alloc);
return;
}
* the underlying Linux cache please refer to /proc/slabinfo.
*/
spin_lock(&skc->skc_lock);
+ uint64_t objs_allocated =
+ percpu_counter_sum(&skc->skc_linux_alloc);
seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
"%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
(long unsigned)skc->skc_flags,
"-",
- (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
+ (long unsigned)(skc->skc_obj_size * objs_allocated),
"-",
(unsigned)skc->skc_obj_size,
"-", "-", "-", "-",
- (long unsigned)skc->skc_obj_alloc,
+ (long unsigned)objs_allocated,
"-", "-", "-", "-");
spin_unlock(&skc->skc_lock);
return (0);