Make use of kvmalloc if available and fix vmem_alloc implementation

author Michael Niewöhner <foss@mniewoehner.de>

Sun, 21 Jul 2019 17:34:07 +0000 (19:34 +0200)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Wed, 13 Nov 2019 18:05:10 +0000 (10:05 -0800)
author Michael Niewöhner <foss@mniewoehner.de>
Sun, 21 Jul 2019 17:34:07 +0000 (19:34 +0200)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 13 Nov 2019 18:05:10 +0000 (10:05 -0800)
diff --git a/config/kernel-kmem.m4 b/config/kernel-kmem.m4

index cc055e530c15216ff88bd4e0718ad2cbe3ed6863..2862299168c1fd0a96de978de3f1fca5605d50d3 100644 (file)
--- a/config/kernel-kmem.m4
+++ b/config/kernel-kmem.m4
@@ -56,3 +56,27 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [
         AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
         AC_MSG_RESULT([$enable_debug_kmem_tracking])
  ])
+
+dnl #
+dnl # 4.12 API,
+dnl # Added kvmalloc allocation strategy
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [
+       ZFS_LINUX_TEST_SRC([kvmalloc], [
+               #include <linux/mm.h>
+       ],[
+               void *p __attribute__ ((unused));
+
+               p = kvmalloc(0, GFP_KERNEL);
+       ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [
+       AC_MSG_CHECKING([whether kvmalloc(ptr, flags) is available])
+       ZFS_LINUX_TEST_RESULT([kvmalloc], [
+               AC_MSG_RESULT(yes)
+               AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists])
+       ],[
+               AC_MSG_RESULT(no)
+       ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4

index 1e39c15cac4fedf612d8370cc12c08579110c31f..4309d5456e2cd0aaced7b31725b777da3ec009f0 100644 (file)
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -44,6 +44,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
         ZFS_AC_KERNEL_SRC_SCHED
         ZFS_AC_KERNEL_SRC_USLEEP_RANGE
         ZFS_AC_KERNEL_SRC_KMEM_CACHE
+       ZFS_AC_KERNEL_SRC_KVMALLOC
         ZFS_AC_KERNEL_SRC_WAIT
         ZFS_AC_KERNEL_SRC_INODE_TIMES
         ZFS_AC_KERNEL_SRC_INODE_LOCK
@@ -137,6 +138,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
         ZFS_AC_KERNEL_SCHED
         ZFS_AC_KERNEL_USLEEP_RANGE
         ZFS_AC_KERNEL_KMEM_CACHE
+       ZFS_AC_KERNEL_KVMALLOC
         ZFS_AC_KERNEL_WAIT
         ZFS_AC_KERNEL_INODE_TIMES
         ZFS_AC_KERNEL_INODE_LOCK
diff --git a/include/os/linux/spl/sys/kmem.h b/include/os/linux/spl/sys/kmem.h

index 986c7d244974c10a2c230ea73f0489d5fb37e9ba..1f51f5d98cdbc52c9f6a190287378cce09fe53aa 100644 (file)
--- a/include/os/linux/spl/sys/kmem.h
+++ b/include/os/linux/spl/sys/kmem.h
@@ -28,6 +28,8 @@
  #include <sys/debug.h>
  #include <linux/slab.h>
  #include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
  
  extern int kmem_debugging(void);
  extern char *kmem_vasprintf(const char *fmt, va_list ap);
@@ -47,6 +49,7 @@ extern void kmem_strfree(char *str);
  #define        KM_PUBLIC_MASK  (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
  
  static int spl_fstrans_check(void);
+void *spl_kvmalloc(size_t size, gfp_t flags);
  
  /*
   * Convert a KM_* flags mask to its Linux GFP_* counterpart.  The conversion
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c

index 24bf084b9c102996c346dba3ed7f23bfd75effed..7e423100d6b08d8551345f8be35b3c332e1a5ae2 100644 (file)
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -203,7 +203,23 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
                 ASSERT(ISP2(size));
                 ptr = (void *)__get_free_pages(lflags, get_order(size));
         } else {
-               ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
+               /*
+                * GFP_KERNEL allocations can safely use kvmalloc which may
+                * improve performance by avoiding a) high latency caused by
+                * vmalloc's on-access allocation, b) performance loss due to
+                * MMU memory address mapping and c) vmalloc locking overhead.
+                * This has the side-effect that the slab statistics will
+                * incorrectly report this as a vmem allocation, but that is
+                * purely cosmetic.
+                *
+                * For non-GFP_KERNEL allocations we stick to __vmalloc.
+                */
+               if ((lflags & GFP_KERNEL) == GFP_KERNEL) {
+                       ptr = spl_kvmalloc(size, lflags);
+               } else {
+                       ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
+                           PAGE_KERNEL);
+               }
         }
  
         /* Resulting allocated memory will be page aligned */
@@ -231,7 +247,7 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
                 ASSERT(ISP2(size));
                 free_pages((unsigned long)ptr, get_order(size));
         } else {
-               vfree(ptr);
+               spl_kmem_free_impl(ptr, size);
         }
  }
  
diff --git a/module/os/linux/spl/spl-kmem.c b/module/os/linux/spl/spl-kmem.c

index 8a32929c8005aa75f963c777ab47eca63a62f032..d2799b5bd399ffce4ef0dfa2136feb7a6f32b9c6 100644 (file)
--- a/module/os/linux/spl/spl-kmem.c
+++ b/module/os/linux/spl/spl-kmem.c
@@ -133,6 +133,73 @@ kmem_strfree(char *str)
  }
  EXPORT_SYMBOL(kmem_strfree);
  
+/* Kernel compatibility for <4.13 */
+#ifndef __GFP_RETRY_MAYFAIL
+#define        __GFP_RETRY_MAYFAIL     __GFP_REPEAT
+#endif
+
+void *
+spl_kvmalloc(size_t size, gfp_t lflags)
+{
+#ifdef HAVE_KVMALLOC
+       /*
+        * GFP_KERNEL allocations can safely use kvmalloc which may
+        * improve performance by avoiding a) high latency caused by
+        * vmalloc's on-access allocation, b) performance loss due to
+        * MMU memory address mapping and c) vmalloc locking overhead.
+        * This has the side-effect that the slab statistics will
+        * incorrectly report this as a vmem allocation, but that is
+        * purely cosmetic.
+        */
+       if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+               return (kvmalloc(size, lflags));
+#endif
+
+       gfp_t kmalloc_lflags = lflags;
+
+       if (size > PAGE_SIZE) {
+               /*
+                * We need to set __GFP_NOWARN here since spl_kvmalloc is not
+                * only called by spl_kmem_alloc_impl but can be called
+                * directly with custom lflags, too. In that case
+                * kmem_flags_convert does not get called, which would
+                * implicitly set __GFP_NOWARN.
+                */
+               kmalloc_lflags |= __GFP_NOWARN;
+
+               /*
+                * N.B. __GFP_RETRY_MAYFAIL is supported only for large
+                * (>32kB) allocations.
+                *
+                * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY
+                * for !costly requests because there is no other way to tell
+                * the allocator that we want to fail rather than retry
+                * endlessly.
+                */
+               if (!(kmalloc_lflags & __GFP_RETRY_MAYFAIL) ||
+                   (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+                       kmalloc_lflags |= __GFP_NORETRY;
+               }
+       }
+
+       /*
+        * We first try kmalloc - even for big sizes - and fall back to
+        * __vmalloc if that fails.
+        *
+        * For non-GFP_KERNEL allocations we always stick to kmalloc_node,
+        * and fail when kmalloc is not successful (returns NULL).
+        * We cannot fall back to __vmalloc in this case because __vmalloc
+        * internally uses GFP_KERNEL allocations.
+        */
+       void *ptr = kmalloc_node(size, kmalloc_lflags, NUMA_NO_NODE);
+       if (ptr || size <= PAGE_SIZE ||
+           (lflags & GFP_KERNEL) != GFP_KERNEL) {
+               return (ptr);
+       }
+
+       return (__vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL));
+}
+
  /*
   * General purpose unified implementation of kmem_alloc(). It is an
   * amalgamation of Linux and Illumos allocator design. It should never be
@@ -144,7 +211,6 @@ inline void *
  spl_kmem_alloc_impl(size_t size, int flags, int node)
  {
         gfp_t lflags = kmem_flags_convert(flags);
-       int use_vmem = 0;
         void *ptr;
  
         /*
@@ -178,7 +244,7 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
                  * impact performance so frequently manipulating the virtual
                  * address space is strongly discouraged.
                  */
-               if ((size > spl_kmem_alloc_max) || use_vmem) {
+               if (size > spl_kmem_alloc_max) {
                         if (flags & KM_VMEM) {
                                 ptr = __vmalloc(size, lflags | __GFP_HIGHMEM,
                                     PAGE_KERNEL);
@@ -186,20 +252,22 @@ spl_kmem_alloc_impl(size_t size, int flags, int node)
                                 return (NULL);
                         }
                 } else {
-                       ptr = kmalloc_node(size, lflags, node);
+                       if (flags & KM_VMEM) {
+                               ptr = spl_kvmalloc(size, lflags);
+                       } else {
+                               ptr = kmalloc_node(size, lflags, node);
+                       }
                 }
  
                 if (likely(ptr) || (flags & KM_NOSLEEP))
                         return (ptr);
  
                 /*
-                * For vmem_alloc() and vmem_zalloc() callers retry immediately
-                * using __vmalloc() which is unlikely to fail.
+                * Try hard to satisfy the allocation. However, when progress
+                * cannot be made, the allocation is allowed to fail.
                  */
-               if ((flags & KM_VMEM) && (use_vmem == 0))  {
-                       use_vmem = 1;
-                       continue;
-               }
+               if ((lflags & GFP_KERNEL) == GFP_KERNEL)
+                       lflags |= __GFP_RETRY_MAYFAIL;
  
                 /*
                  * Use cond_resched() instead of congestion_wait() to avoid
author	Michael Niewöhner <foss@mniewoehner.de>
	Sun, 21 Jul 2019 17:34:07 +0000 (19:34 +0200)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Wed, 13 Nov 2019 18:05:10 +0000 (10:05 -0800)
config/kernel-kmem.m4		patch \| blob \| blame \| history
config/kernel.m4		patch \| blob \| blame \| history
include/os/linux/spl/sys/kmem.h		patch \| blob \| blame \| history
module/os/linux/spl/spl-kmem-cache.c		patch \| blob \| blame \| history
module/os/linux/spl/spl-kmem.c		patch \| blob \| blame \| history