git.proxmox.com Git - mirror_spl.git/blobdiff - module/spl/spl-kmem-cache.c
Fix more cstyle warnings
index 86c26ff05436e2ef8e25599052c78391d5b57438..c73a2fdc24784593221fd5855512b4ac0b807d90 100644
@@ -31,6 +31,7 @@
 #include <linux/swap.h>
 #include <linux/mm_compat.h>
 #include <linux/wait_compat.h>
+#include <linux/prefetch.h>
 
 /*
  * Within the scope of spl-kmem.c file the kmem_cache_* definitions
@@ -65,6 +66,7 @@
  * because it has been shown to improve responsiveness on low memory systems.
  * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
  */
+/* BEGIN CSTYLED */
 unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
 EXPORT_SYMBOL(spl_kmem_cache_expire);
 module_param(spl_kmem_cache_expire, uint, 0644);
@@ -87,7 +89,7 @@ MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
 unsigned int spl_kmem_cache_magazine_size = 0;
 module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
-       "Default magazine size (2-256), set automatically (0)\n");
+       "Default magazine size (2-256), set automatically (0)");
 
 /*
  * The default behavior is to report the number of objects remaining in the
@@ -133,8 +135,8 @@ MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
  * have been deemed costly by the kernel.
  */
 unsigned int spl_kmem_cache_kmem_limit =
-    ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
-    SPL_KMEM_CACHE_OBJ_PER_SLAB;
+       ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
+       SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_kmem_limit, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
        "Objects less than N bytes use the kmalloc");
@@ -147,6 +149,7 @@ unsigned int spl_kmem_cache_kmem_threads = 4;
 module_param(spl_kmem_cache_kmem_threads, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
        "Number of spl_kmem_cache threads");
+/* END CSTYLED */
 
 /*
  * Slab allocation interfaces
@@ -200,7 +203,7 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
                ASSERT(ISP2(size));
                ptr = (void *)__get_free_pages(lflags, get_order(size));
        } else {
-               ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
+               ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
        }
 
        /* Resulting allocated memory will be page aligned */
@@ -355,8 +358,9 @@ out:
        if (rc) {
                if (skc->skc_flags & KMC_OFFSLAB)
                        list_for_each_entry_safe(sko,
-                           n, &sks->sks_free_list, sko_list)
+                           n, &sks->sks_free_list, sko_list) {
                                kv_free(skc, sko->sko_addr, offslab_size);
+                       }
 
                kv_free(skc, base, skc->skc_slab_size);
                sks = NULL;
@@ -381,7 +385,6 @@ spl_slab_free(spl_kmem_slab_t *sks,
 
        skc = sks->sks_cache;
        ASSERT(skc->skc_magic == SKC_MAGIC);
-       ASSERT(spin_is_locked(&skc->skc_lock));
 
        /*
         * Update slab/objects counters in the cache, then remove the
@@ -582,7 +585,6 @@ __spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(skm->skm_magic == SKM_MAGIC);
-       ASSERT(spin_is_locked(&skc->skc_lock));
 
        for (i = 0; i < count; i++)
                spl_cache_shrink(skc, skm->skm_objs[i]);
@@ -805,15 +807,18 @@ spl_magazine_create(spl_kmem_cache_t *skc)
        if (skc->skc_flags & KMC_NOMAGAZINE)
                return (0);
 
+       skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
+           num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
        skc->skc_mag_size = spl_magazine_size(skc);
        skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
 
-       for_each_online_cpu(i) {
+       for_each_possible_cpu(i) {
                skc->skc_mag[i] = spl_magazine_alloc(skc, i);
                if (!skc->skc_mag[i]) {
                        for (i--; i >= 0; i--)
                                spl_magazine_free(skc->skc_mag[i]);
 
+                       kfree(skc->skc_mag);
                        return (-ENOMEM);
                }
        }
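/*
 * Illustrative sketch (editorial, not part of the patch): the hunk above,
 * together with the spl_magazine_destroy() hunk that follows, replaces the
 * fixed skc_mag[NR_CPUS] array with a pointer array sized at run time by
 * num_possible_cpus().  The general allocate/unwind/free pattern, written
 * with hypothetical demo_* names and only stock kernel primitives, is:
 */
#include <linux/slab.h>
#include <linux/cpumask.h>

struct demo_mag {
	int	dm_filled;		/* hypothetical per-CPU payload */
};

static struct demo_mag **
demo_mags_create(void)
{
	struct demo_mag **mags;
	int i;

	/* One pointer per possible CPU id, not just those online now. */
	mags = kzalloc(sizeof (*mags) * num_possible_cpus(), GFP_KERNEL);
	if (mags == NULL)
		return (NULL);

	for_each_possible_cpu(i) {
		mags[i] = kzalloc(sizeof (*mags[i]), GFP_KERNEL);
		if (mags[i] == NULL) {
			/* Unwind; kfree(NULL) is a no-op for any holes. */
			for (i--; i >= 0; i--)
				kfree(mags[i]);
			kfree(mags);
			return (NULL);
		}
	}

	return (mags);
}

static void
demo_mags_destroy(struct demo_mag **mags)
{
	int i;

	for_each_possible_cpu(i)
		kfree(mags[i]);
	kfree(mags);
}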
@@ -833,11 +838,13 @@ spl_magazine_destroy(spl_kmem_cache_t *skc)
        if (skc->skc_flags & KMC_NOMAGAZINE)
                return;
 
-       for_each_online_cpu(i) {
+       for_each_possible_cpu(i) {
                skm = skc->skc_mag[i];
                spl_cache_flush(skc, skm, skm->skm_avail);
                spl_magazine_free(skm);
        }
+
+       kfree(skc->skc_mag);
 }
 
 /*
@@ -880,12 +887,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 
        might_sleep();
 
-       /*
-        * Allocate memory for a new cache and initialize it.  Unfortunately,
-        * this usually ends up being a large allocation of ~32k because
-        * we need to allocate enough memory for the worst case number of
-        * cpus in the magazine, skc_mag[NR_CPUS].
-        */
        skc = kzalloc(sizeof (*skc), lflags);
        if (skc == NULL)
                return (NULL);
@@ -986,13 +987,32 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
                if (rc)
                        goto out;
        } else {
+               unsigned long slabflags = 0;
+
                if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
                        rc = EINVAL;
                        goto out;
                }
 
-               skc->skc_linux_cache = kmem_cache_create(
-                   skc->skc_name, size, align, 0, NULL);
+#if defined(SLAB_USERCOPY)
+               /*
+                * Required for PAX-enabled kernels if the slab is to be
+                * used for copying between user and kernel space.
+                */
+               slabflags |= SLAB_USERCOPY;
+#endif
+
+#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
+               /*
+                * Newer grsec patchsets use kmem_cache_create_usercopy()
+                * instead of the SLAB_USERCOPY flag.
+                */
+               skc->skc_linux_cache = kmem_cache_create_usercopy(
+                   skc->skc_name, size, align, slabflags, 0, size, NULL);
+#else
+               skc->skc_linux_cache = kmem_cache_create(
+                   skc->skc_name, size, align, slabflags, NULL);
+#endif
                if (skc->skc_linux_cache == NULL) {
                        rc = ENOMEM;
                        goto out;
@@ -1106,7 +1126,6 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(sks->sks_magic == SKS_MAGIC);
-       ASSERT(spin_is_locked(&skc->skc_lock));
 
        sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
        ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -1139,36 +1158,43 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
  * It is responsible for allocating a new slab, linking it in to the list
  * of partial slabs, and then waking any waiters.
  */
-static void
-spl_cache_grow_work(void *data)
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
 {
-       spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
-       spl_kmem_cache_t *skc = ska->ska_cache;
        spl_kmem_slab_t *sks;
 
-#if defined(PF_MEMALLOC_NOIO)
-       unsigned noio_flag = memalloc_noio_save();
-       sks = spl_slab_alloc(skc, ska->ska_flags);
-       memalloc_noio_restore(noio_flag);
-#else
        fstrans_cookie_t cookie = spl_fstrans_mark();
-       sks = spl_slab_alloc(skc, ska->ska_flags);
+       sks = spl_slab_alloc(skc, flags);
        spl_fstrans_unmark(cookie);
-#endif
+
        spin_lock(&skc->skc_lock);
        if (sks) {
                skc->skc_slab_total++;
                skc->skc_obj_total += sks->sks_objs;
                list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+               smp_mb__before_atomic();
+               clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+               smp_mb__after_atomic();
+               wake_up_all(&skc->skc_waitq);
        }
+       spin_unlock(&skc->skc_lock);
+
+       return (sks == NULL ? -ENOMEM : 0);
+}
+
+static void
+spl_cache_grow_work(void *data)
+{
+       spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+       spl_kmem_cache_t *skc = ska->ska_cache;
+
+       (void) __spl_cache_grow(skc, ska->ska_flags);
 
        atomic_dec(&skc->skc_ref);
        smp_mb__before_atomic();
        clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
-       clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
        smp_mb__after_atomic();
-       wake_up_all(&skc->skc_waitq);
-       spin_unlock(&skc->skc_lock);
 
        kfree(ska);
 }
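/*
 * Illustrative sketch (editorial, not part of the patch): the refactored
 * __spl_cache_grow() above clears KMC_BIT_DEADLOCKED and wakes any waiters
 * as soon as the new slab is linked in, instead of deferring that to the
 * work handler.  The underlying wait/wake-on-flag-bit idiom, shown with
 * hypothetical demo_* names and stock kernel primitives, is:
 */
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/atomic.h>

#define DEMO_BIT_BLOCKED	0	/* hypothetical flag bit */

static unsigned long demo_flags;
static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);

static void
demo_block(void)
{
	set_bit(DEMO_BIT_BLOCKED, &demo_flags);
}

static void
demo_wait_until_unblocked(void)
{
	/* Sleep until another context clears the bit and wakes us. */
	wait_event(demo_waitq, !test_bit(DEMO_BIT_BLOCKED, &demo_flags));
}

static void
demo_unblock(void)
{
	/*
	 * The barriers around clear_bit() order it against the waiter's
	 * test_bit() so the wake-up is not observed before the flag change.
	 */
	smp_mb__before_atomic();
	clear_bit(DEMO_BIT_BLOCKED, &demo_flags);
	smp_mb__after_atomic();
	wake_up_all(&demo_waitq);
}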
@@ -1208,6 +1234,21 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
                return (rc ? rc : -EAGAIN);
        }
 
+       /*
+        * To reduce context-switch overhead and improve NUMA locality, first
+        * try to allocate the new slab in the current process context with
+        * the KM_NOSLEEP flag.  If that fails, dispatch the allocation to
+        * the taskq below.
+        *
+        * However, this cannot be done for KMC_VMEM caches because of a bug
+        * where __vmalloc() does not honor the gfp flags passed to it for
+        * its page table allocations.
+        */
+       if (!(skc->skc_flags & KMC_VMEM)) {
+               rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
+               if (rc == 0)
+                       return (0);
+       }
+
        /*
         * This is handled by dispatching a work request to the global work
         * queue.  This allows us to asynchronously allocate a new slab while
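/*
 * Illustrative sketch (editorial, not part of the patch): the block added
 * above first attempts the grow synchronously with KM_NOSLEEP and only
 * falls back to the asynchronous taskq path when that fails.  The same
 * "try fast inline, defer the sleeping work" shape, expressed with plain
 * kernel workqueues instead of the SPL taskq and hypothetical demo_*
 * names, is roughly:
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>

struct demo_cache {
	atomic_t	dc_objs;	/* hypothetical object counter */
};

static int
demo_grow(struct demo_cache *c, gfp_t gfp)
{
	void *slab = kzalloc(4096, gfp);	/* stand-in for a real slab */

	if (slab == NULL)
		return (-ENOMEM);

	kfree(slab);				/* demo only */
	atomic_inc(&c->dc_objs);
	return (0);
}

struct demo_grow_req {
	struct work_struct	dgr_work;
	struct demo_cache	*dgr_cache;
};

static void
demo_grow_worker(struct work_struct *work)
{
	struct demo_grow_req *req =
	    container_of(work, struct demo_grow_req, dgr_work);

	/* Sleeping allocations are fine in work (process) context. */
	(void) demo_grow(req->dgr_cache, GFP_KERNEL);
	kfree(req);
}

static int
demo_grow_fast_or_defer(struct demo_cache *c)
{
	struct demo_grow_req *req;

	/* Fast path: try in the caller's context without sleeping. */
	if (demo_grow(c, GFP_NOWAIT) == 0)
		return (0);

	/* Slow path: hand the sleeping allocation to a worker. */
	req = kzalloc(sizeof (*req), GFP_NOWAIT);
	if (req == NULL)
		return (-ENOMEM);

	req->dgr_cache = c;
	INIT_WORK(&req->dgr_work, demo_grow_worker);
	schedule_work(&req->dgr_work);

	return (-EAGAIN);			/* caller waits or retries */
}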
@@ -1355,7 +1396,6 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
        spl_kmem_obj_t *sko = NULL;
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
-       ASSERT(spin_is_locked(&skc->skc_lock));
 
        sko = spl_sko_from_obj(skc, obj);
        ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -1403,8 +1443,6 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
-       atomic_inc(&skc->skc_ref);
-
        /*
         * Allocate directly from a Linux slab.  All optimizations are left
         * to the underlying cache we only need to guarantee that KM_SLEEP
@@ -1457,8 +1495,6 @@ ret:
                        prefetchw(obj);
        }
 
-       atomic_dec(&skc->skc_ref);
-
        return (obj);
 }
 EXPORT_SYMBOL(spl_kmem_cache_alloc);
@@ -1479,7 +1515,6 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-       atomic_inc(&skc->skc_ref);
 
        /*
         * Run the destructor
@@ -1492,7 +1527,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
         */
        if (skc->skc_flags & KMC_SLAB) {
                kmem_cache_free(skc->skc_linux_cache, obj);
-               goto out;
+               return;
        }
 
        /*
@@ -1507,7 +1542,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
                spin_unlock(&skc->skc_lock);
 
                if (do_emergency && (spl_emergency_free(skc, obj) == 0))
-                       goto out;
+                       return;
        }
 
        local_irq_save(flags);
@@ -1538,8 +1573,6 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 
        if (do_reclaim)
                spl_slab_reclaim(skc);
-out:
-       atomic_dec(&skc->skc_ref);
 }
 EXPORT_SYMBOL(spl_kmem_cache_free);
 
@@ -1632,16 +1665,11 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
        atomic_inc(&skc->skc_ref);
 
        /*
-        * Execute the registered reclaim callback if it exists.  The
-        * per-cpu caches will be drained when is set KMC_EXPIRE_MEM.
+        * Execute the registered reclaim callback if it exists.
         */
        if (skc->skc_flags & KMC_SLAB) {
                if (skc->skc_reclaim)
                        skc->skc_reclaim(skc->skc_private);
-
-               if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
-                       kmem_cache_shrink(skc->skc_linux_cache);
-
                goto out;
        }