#include <linux/swap.h>
#include <linux/mm_compat.h>
#include <linux/wait_compat.h>
+#include <linux/prefetch.h>
/*
 * Cache expiration is enabled by default because it has been shown to
 * improve responsiveness on low memory systems.  This policy may be
 * changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
 */
+/* BEGIN CSTYLED */
unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
EXPORT_SYMBOL(spl_kmem_cache_expire);
module_param(spl_kmem_cache_expire, uint, 0644);
unsigned int spl_kmem_cache_magazine_size = 0;
module_param(spl_kmem_cache_magazine_size, uint, 0444);
MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
- "Default magazine size (2-256), set automatically (0)\n");
+ "Default magazine size (2-256), set automatically (0)");
/*
 * This value defaults to a threshold designed to avoid allocations which
 * have been deemed costly by the kernel (PAGE_ALLOC_COSTLY_ORDER).
 */
unsigned int spl_kmem_cache_kmem_limit =
- ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
- SPL_KMEM_CACHE_OBJ_PER_SLAB;
+ ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
+ SPL_KMEM_CACHE_OBJ_PER_SLAB;
module_param(spl_kmem_cache_kmem_limit, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
"Objects less than N bytes use the kmalloc");
module_param(spl_kmem_cache_kmem_threads, uint, 0444);
MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
"Number of spl_kmem_cache threads");
+/* END CSTYLED */
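For concreteness, a worked instance of the default above; the three input
values are assumptions about a typical x86_64 build, not guarantees made by
this patch:

/*
 * Assuming PAGE_ALLOC_COSTLY_ORDER == 3, PAGE_SIZE == 4096, and
 * SPL_KMEM_CACHE_OBJ_PER_SLAB == 8:
 *
 *	(1 << (3 - 1)) * 4096 / 8  ==  4 * 4096 / 8  ==  2048
 *
 * so by default objects of up to 2 KiB are backed by kmalloc() based
 * slabs and larger objects fall through to the SPL's own implementation.
 */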
/*
 * Slab allocation interfaces
 */
ASSERT(ISP2(size));
ptr = (void *)__get_free_pages(lflags, get_order(size));
} else {
- ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
}
/* Resulting allocated memory will be page aligned */
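To make the branch above self-contained, a minimal sketch of the surrounding
helper under the assumptions visible in this hunk; kv_alloc_sketch() is a
hypothetical name, and the three-argument __vmalloc() matches kernels before
5.8:

#include <linux/gfp.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *
kv_alloc_sketch(size_t size, gfp_t lflags)
{
	void *ptr;

	/* Power-of-two sizes map cleanly onto whole pages. */
	if (is_power_of_2(size))
		ptr = (void *)__get_free_pages(lflags, get_order(size));
	else
		ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);

	/* Either path yields page aligned memory. */
	return (ptr);
}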
if (rc) {
if (skc->skc_flags & KMC_OFFSLAB)
list_for_each_entry_safe(sko,
- n, &sks->sks_free_list, sko_list)
+ n, &sks->sks_free_list, sko_list) {
kv_free(skc, sko->sko_addr, offslab_size);
+ }
kv_free(skc, base, skc->skc_slab_size);
sks = NULL;
skc = sks->sks_cache;
ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(spin_is_locked(&skc->skc_lock));
/*
 * Update slab/objects counters in the cache, then remove the
 * slab from the cache's partial list.
 */
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(skm->skm_magic == SKM_MAGIC);
- ASSERT(spin_is_locked(&skc->skc_lock));
for (i = 0; i < count; i++)
spl_cache_shrink(skc, skm->skm_objs[i]);
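The ASSERT(spin_is_locked(...)) checks removed above are not merely noise:
on uniprocessor builds without spinlock debugging, spin_is_locked() always
returns 0, so the assertions fire spuriously.  A sketch of a UP-safe
alternative using lockdep (a judgment call for illustration, not what this
patch adopts):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

static void
example_update_counters(spinlock_t *lock, unsigned long *total)
{
	/* Checked only when CONFIG_LOCKDEP is enabled; a no-op otherwise. */
	lockdep_assert_held(lock);
	(*total)++;
}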
if (skc->skc_flags & KMC_NOMAGAZINE)
return (0);
+ skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
+ num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
skc->skc_mag_size = spl_magazine_size(skc);
skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
- for_each_online_cpu(i) {
+ for_each_possible_cpu(i) {
skc->skc_mag[i] = spl_magazine_alloc(skc, i);
if (!skc->skc_mag[i]) {
for (i--; i >= 0; i--)
spl_magazine_free(skc->skc_mag[i]);
+ kfree(skc->skc_mag);
return (-ENOMEM);
}
}
if (skc->skc_flags & KMC_NOMAGAZINE)
return;
- for_each_online_cpu(i) {
+ for_each_possible_cpu(i) {
skm = skc->skc_mag[i];
spl_cache_flush(skc, skm, skm->skm_avail);
spl_magazine_free(skm);
}
+
+ kfree(skc->skc_mag);
}
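The switch from for_each_online_cpu() to for_each_possible_cpu(), together
with sizing skc_mag[] at runtime, removes the old NR_CPUS-sized worst case
and keeps later-hotplugged CPUs covered.  A condensed sketch of the same
allocate/teardown pairing with hypothetical names; like the unwind loop
above, it assumes possible CPU ids are contiguous from 0:

#include <linux/cpumask.h>
#include <linux/slab.h>

static void **
percpu_table_alloc(size_t slot_size)
{
	void **tbl;
	int i;

	/* One slot per possible CPU, not just those online right now. */
	tbl = kcalloc(num_possible_cpus(), sizeof (void *), GFP_KERNEL);
	if (tbl == NULL)
		return (NULL);

	for_each_possible_cpu(i) {
		tbl[i] = kzalloc(slot_size, GFP_KERNEL);
		if (tbl[i] == NULL) {
			/* Unwind the slots populated so far. */
			for (i--; i >= 0; i--)
				kfree(tbl[i]);
			kfree(tbl);
			return (NULL);
		}
	}

	return (tbl);
}

static void
percpu_table_free(void **tbl)
{
	int i;

	for_each_possible_cpu(i)
		kfree(tbl[i]);
	kfree(tbl);
}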
might_sleep();
- /*
- * Allocate memory for a new cache and initialize it. Unfortunately,
- * this usually ends up being a large allocation of ~32k because
- * we need to allocate enough memory for the worst case number of
- * cpus in the magazine, skc_mag[NR_CPUS].
- */
skc = kzalloc(sizeof (*skc), lflags);
if (skc == NULL)
return (NULL);
if (rc)
goto out;
} else {
+ unsigned long slabflags = 0;
+
if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
rc = EINVAL;
goto out;
}
- skc->skc_linux_cache = kmem_cache_create(
- skc->skc_name, size, align, 0, NULL);
+#if defined(SLAB_USERCOPY)
+ /*
+ * Required for PAX-enabled kernels if the slab is to be
+ * used for copying between user and kernel space.
+ */
+ slabflags |= SLAB_USERCOPY;
+#endif
+
+#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
+ /*
+ * Newer grsecurity patchsets use kmem_cache_create_usercopy()
+ * instead of the SLAB_USERCOPY flag.
+ */
+ skc->skc_linux_cache = kmem_cache_create_usercopy(
+ skc->skc_name, size, align, slabflags, 0, size, NULL);
+#else
+ skc->skc_linux_cache = kmem_cache_create(
+ skc->skc_name, size, align, slabflags, NULL);
+#endif
if (skc->skc_linux_cache == NULL) {
rc = ENOMEM;
goto out;
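As a usage note on the two paths above, a self-contained sketch of a cache
that whitelists only part of each object for user copies.
kmem_cache_create_usercopy() is available on mainline kernels since roughly
4.16 as well as on grsecurity kernels, which is what the
HAVE_KMEM_CACHE_CREATE_USERCOPY configure check is assumed to detect; the
patch whitelists the whole object (offset 0, usersize == size).  All names
below are illustrative:

#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/types.h>

struct example_obj {
	u64	hdr;		/* kernel-private, never copied out */
	char	payload[64];	/* the only region exposed to user copies */
};

static struct kmem_cache *example_cache;

static int
example_cache_init(void)
{
	/* Whitelist just the payload region for copy_{to,from}_user(). */
	example_cache = kmem_cache_create_usercopy("example_obj",
	    sizeof (struct example_obj), 0, SLAB_HWCACHE_ALIGN,
	    offsetof(struct example_obj, payload),
	    sizeof (((struct example_obj *)0)->payload), NULL);

	return (example_cache != NULL ? 0 : -ENOMEM);
}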
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(sks->sks_magic == SKS_MAGIC);
- ASSERT(spin_is_locked(&skc->skc_lock));
sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
ASSERT(sko->sko_magic == SKO_MAGIC);
/*
 * This function is responsible for allocating a new slab, linking it in
 * to the list of partial slabs, and then waking any waiters.
 */
-static void
-spl_cache_grow_work(void *data)
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
{
- spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
- spl_kmem_cache_t *skc = ska->ska_cache;
spl_kmem_slab_t *sks;
-#if defined(PF_MEMALLOC_NOIO)
- unsigned noio_flag = memalloc_noio_save();
- sks = spl_slab_alloc(skc, ska->ska_flags);
- memalloc_noio_restore(noio_flag);
-#else
fstrans_cookie_t cookie = spl_fstrans_mark();
- sks = spl_slab_alloc(skc, ska->ska_flags);
+ sks = spl_slab_alloc(skc, flags);
spl_fstrans_unmark(cookie);
-#endif
+
spin_lock(&skc->skc_lock);
if (sks) {
skc->skc_slab_total++;
skc->skc_obj_total += sks->sks_objs;
list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+ smp_mb__before_atomic();
+ clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+ smp_mb__after_atomic();
+ wake_up_all(&skc->skc_waitq);
}
+ spin_unlock(&skc->skc_lock);
+
+ return (sks == NULL ? -ENOMEM : 0);
+}
+
+static void
+spl_cache_grow_work(void *data)
+{
+ spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+ spl_kmem_cache_t *skc = ska->ska_cache;
+
+ (void) __spl_cache_grow(skc, ska->ska_flags);
atomic_dec(&skc->skc_ref);
smp_mb__before_atomic();
clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
- clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
smp_mb__after_atomic();
- wake_up_all(&skc->skc_waitq);
- spin_unlock(&skc->skc_lock);
kfree(ska);
}
return (rc ? rc : -EAGAIN);
}
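The refactored __spl_cache_grow() above now publishes the new slab, clears
KMC_BIT_DEADLOCKED, and wakes waiters under skc_lock in one place.  A
reduced, self-contained sketch of that flag-and-wake hand-off; the names
are illustrative, not the SPL's:

#include <linux/bitops.h>
#include <linux/wait.h>

#define EX_BIT_DEADLOCKED	0

struct ex_cache {
	unsigned long		ex_flags;
	wait_queue_head_t	ex_waitq;
};

static void
ex_grow_done(struct ex_cache *exc)
{
	/* Order the list/flag updates before any waiter can observe them. */
	smp_mb__before_atomic();
	clear_bit(EX_BIT_DEADLOCKED, &exc->ex_flags);
	smp_mb__after_atomic();
	wake_up_all(&exc->ex_waitq);
}

static void
ex_wait_for_grow(struct ex_cache *exc)
{
	/* Sleep until the grower clears the deadlocked bit. */
	wait_event(exc->ex_waitq,
	    !test_bit(EX_BIT_DEADLOCKED, &exc->ex_flags));
}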
+ /*
+ * To reduce the overhead of context switches and improve NUMA locality,
+ * spl_cache_grow() first tries to allocate a new slab in the current
+ * process context with the KM_NOSLEEP flag.  If that fails, it
+ * dispatches the allocation to a taskq.
+ *
+ * However, this can't be applied to KMC_VMEM caches due to a bug where
+ * __vmalloc() doesn't honor the gfp flags in its page table allocations.
+ */
+ if (!(skc->skc_flags & KMC_VMEM)) {
+ rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
+ if (rc == 0)
+ return (0);
+ }
+
/*
 * This is handled by dispatching a work request to the global work
 * queue.  This allows us to asynchronously allocate a new slab while
 * retaining the ability to safely fall back to a smaller synchronous
 * allocation to ensure forward progress is always maintained.
 */
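A hedged sketch of what that dispatch plausibly looks like; the real
spl_cache_grow() differs in detail, and the spl_kmem_alloc_t bookkeeping
shown here (including the ska_tqe field) is an assumption drawn from the
ska_cache/ska_flags fields used in spl_cache_grow_work() above:

static int
ex_grow_async(spl_kmem_cache_t *skc, int flags)
{
	spl_kmem_alloc_t *ska;

	ska = kmalloc(sizeof (*ska), kmem_flags_convert(KM_NOSLEEP));
	if (ska == NULL)
		return (-ENOMEM);

	/* Hold a cache reference for the worker; dropped in the work fn. */
	atomic_inc(&skc->skc_ref);
	ska->ska_cache = skc;
	ska->ska_flags = flags;
	taskq_init_ent(&ska->ska_tqe);
	taskq_dispatch_ent(spl_kmem_cache_taskq,
	    spl_cache_grow_work, ska, 0, &ska->ska_tqe);

	return (0);
}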
spl_kmem_obj_t *sko = NULL;
ASSERT(skc->skc_magic == SKC_MAGIC);
- ASSERT(spin_is_locked(&skc->skc_lock));
sko = spl_sko_from_obj(skc, obj);
ASSERT(sko->sko_magic == SKO_MAGIC);
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- atomic_inc(&skc->skc_ref);
-
/*
 * Allocate directly from a Linux slab.  All optimizations are left
 * to the underlying cache; we only need to guarantee that KM_SLEEP
 * callers will never fail.
 */
prefetchw(obj);
}
- atomic_dec(&skc->skc_ref);
-
return (obj);
}
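The prefetchw() call above pairs with the <linux/prefetch.h> include added
at the top of this patch: it hints that the just-allocated object will be
written imminently, pulling its cache line in for writing ahead of use.  A
minimal sketch of the idiom, with illustrative names:

#include <linux/prefetch.h>
#include <linux/slab.h>

static inline void *
ex_alloc_for_write(struct kmem_cache *cache, gfp_t gfp)
{
	void *obj = kmem_cache_alloc(cache, gfp);

	/* The caller is expected to initialize the object immediately. */
	if (obj != NULL)
		prefetchw(obj);

	return (obj);
}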
EXPORT_SYMBOL(spl_kmem_cache_alloc);
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- atomic_inc(&skc->skc_ref);
/*
* Run the destructor
*/
if (skc->skc_flags & KMC_SLAB) {
kmem_cache_free(skc->skc_linux_cache, obj);
- goto out;
+ return;
}
spin_unlock(&skc->skc_lock);
if (do_emergency && (spl_emergency_free(skc, obj) == 0))
- goto out;
+ return;
}
local_irq_save(flags);
if (do_reclaim)
spl_slab_reclaim(skc);
-out:
- atomic_dec(&skc->skc_ref);
}
EXPORT_SYMBOL(spl_kmem_cache_free);
atomic_inc(&skc->skc_ref);
/*
- * Execute the registered reclaim callback if it exists. The
- * per-cpu caches will be drained when is set KMC_EXPIRE_MEM.
+ * Execute the registered reclaim callback if it exists.
*/
if (skc->skc_flags & KMC_SLAB) {
if (skc->skc_reclaim)
skc->skc_reclaim(skc->skc_private);
-
- if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
- kmem_cache_shrink(skc->skc_linux_cache);
-
goto out;
}
init_rwsem(&spl_kmem_cache_sem);
INIT_LIST_HEAD(&spl_kmem_cache_list);
spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
- spl_kmem_cache_kmem_threads, defclsyspri,
+ spl_kmem_cache_kmem_threads, maxclsyspri,
spl_kmem_cache_kmem_threads * 8, INT_MAX,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
spl_register_shrinker(&spl_kmem_cache_shrinker);