KMC_BIT_QCACHE = 4, /* XXX: Unsupported */
KMC_BIT_KMEM = 5, /* Use kmem cache */
KMC_BIT_VMEM = 6, /* Use vmem cache */
- KMC_BIT_OFFSLAB = 7, /* Objects not on slab */
- KMC_BIT_NOEMERGENCY = 8, /* Disable emergency objects */
+ KMC_BIT_SLAB = 7, /* Use Linux slab cache */
+ KMC_BIT_OFFSLAB = 8, /* Objects not on slab */
+ KMC_BIT_NOEMERGENCY = 9, /* Disable emergency objects */
KMC_BIT_DEADLOCKED = 14, /* Deadlock detected */
KMC_BIT_GROWING = 15, /* Growing in progress */
KMC_BIT_REAPING = 16, /* Reaping in progress */
#define KMC_QCACHE (1 << KMC_BIT_QCACHE)
#define KMC_KMEM (1 << KMC_BIT_KMEM)
#define KMC_VMEM (1 << KMC_BIT_VMEM)
+#define KMC_SLAB (1 << KMC_BIT_SLAB)
#define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB)
#define KMC_NOEMERGENCY (1 << KMC_BIT_NOEMERGENCY)
#define KMC_DEADLOCKED (1 << KMC_BIT_DEADLOCKED)
spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
void *skc_private; /* Private data */
void *skc_vmp; /* Unused */
+ struct kmem_cache *skc_linux_cache; /* Linux slab cache if used */
unsigned long skc_flags; /* Flags */
uint32_t skc_obj_size; /* Object size */
uint32_t skc_obj_align; /* Object alignment */
#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
((ptr) < (void *)VMALLOC_END))
+/*
+ * Allow custom slab allocation flags to be set for KMC_SLAB based caches.
+ * One use for this function is to ensure the __GFP_COMP flag is part of
+ * the default allocation mask which ensures higher order allocations are
+ * properly refcounted. This flag was added to the default ->allocflags
+ * as of Linux 3.11.
+ */
+static inline void
+kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags)
+{
+	/* Only KMC_SLAB caches have a backing Linux slab; no-op otherwise. */
+	if (skc->skc_linux_cache == NULL)
+		return;
+
+	/*
+	 * The name of the slab's allocation-flags field differs between
+	 * kernel versions; configure detects which one is present.  If
+	 * neither macro is defined this function silently does nothing.
+	 */
+#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
+	skc->skc_linux_cache->allocflags |= flags;
+#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
+	skc->skc_linux_cache->gfpflags |= flags;
+#endif
+}
+
#endif /* _SPL_KMEM_H */
#define SS_DEBUG_SUBSYS SS_KMEM
+/*
+ * Within the scope of spl-kmem.c file the kmem_cache_* definitions
+ * are removed to allow access to the real Linux slab allocator.
+ */
+#undef kmem_cache_destroy
+#undef kmem_cache_create
+#undef kmem_cache_alloc
+#undef kmem_cache_free
+
+
/*
* Cache expiration was implemented because it was part of the default Solaris
* kmem_cache behavior. The idea is that per-cpu objects which haven't been
module_param(spl_kmem_cache_max_size, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
+unsigned int spl_kmem_cache_slab_limit = 0;
+module_param(spl_kmem_cache_slab_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
+ "Objects less than N bytes use the Linux slab");
+
+unsigned int spl_kmem_cache_kmem_limit = (PAGE_SIZE / 4);
+module_param(spl_kmem_cache_kmem_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
+ "Objects less than N bytes use the kmalloc");
+
/*
* The minimum amount of memory measured in pages to be free at all
* times on the system. This is similar to Linux's zone->pages_min
return;
atomic_inc(&skc->skc_ref);
- spl_on_each_cpu(spl_magazine_age, skc, 1);
+
+ if (!(skc->skc_flags & KMC_NOMAGAZINE))
+ spl_on_each_cpu(spl_magazine_age, skc, 1);
+
spl_slab_reclaim(skc, skc->skc_reap, 0);
while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
int i;
SENTRY;
+ if (skc->skc_flags & KMC_NOMAGAZINE)
+ SRETURN(0);
+
skc->skc_mag_size = spl_magazine_size(skc);
skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
int i;
SENTRY;
+ if (skc->skc_flags & KMC_NOMAGAZINE) {
+ SEXIT;
+ return;
+ }
+
for_each_online_cpu(i) {
skm = skc->skc_mag[i];
spl_cache_flush(skc, skm, skm->skm_avail);
* flags
* KMC_NOTOUCH Disable cache object aging (unsupported)
* KMC_NODEBUG Disable debugging (unsupported)
- * KMC_NOMAGAZINE Disable magazine (unsupported)
* KMC_NOHASH Disable hashing (unsupported)
* KMC_QCACHE Disable qcache (unsupported)
+ * KMC_NOMAGAZINE Enabled for kmem/vmem, Disabled for Linux slab
* KMC_KMEM Force kmem backed cache
* KMC_VMEM Force vmem backed cache
+ * KMC_SLAB Force Linux slab backed cache
* KMC_OFFSLAB Locate objects off the slab
*/
spl_kmem_cache_t *
skc->skc_reclaim = reclaim;
skc->skc_private = priv;
skc->skc_vmp = vmp;
+ skc->skc_linux_cache = NULL;
skc->skc_flags = flags;
skc->skc_obj_size = size;
skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
skc->skc_obj_emergency = 0;
skc->skc_obj_emergency_max = 0;
+ /*
+ * Verify the requested alignment restriction is sane.
+ */
if (align) {
VERIFY(ISP2(align));
- VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); /* Min alignment */
- VERIFY3U(align, <=, PAGE_SIZE); /* Max alignment */
+ VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
+ VERIFY3U(align, <=, PAGE_SIZE);
skc->skc_obj_align = align;
}
- /* If none passed select a cache type based on object size */
- if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
- if (spl_obj_size(skc) < (PAGE_SIZE / 8))
+ /*
+ * When no specific type of slab is requested (kmem, vmem, or
+ * linuxslab) then select a cache type based on the object size
+ * and default tunables.
+ */
+ if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {
+
+ /*
+ * Objects smaller than spl_kmem_cache_slab_limit can
+ * use the Linux slab for better space-efficiency. By
+ * default this functionality is disabled until its
+		 * performance characteristics are fully understood.
+ */
+ if (spl_kmem_cache_slab_limit &&
+ size <= (size_t)spl_kmem_cache_slab_limit)
+ skc->skc_flags |= KMC_SLAB;
+
+ /*
+ * Small objects, less than spl_kmem_cache_kmem_limit per
+ * object should use kmem because their slabs are small.
+ */
+ else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit)
skc->skc_flags |= KMC_KMEM;
+
+ /*
+ * All other objects are considered large and are placed
+ * on vmem backed slabs.
+ */
else
skc->skc_flags |= KMC_VMEM;
}
- rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size);
- if (rc)
- SGOTO(out, rc);
+ /*
+ * Given the type of slab allocate the required resources.
+ */
+ if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ rc = spl_slab_size(skc,
+ &skc->skc_slab_objs, &skc->skc_slab_size);
+ if (rc)
+ SGOTO(out, rc);
+
+ rc = spl_magazine_create(skc);
+ if (rc)
+ SGOTO(out, rc);
+ } else {
+ skc->skc_linux_cache = kmem_cache_create(
+ skc->skc_name, size, align, 0, NULL);
+ if (skc->skc_linux_cache == NULL)
+ SGOTO(out, rc = ENOMEM);
- rc = spl_magazine_create(skc);
- if (rc)
- SGOTO(out, rc);
+ kmem_cache_set_allocflags(skc, __GFP_COMP);
+ skc->skc_flags |= KMC_NOMAGAZINE;
+ }
if (spl_kmem_cache_expire & KMC_EXPIRE_AGE)
skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
SENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB));
down_write(&spl_kmem_cache_sem);
list_del_init(&skc->skc_list);
* cache reaping action which races with this destroy. */
wait_event(wq, atomic_read(&skc->skc_ref) == 0);
- spl_magazine_destroy(skc);
- spl_slab_reclaim(skc, 0, 1);
+ if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ spl_magazine_destroy(skc);
+ spl_slab_reclaim(skc, 0, 1);
+ } else {
+ ASSERT(skc->skc_flags & KMC_SLAB);
+ kmem_cache_destroy(skc->skc_linux_cache);
+ }
+
spin_lock(&skc->skc_lock);
/* Validate there are no objects in use and free all the
}
/*
- * No available objects on any slabs, create a new slab.
+ * No available objects on any slabs, create a new slab. Note that this
+ * functionality is disabled for KMC_SLAB caches which are backed by the
+ * Linux slab.
*/
static int
spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
SENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
+ ASSERT((skc->skc_flags & KMC_SLAB) == 0);
might_sleep();
*obj = NULL;
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
ASSERT(flags & KM_SLEEP);
+
atomic_inc(&skc->skc_ref);
+
+ /*
+ * Allocate directly from a Linux slab. All optimizations are left
+ * to the underlying cache we only need to guarantee that KM_SLEEP
+ * callers will never fail.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ struct kmem_cache *slc = skc->skc_linux_cache;
+
+ do {
+ obj = kmem_cache_alloc(slc, flags | __GFP_COMP);
+ if (obj && skc->skc_ctor)
+ skc->skc_ctor(obj, skc->skc_private, flags);
+
+ } while ((obj == NULL) && !(flags & KM_NOSLEEP));
+
+ atomic_dec(&skc->skc_ref);
+ SRETURN(obj);
+ }
+
local_irq_disable();
restart:
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
atomic_inc(&skc->skc_ref);
+ /*
+	 * Free the object from the underlying Linux slab.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ if (skc->skc_dtor)
+ skc->skc_dtor(obj, skc->skc_private);
+
+ kmem_cache_free(skc->skc_linux_cache, obj);
+ goto out;
+ }
+
/*
* Only virtual slabs may have emergency objects and these objects
* are guaranteed to have physical addresses. They must be removed
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
- /* Prevent concurrent cache reaping when contended */
- if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
- SEXIT;
- return;
+ atomic_inc(&skc->skc_ref);
+
+ /*
+ * Execute the registered reclaim callback if it exists. The
+	 * per-cpu caches will be drained when KMC_EXPIRE_MEM is set.
+ */
+ if (skc->skc_flags & KMC_SLAB) {
+ if (skc->skc_reclaim)
+ skc->skc_reclaim(skc->skc_private);
+
+ if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
+ kmem_cache_shrink(skc->skc_linux_cache);
+
+ SGOTO(out, 0);
}
- atomic_inc(&skc->skc_ref);
+ /*
+ * Prevent concurrent cache reaping when contended.
+ */
+ if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
+ SGOTO(out, 0);
/*
* When a reclaim function is available it may be invoked repeatedly
clear_bit(KMC_BIT_REAPING, &skc->skc_flags);
smp_mb__after_clear_bit();
wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
-
+out:
atomic_dec(&skc->skc_ref);
SEXIT;
{
int j;
- splat_vprint(file, name,
- "%s cache objects %d, slabs %u/%u objs %u/%u mags ",
- kcp->kcp_cache->skc_name, kcp->kcp_count,
+ splat_vprint(file, name, "%s cache objects %d",
+ kcp->kcp_cache->skc_name, kcp->kcp_count);
+
+ if (kcp->kcp_cache->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+ splat_vprint(file, name, ", slabs %u/%u objs %u/%u",
(unsigned)kcp->kcp_cache->skc_slab_alloc,
(unsigned)kcp->kcp_cache->skc_slab_total,
(unsigned)kcp->kcp_cache->skc_obj_alloc,
(unsigned)kcp->kcp_cache->skc_obj_total);
- for_each_online_cpu(j)
- splat_print(file, "%u/%u ",
- kcp->kcp_cache->skc_mag[j]->skm_avail,
- kcp->kcp_cache->skc_mag[j]->skm_size);
+ if (!(kcp->kcp_cache->skc_flags & KMC_NOMAGAZINE)) {
+ splat_vprint(file, name, "%s", "mags");
+
+ for_each_online_cpu(j)
+ splat_print(file, "%u/%u ",
+ kcp->kcp_cache->skc_mag[j]->skm_avail,
+ kcp->kcp_cache->skc_mag[j]->skm_size);
+ }
+ }
splat_print(file, "%s\n", "");
}
kmem_cache_reap_now(kcp->kcp_cache);
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST8_NAME, kcp);
- if (kcp->kcp_cache->skc_obj_total == 0)
+ if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ / 10);
}
- if (kcp->kcp_cache->skc_obj_total == 0) {
+ if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
} else {
splat_vprint(file, SPLAT_KMEM_TEST8_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
- (unsigned)kcp->kcp_cache->skc_obj_total,
+ (unsigned)kcp->kcp_count,
SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}
for (i = 0; i < 60; i++) {
splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST9_NAME, kcp);
- if (kcp->kcp_cache->skc_obj_total == 0)
+ if (kcp->kcp_count == 0)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
}
- if (kcp->kcp_cache->skc_obj_total == 0) {
+ if (kcp->kcp_count == 0) {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Successfully created %d objects "
"in cache %s and reclaimed them\n",
} else {
splat_vprint(file, SPLAT_KMEM_TEST9_NAME,
"Failed to reclaim %u/%d objects from cache %s\n",
- (unsigned)kcp->kcp_cache->skc_obj_total, count,
+ (unsigned)kcp->kcp_count, count,
SPLAT_KMEM_CACHE_NAME);
rc = -ENOMEM;
}