/*
 * This file is part of the SPL: Solaris Porting Layer.
 *
 * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory
 * Written by:
 *         Brian Behlendorf <behlendorf1@llnl.gov>,
 *         Herb Wartens <wartens2@llnl.gov>,
 *         Jim Garlick <garlick@llnl.gov>
 * UCRL-CODE-235197
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);
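
/*
 * Illustrative sketch only, not part of the original source: assuming
 * the flag gates the shim's allocation warnings as its name suggests,
 * a caller expecting an unusually large kmem_alloc() could bracket the
 * call to keep the log quiet (big_size is a hypothetical variable):
 *
 *   kmem_set_warning(0);                     - silence shim warnings
 *   buf = kmem_alloc(big_size, KM_SLEEP);
 *   kmem_set_warning(1);                     - restore the default
 */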

/*
 * Slab allocation interfaces
 *
 * While the Linux slab implementation was inspired by Solaris, it has
 * made some changes to the API which complicate this shim layer. For
 * one thing the same symbol names are used with different arguments in
 * the prototypes. To deal with this we must use the preprocessor to
 * re-order arguments. Happily for us standard C says, "macros appearing
 * in their own expansion are not re-expanded", so this does not result
 * in infinite recursion. Additionally, the function pointers registered
 * by Solaris differ from those used by Linux, so a lookup and mapping
 * from Linux style callbacks to Solaris style callbacks is needed.
 * There is some overhead in this operation which isn't horrible, but
 * it needs to be kept in mind.
 */
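/*
 * A minimal sketch of the argument re-ordering idiom described above,
 * under the assumption that sys/kmem.h remaps the identically named
 * Solaris entry points onto the shim with something along these lines
 * (the exact macros live in the header, not here):
 *
 *   #define kmem_cache_create(name, size, align, ctor, dtor,       \
 *                             reclaim, priv, vmp, flags)           \
 *           __kmem_cache_create(name, size, align, ctor, dtor,     \
 *                               reclaim, priv, vmp, flags)
 *
 * The self-expansion rule matters for any mapping whose right hand
 * side mentions the macro's own name; the #undef block later in this
 * file lets us reach the native Linux functions regardless.
 */
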
#define KCC_MAGIC               0x7a7a7a7a
#define KCC_POISON              0x77

typedef struct kmem_cache_cb {
        int                     kcc_magic;
        struct list_head        kcc_list;
        kmem_cache_t *          kcc_cache;
        kmem_constructor_t      kcc_constructor;
        kmem_destructor_t       kcc_destructor;
        kmem_reclaim_t          kcc_reclaim;
        void *                  kcc_private;
        void *                  kcc_vmp;
        atomic_t                kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        /* Check for NULL before the list_del() dereference, not after */
        if (kcc == NULL)
                return;

        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* Poison the callback record to help catch any use after free */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        ASSERT(flags & SLAB_CTOR_CONSTRUCTOR);

        /* Ensure constructor verify calls are not passed to the registered
         * constructors. This may not be safe because the Solaris constructor
         * is not aware of how to handle the SLAB_CTOR_VERIFY flag.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        if (flags & SLAB_CTOR_ATOMIC)
                flags = KM_NOSLEEP;
        else
                flags = KM_SLEEP;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructor has no return code, silently eat it */
}

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* No valid destructor flags */
        ASSERT(flags == 0);

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* Solaris destructor takes no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under linux a shrinker is not tightly coupled with a slab
         * cache. In fact linux always systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached. Because of this we only register one shrinker
         * function in the shim layer for all slab caches. And we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question. If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running. In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target. I do not pass these args on because reclaim
                 * policy is entirely up to the owner under solaris. We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under linux we should return the remaining number of entries in
         * the cache. Unfortunately, I don't see an easy way to safely
         * emulate this behavior so I'm returning one entry per cache which
         * was registered with the generic shrinker. This should fake out
         * the linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}
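
/*
 * For reference only, not part of the original source: the registration
 * lifecycle assumed by this file's old-style shrinker API looks roughly
 * like the following. Under memory pressure the VM repeatedly invokes
 * the callback until it is satisfied with the reported totals:
 *
 *   struct shrinker *s;
 *   s = set_shrinker(KMC_DEFAULT_SEEKS, kmem_cache_generic_shrinker);
 *   ...
 *   remove_shrinker(s);
 */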

/* Ensure the kmem_cache_create/destroy/alloc macros are removed here
 * to prevent a recursive substitution; we want to call the native
 * linux versions below.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX - Options currently unsupported by the shim layer */
        ASSERT(!vmp);
        ASSERT(flags == 0);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL) {
                kfree(cache_name);
                RETURN(NULL);
        }

        /* Register shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        kfree(cache_name);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }

                /* Record that this call registered the shared shrinker so
                 * it can be removed again on the error path below. */
                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* Newly registered shrinker must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                kfree(cache_name);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
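
/*
 * Hypothetical consumer-side sketch, not part of the original file:
 * with the remapping macros in sys/kmem.h a caller sees the Solaris
 * style API. The cache and callback names below are illustrative only,
 * and kmem_cache_free() is assumed to map straight onto its Linux twin:
 *
 *   kmem_cache_t *cp;
 *   cp = kmem_cache_create("my_cache", sizeof(my_obj_t), 0,
 *                          my_ctor, my_dtor, my_reclaim, NULL, NULL, 0);
 *   obj = kmem_cache_alloc(cp, KM_SLEEP);
 *   ...
 *   kmem_cache_free(cp, obj);
 *   kmem_cache_destroy(cp);
 */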

/* Return code provided despite Solaris's void return. There should be no
 * harm here since the Solaris versions will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister generic shrinker on removal of all caches */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory. Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding. This is
 * not the correct fix, or even a good one. But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into linux to force all the registered
         * shrinkers to run we just run the one registered for this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);
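
/*
 * Illustrative only, under the assumption that the header maps the
 * Solaris kmem_reap() onto __kmem_reap(): a consumer noticing memory
 * pressure (the predicate below is hypothetical) would call it to
 * nudge every cache registered above into running its reclaim callback:
 *
 *   if (memory_pressure_detected())
 *           kmem_reap();
 */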

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */
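
/*
 * Worked example of the helper above, with illustrative values: a
 * leaked buffer holding mostly printable text is reported with that
 * text in the data column, while a non-printable buffer falls back to
 * a hex dump such as "7a7a7a7a7a7a7a7a". Note that the hex path, as
 * written, samples every other byte of the first 16 (offsets 0, 2,
 * ..., 14) rather than the first 8 consecutive bytes.
 */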

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging. Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}