+/*
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see <http://zfsonlinux.org/>.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _SPL_KMEM_H
#define _SPL_KMEM_H
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include <linux/slab.h>
-//#define DEBUG_KMEM
-#undef DEBUG_KMEM
-#undef DEBUG_KMEM_UNIMPLEMENTED
+extern int kmem_debugging(void);
+extern char *kmem_vasprintf(const char *fmt, va_list ap);
+extern char *kmem_asprintf(const char *fmt, ...);
+extern char *strdup(const char *str);
+extern void strfree(char *str);
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-#include <linux/hash.h>
-#include <linux/ctype.h>
-#include <sys/debug.h>
/*
* Memory allocation interfaces
*/
-#define KM_SLEEP GFP_KERNEL
-#define KM_NOSLEEP GFP_ATOMIC
-#undef KM_PANIC /* No linux analog */
-#define KM_PUSHPAGE (GFP_KERNEL | GFP_HIGH)
-#define KM_VMFLAGS GFP_LEVEL_MASK
-#define KM_FLAGS __GFP_BITS_MASK
-
-#ifdef DEBUG_KMEM
-extern atomic64_t kmem_alloc_used;
-extern unsigned long kmem_alloc_max;
-extern atomic64_t vmem_alloc_used;
-extern unsigned long vmem_alloc_max;
-
-extern int kmem_warning_flag;
-extern atomic64_t kmem_cache_alloc_failed;
+#define KM_SLEEP GFP_KERNEL /* Can sleep, never fails */
+#define KM_NOSLEEP GFP_ATOMIC /* Can not sleep, may fail */
+#define KM_PUSHPAGE (GFP_NOIO | __GFP_HIGH) /* Use reserved memory */
+#define KM_NODEBUG __GFP_NOWARN /* Suppress warnings */
+#define KM_FLAGS __GFP_BITS_MASK
+#define KM_VMFLAGS GFP_LEVEL_MASK
-/* XXX - Not to surprisingly with debugging enabled the xmem_locks are very
- * highly contended particularly on xfree(). If we want to run with this
- * detailed debugging enabled for anything other than debugging we need to
- * minimize the contention by moving to a lock per xmem_table entry model.
+/*
+ * Used internally, the kernel does not need to support this flag
*/
-#define KMEM_HASH_BITS 10
-#define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS)
-
-extern struct hlist_head kmem_table[KMEM_TABLE_SIZE];
-extern struct list_head kmem_list;
-extern spinlock_t kmem_lock;
-
-#define VMEM_HASH_BITS 10
-#define VMEM_TABLE_SIZE (1 << VMEM_HASH_BITS)
-
-extern struct hlist_head vmem_table[VMEM_TABLE_SIZE];
-extern struct list_head vmem_list;
-extern spinlock_t vmem_lock;
-
-typedef struct kmem_debug {
- struct hlist_node kd_hlist; /* Hash node linkage */
- struct list_head kd_list; /* List of all allocations */
- void *kd_addr; /* Allocation pointer */
- size_t kd_size; /* Allocation size */
- const char *kd_func; /* Allocation function */
- int kd_line; /* Allocation line */
-} kmem_debug_t;
+#ifndef __GFP_ZERO
+#define __GFP_ZERO 0x8000
+#endif
-static __inline__ kmem_debug_t *
-__kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr)
+/*
+ * __GFP_NOFAIL looks like it will be removed from the kernel perhaps as
+ * early as 2.6.32. To avoid this issue when it occurs in upstream kernels
+ * we retry the allocation here as long as __GFP_WAIT is set (not GFP_ATOMIC).
+ * I would prefer the caller handle the failure case cleanly but we are
+ * trying to emulate Solaris and those are not the Solaris semantics.
+ */
+static inline void *
+kmalloc_nofail(size_t size, gfp_t flags)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct kmem_debug *p;
- unsigned long flags;
+ void *ptr;
- spin_lock_irqsave(lock, flags);
- head = &table[hash_ptr(addr, bits)];
- hlist_for_each_entry_rcu(p, node, head, kd_hlist) {
- if (p->kd_addr == addr) {
- hlist_del_init(&p->kd_hlist);
- list_del_init(&p->kd_list);
- spin_unlock_irqrestore(lock, flags);
- return p;
- }
- }
+ do {
+ ptr = kmalloc(size, flags);
+ } while (ptr == NULL && (flags & __GFP_WAIT));
- spin_unlock_irqrestore(lock, flags);
- return NULL;
+ return (ptr);
}
-#define __kmem_alloc(size, flags, allocator) \
-({ void *_ptr_ = NULL; \
- kmem_debug_t *_dptr_; \
- unsigned long _flags_; \
- \
- _dptr_ = (kmem_debug_t *)kmalloc(sizeof(kmem_debug_t), (flags)); \
- if (_dptr_ == NULL) { \
- __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
- "kmem_alloc(%d, 0x%x) debug failed\n", \
- sizeof(kmem_debug_t), (int)(flags)); \
- } else { \
- /* Marked unlikely because we should never be doing this, */ \
- /* we tolerate to up 2 pages but a single page is best. */ \
- if (unlikely((size) > (PAGE_SIZE * 2)) && kmem_warning_flag) \
- __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning large " \
- "kmem_alloc(%d, 0x%x) (%ld/%ld)\n", \
- (int)(size), (int)(flags), \
- atomic64_read(&kmem_alloc_used), \
- kmem_alloc_max); \
- \
- _ptr_ = (void *)allocator((size), (flags)); \
- if (_ptr_ == NULL) { \
- kfree(_dptr_); \
- __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
- "kmem_alloc(%d, 0x%x) failed (%ld/" \
- "%ld)\n", (int)(size), (int)(flags), \
- atomic64_read(&kmem_alloc_used), \
- kmem_alloc_max); \
- } else { \
- atomic64_add((size), &kmem_alloc_used); \
- if (unlikely(atomic64_read(&kmem_alloc_used) > \
- kmem_alloc_max)) \
- kmem_alloc_max = \
- atomic64_read(&kmem_alloc_used); \
- \
- INIT_HLIST_NODE(&_dptr_->kd_hlist); \
- INIT_LIST_HEAD(&_dptr_->kd_list); \
- _dptr_->kd_addr = _ptr_; \
- _dptr_->kd_size = (size); \
- _dptr_->kd_func = __FUNCTION__; \
- _dptr_->kd_line = __LINE__; \
- spin_lock_irqsave(&kmem_lock, _flags_); \
- hlist_add_head_rcu(&_dptr_->kd_hlist, \
- &kmem_table[hash_ptr(_ptr_, KMEM_HASH_BITS)]);\
- list_add_tail(&_dptr_->kd_list, &kmem_list); \
- spin_unlock_irqrestore(&kmem_lock, _flags_); \
- \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(" \
- "%d, 0x%x) = %p (%ld/%ld)\n", \
- (int)(size), (int)(flags), _ptr_, \
- atomic64_read(&kmem_alloc_used), \
- kmem_alloc_max); \
- } \
- } \
- \
- _ptr_; \
-})
-
-#define kmem_alloc(size, flags) __kmem_alloc((size), (flags), kmalloc)
-#define kmem_zalloc(size, flags) __kmem_alloc((size), (flags), kzalloc)
-
-#define kmem_free(ptr, size) \
-({ \
- kmem_debug_t *_dptr_; \
- ASSERT((ptr) || (size > 0)); \
- \
- _dptr_ = __kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);\
- ASSERT(_dptr_); /* Must exist in hash due to kmem_alloc() */ \
- ASSERTF(_dptr_->kd_size == (size), "kd_size (%d) != size (%d), " \
- "kd_func = %s, kd_line = %d\n", _dptr_->kd_size, (size), \
- _dptr_->kd_func, _dptr_->kd_line); /* Size must match */ \
- atomic64_sub((size), &kmem_alloc_used); \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n", \
- (ptr), (int)(size), atomic64_read(&kmem_alloc_used), \
- kmem_alloc_max); \
- \
- memset(_dptr_, 0x5a, sizeof(kmem_debug_t)); \
- kfree(_dptr_); \
- \
- memset(ptr, 0x5a, (size)); \
- kfree(ptr); \
-})
-
-#define __vmem_alloc(size, flags) \
-({ void *_ptr_ = NULL; \
- kmem_debug_t *_dptr_; \
- unsigned long _flags_; \
- \
- ASSERT((flags) & KM_SLEEP); \
- \
- _dptr_ = (kmem_debug_t *)kmalloc(sizeof(kmem_debug_t), (flags)); \
- if (_dptr_ == NULL) { \
- __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
- "vmem_alloc(%d, 0x%x) debug failed\n", \
- sizeof(kmem_debug_t), (int)(flags)); \
- } else { \
- _ptr_ = (void *)__vmalloc((size), (((flags) | \
- __GFP_HIGHMEM) & ~__GFP_ZERO), \
- PAGE_KERNEL); \
- if (_ptr_ == NULL) { \
- kfree(_dptr_); \
- __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
- "vmem_alloc(%d, 0x%x) failed (%ld/" \
- "%ld)\n", (int)(size), (int)(flags), \
- atomic64_read(&vmem_alloc_used), \
- vmem_alloc_max); \
- } else { \
- if (flags & __GFP_ZERO) \
- memset(_ptr_, 0, (size)); \
- \
- atomic64_add((size), &vmem_alloc_used); \
- if (unlikely(atomic64_read(&vmem_alloc_used) > \
- vmem_alloc_max)) \
- vmem_alloc_max = \
- atomic64_read(&vmem_alloc_used); \
- \
- INIT_HLIST_NODE(&_dptr_->kd_hlist); \
- INIT_LIST_HEAD(&_dptr_->kd_list); \
- _dptr_->kd_addr = _ptr_; \
- _dptr_->kd_size = (size); \
- _dptr_->kd_func = __FUNCTION__; \
- _dptr_->kd_line = __LINE__; \
- spin_lock_irqsave(&vmem_lock, _flags_); \
- hlist_add_head_rcu(&_dptr_->kd_hlist, \
- &vmem_table[hash_ptr(_ptr_, VMEM_HASH_BITS)]);\
- list_add_tail(&_dptr_->kd_list, &vmem_list); \
- spin_unlock_irqrestore(&vmem_lock, _flags_); \
- \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(" \
- "%d, 0x%x) = %p (%ld/%ld)\n", \
- (int)(size), (int)(flags), _ptr_, \
- atomic64_read(&vmem_alloc_used), \
- vmem_alloc_max); \
- } \
- } \
- \
- _ptr_; \
-})
-
-#define vmem_alloc(size, flags) __vmem_alloc((size), (flags))
-#define vmem_zalloc(size, flags) __vmem_alloc((size), ((flags) | \
- __GFP_ZERO))
-
-#define vmem_free(ptr, size) \
-({ \
- kmem_debug_t *_dptr_; \
- ASSERT((ptr) || (size > 0)); \
- \
- _dptr_ = __kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr);\
- ASSERT(_dptr_); /* Must exist in hash due to vmem_alloc() */ \
- ASSERTF(_dptr_->kd_size == (size), "kd_size (%d) != size (%d), " \
- "kd_func = %s, kd_line = %d\n", _dptr_->kd_size, (size), \
- _dptr_->kd_func, _dptr_->kd_line); /* Size must match */ \
- atomic64_sub((size), &vmem_alloc_used); \
- __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n", \
- (ptr), (int)(size), atomic64_read(&vmem_alloc_used), \
- vmem_alloc_max); \
- \
- memset(_dptr_, 0x5a, sizeof(kmem_debug_t)); \
- kfree(_dptr_); \
- \
- memset(ptr, 0x5a, (size)); \
- vfree(ptr); \
-})
+static inline void *
+kzalloc_nofail(size_t size, gfp_t flags)
+{
+ void *ptr;
-#else /* DEBUG_KMEM */
+ do {
+ ptr = kzalloc(size, flags);
+ } while (ptr == NULL && (flags & __GFP_WAIT));
-#define kmem_alloc(size, flags) kmalloc((size), (flags))
-#define kmem_zalloc(size, flags) kzalloc((size), (flags))
-#define kmem_free(ptr, size) kfree(ptr)
+ return (ptr);
+}
-#define vmem_alloc(size, flags) __vmalloc((size), ((flags) | \
- __GFP_HIGHMEM), PAGE_KERNEL)
-#define vmem_zalloc(size, flags) \
-({ \
- void *_ptr_ = __vmalloc((size),((flags)|__GFP_HIGHMEM),PAGE_KERNEL); \
- if (_ptr_) \
- memset(_ptr_, 0, (size)); \
- _ptr_; \
-})
-#define vmem_free(ptr, size) vfree(ptr)
+static inline void *
+kmalloc_node_nofail(size_t size, gfp_t flags, int node)
+{
+ void *ptr;
-#endif /* DEBUG_KMEM */
+ do {
+ ptr = kmalloc_node(size, flags, node);
+ } while (ptr == NULL && (flags & __GFP_WAIT));
-#ifdef DEBUG_KMEM_UNIMPLEMENTED
-static __inline__ void *
-kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags)
-{
-#error "kmem_alloc_tryhard() not implemented"
+ return (ptr);
}
-#endif /* DEBUG_KMEM_UNIMPLEMENTED */
+
+#ifdef DEBUG_KMEM
/*
- * Slab allocation interfaces
+ * Memory accounting functions to be used only when DEBUG_KMEM is set.
*/
-#undef KMC_NOTOUCH /* No linux analog */
-#define KMC_NODEBUG 0x00000000 /* Default behavior */
-#define KMC_NOMAGAZINE /* No linux analog */
-#define KMC_NOHASH /* No linux analog */
-#define KMC_QCACHE /* No linux analog */
-
-#define KMC_REAP_CHUNK 256
-#define KMC_DEFAULT_SEEKS DEFAULT_SEEKS
-
-/* Defined by linux slab.h
- * typedef struct kmem_cache_s kmem_cache_t;
+#ifdef HAVE_ATOMIC64_T
+#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
+#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
+#define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used)
+#define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size)
+extern atomic64_t kmem_alloc_used;
+extern unsigned long long kmem_alloc_max;
+#else /* HAVE_ATOMIC64_T */
+#define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used)
+#define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used)
+#define kmem_alloc_used_read() atomic_read(&kmem_alloc_used)
+#define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size)
+extern atomic_t kmem_alloc_used;
+extern unsigned long long kmem_alloc_max;
+#endif /* HAVE_ATOMIC64_T */
+
+#ifdef DEBUG_KMEM_TRACKING
+/*
+ * DEBUG_KMEM && DEBUG_KMEM_TRACKING
+ *
+ * The maximum level of memory debugging. All memory will be accounted
+ * for and each allocation will be explicitly tracked. Any allocation
+ * which is leaked will be reported on module unload and the exact location
+ * where that memory was allocated will be reported. This level of memory
+ * tracking will have a significant impact on performance and should only
+ * be enabled for debugging. This feature may be enabled by passing
+ * --enable-debug-kmem-tracking to configure.
*/
-
-/* No linux analog
- * extern int kmem_ready;
- * extern pgcnt_t kmem_reapahead;
+#define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \
+ __FUNCTION__, __LINE__, 0, 0)
+#define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__, 0, 0)
+#define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \
+ __FUNCTION__, __LINE__, 1, nd)
+#define kmem_free(ptr, sz) kmem_free_track((ptr), (sz))
+
+extern void *kmem_alloc_track(size_t, int, const char *, int, int, int);
+extern void kmem_free_track(const void *, size_t);
+
+#else /* DEBUG_KMEM_TRACKING */
+/*
+ * DEBUG_KMEM && !DEBUG_KMEM_TRACKING
+ *
+ * The default build will set DEBUG_KMEM. This provides basic memory
+ * accounting with little to no impact on performance. When the module
+ * is unloaded, if any memory was leaked, the total number of leaked bytes
+ * will be reported on the console. To disable this basic accounting
+ * pass the --disable-debug-kmem option to configure.
*/
-
-#ifdef DEBUG_KMEM_UNIMPLEMENTED
-static __inline__ void kmem_init(void) {
-#error "kmem_init() not implemented"
-}
-
-static __inline__ void kmem_thread_init(void) {
-#error "kmem_thread_init() not implemented"
-}
-
-static __inline__ void kmem_mp_init(void) {
-#error "kmem_mp_init() not implemented"
-}
-
-static __inline__ void kmem_reap_idspace(void) {
-#error "kmem_reap_idspace() not implemented"
-}
-
-static __inline__ size_t kmem_avail(void) {
-#error "kmem_avail() not implemented"
-}
-
-static __inline__ size_t kmem_maxavail(void) {
-#error "kmem_maxavail() not implemented"
-}
-
-static __inline__ uint64_t kmem_cache_stat(kmem_cache_t *cache) {
-#error "kmem_cache_stat() not implemented"
-}
-#endif /* DEBUG_KMEM_UNIMPLEMENTED */
-
-/* XXX - Used by arc.c to adjust its memory footprint. We may want
- * to use this hook in the future to adjust behavior based on
- * debug levels. For now it's safe to always return 0.
+#define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \
+ __FUNCTION__, __LINE__, 0, 0)
+#define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
+ __FUNCTION__, __LINE__, 0, 0)
+#define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \
+ __FUNCTION__, __LINE__, 1, nd)
+#define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz))
+
+extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int);
+extern void kmem_free_debug(const void *, size_t);
+
+#endif /* DEBUG_KMEM_TRACKING */
+#else /* DEBUG_KMEM */
+/*
+ * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
+ *
+ * All debugging is disabled. There will be no overhead even for
+ * minimal memory accounting. To enable basic accounting pass the
+ * --enable-debug-kmem option to configure.
*/
-static __inline__ int
-kmem_debugging(void)
-{
- return 0;
-}
+#define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl))
+#define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl))
+#define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd))
+#define kmem_free(ptr, sz) ((void)(sz), kfree(ptr))
-typedef int (*kmem_constructor_t)(void *, void *, int);
-typedef void (*kmem_destructor_t)(void *, void *);
-typedef void (*kmem_reclaim_t)(void *);
-
-extern int kmem_set_warning(int flag);
-
-extern kmem_cache_t *
-__kmem_cache_create(char *name, size_t size, size_t align,
- kmem_constructor_t constructor,
- kmem_destructor_t destructor,
- kmem_reclaim_t reclaim,
- void *priv, void *vmp, int flags);
-
-extern int __kmem_cache_destroy(kmem_cache_t *cache);
-extern void *__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags);
-extern void __kmem_reap(void);
-
-int kmem_init(void);
-void kmem_fini(void);
+#endif /* DEBUG_KMEM */
-#define kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags) \
- __kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags)
-#define kmem_cache_destroy(cache) __kmem_cache_destroy(cache)
-#define kmem_cache_alloc(cache, flags) __kmem_cache_alloc(cache, flags)
-#define kmem_cache_free(cache, ptr) kmem_cache_free(cache, ptr)
-#define kmem_cache_reap_now(cache) kmem_cache_shrink(cache)
-#define kmem_reap() __kmem_reap()
+int spl_kmem_init(void);
+void spl_kmem_fini(void);
-#ifdef __cplusplus
-}
-#endif
+#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
+ ((ptr) < (void *)VMALLOC_END))
#endif /* _SPL_KMEM_H */