]> git.proxmox.com Git - mirror_spl.git/blame - include/sys/kmem.h
Add missing atomic functions
[mirror_spl.git] / include / sys / kmem.h
CommitLineData
716154c5
BB
1/*****************************************************************************\
2 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3 * Copyright (C) 2007 The Regents of the University of California.
4 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
715f6251 6 * UCRL-CODE-235197
7 *
716154c5
BB
8 * This file is part of the SPL, Solaris Porting Layer.
9 * For details, see <http://github.com/behlendorf/spl/>.
10 *
11 * The SPL is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
715f6251 15 *
716154c5 16 * The SPL is distributed in the hope that it will be useful, but WITHOUT
715f6251 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
716154c5
BB
22 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
23\*****************************************************************************/
715f6251 24
09b414e8 25#ifndef _SPL_KMEM_H
26#define _SPL_KMEM_H
f1ca4da6 27
f1b59d26 28#include <linux/module.h>
f1ca4da6 29#include <linux/slab.h>
79b31f36 30#include <linux/vmalloc.h>
baf2979e 31#include <linux/mm_compat.h>
f1ca4da6 32#include <linux/spinlock.h>
d6a26c6a 33#include <linux/rwsem.h>
34#include <linux/hash.h>
35#include <linux/ctype.h>
d04c8a56 36#include <asm/atomic.h>
57d86234 37#include <sys/types.h>
36b313da 38#include <sys/vmsystm.h>
def465ad 39#include <sys/kstat.h>
550f1705 40
f1ca4da6 41/*
42 * Memory allocation interfaces
43 */
/* Solaris KM_* allocation flags expressed as Linux gfp masks. */
#define KM_SLEEP			GFP_NOFS
#define KM_NOSLEEP			GFP_ATOMIC
#undef  KM_PANIC			/* No linux analog */
#define KM_PUSHPAGE			(KM_SLEEP | __GFP_HIGH)
#define KM_VMFLAGS			GFP_LEVEL_MASK
#define KM_FLAGS			__GFP_BITS_MASK
#define KM_NODEBUG			__GFP_NOWARN
f1ca4da6 51
/*
 * Fallback for kernels whose headers do not define __GFP_ZERO.  The
 * flag is only used internally, so the kernel itself does not need to
 * support it.
 */
#ifndef __GFP_ZERO
# define __GFP_ZERO			0x8000
#endif
58
c89fdee4
BB
59/*
60 * __GFP_NOFAIL looks like it will be removed from the kernel perhaps as
61 * early as 2.6.32. To avoid this issue when it occurs in upstream kernels
62 * we retry the allocation here as long as it is not __GFP_WAIT (GFP_ATOMIC).
63 * I would prefer the caller handle the failure case cleanly but we are
64 * trying to emulate Solaris and those are not the Solaris semantics.
65 */
66static inline void *
67kmalloc_nofail(size_t size, gfp_t flags)
68{
69 void *ptr;
70
71 do {
72 ptr = kmalloc(size, flags);
73 } while (ptr == NULL && (flags & __GFP_WAIT));
74
75 return ptr;
76}
77
78static inline void *
79kzalloc_nofail(size_t size, gfp_t flags)
80{
81 void *ptr;
82
83 do {
84 ptr = kzalloc(size, flags);
85 } while (ptr == NULL && (flags & __GFP_WAIT));
86
87 return ptr;
88}
89
c89fdee4
BB
90static inline void *
91kmalloc_node_nofail(size_t size, gfp_t flags, int node)
92{
10129680 93#ifdef HAVE_KMALLOC_NODE
c89fdee4
BB
94 void *ptr;
95
96 do {
97 ptr = kmalloc_node(size, flags, node);
98 } while (ptr == NULL && (flags & __GFP_WAIT));
99
100 return ptr;
10129680
BB
101#else
102 return kmalloc_nofail(size, flags);
c89fdee4 103#endif /* HAVE_KMALLOC_NODE */
10129680
BB
104}
105
106static inline void *
107vmalloc_nofail(size_t size, gfp_t flags)
108{
109 void *ptr;
110
111 /*
112 * Retry failed __vmalloc() allocations once every second. The
113 * rational for the delay is that the likely failure modes are:
114 *
115 * 1) The system has completely exhausted memory, in which case
116 * delaying 1 second for the memory reclaim to run is reasonable
117 * to avoid thrashing the system.
118 * 2) The system has memory but has exhausted the small virtual
119 * address space available on 32-bit systems. Retrying the
120 * allocation immediately will only result in spinning on the
121 * virtual address space lock. It is better delay a second and
122 * hope that another process will free some of the address space.
123 * But the bottom line is there is not much we can actually do
124 * since we can never safely return a failure and honor the
125 * Solaris semantics.
126 */
127 while (1) {
128 ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
129 if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) {
130 set_current_state(TASK_INTERRUPTIBLE);
131 schedule_timeout(HZ);
132 } else {
133 break;
134 }
135 }
136
137 return ptr;
138}
139
140static inline void *
141vzalloc_nofail(size_t size, gfp_t flags)
142{
143 void *ptr;
144
145 ptr = vmalloc_nofail(size, flags);
146 if (ptr)
147 memset(ptr, 0, (size));
148
149 return ptr;
150}
c89fdee4 151
f1ca4da6 152#ifdef DEBUG_KMEM
a0f6da3d 153
10129680
BB
154/*
155 * Memory accounting functions to be used only when DEBUG_KMEM is set.
156 */
157# ifdef HAVE_ATOMIC64_T
a0f6da3d 158
d04c8a56
BB
159# define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
160# define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
161# define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used)
162# define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size)
163# define vmem_alloc_used_add(size) atomic64_add(size, &vmem_alloc_used)
164# define vmem_alloc_used_sub(size) atomic64_sub(size, &vmem_alloc_used)
165# define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used)
166# define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size)
167
10129680 168extern atomic64_t kmem_alloc_used;
d04c8a56 169extern unsigned long long kmem_alloc_max;
10129680 170extern atomic64_t vmem_alloc_used;
d04c8a56
BB
171extern unsigned long long vmem_alloc_max;
172
10129680
BB
173# else /* HAVE_ATOMIC64_T */
174
d04c8a56
BB
175# define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used)
176# define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used)
177# define kmem_alloc_used_read() atomic_read(&kmem_alloc_used)
178# define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size)
179# define vmem_alloc_used_add(size) atomic_add(size, &vmem_alloc_used)
180# define vmem_alloc_used_sub(size) atomic_sub(size, &vmem_alloc_used)
181# define vmem_alloc_used_read() atomic_read(&vmem_alloc_used)
182# define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size)
183
10129680
BB
184extern atomic_t kmem_alloc_used;
185extern unsigned long long kmem_alloc_max;
186extern atomic_t vmem_alloc_used;
187extern unsigned long long vmem_alloc_max;
a0f6da3d 188
10129680 189# endif /* HAVE_ATOMIC64_T */
a0f6da3d 190
191# ifdef DEBUG_KMEM_TRACKING
10129680
BB
/*
 * DEBUG_KMEM && DEBUG_KMEM_TRACKING
 *
 * The maximum level of memory debugging.  All memory will be accounted
 * for and each allocation will be explicitly tracked.  Any allocation
 * which is leaked will be reported on module unload and the exact location
 * where that memory was allocated will be reported.  This level of memory
 * tracking will have a significant impact on performance and should only
 * be enabled for debugging.  This feature may be enabled by passing
 * --enable-debug-kmem-tracking to configure.
 */
# define kmem_alloc(sz, fl)		kmem_alloc_track((sz), (fl), \
					    __FUNCTION__, __LINE__, 0, 0)
# define kmem_zalloc(sz, fl)		kmem_alloc_track((sz), (fl)|__GFP_ZERO,\
					    __FUNCTION__, __LINE__, 0, 0)
# define kmem_alloc_node(sz, fl, nd)	kmem_alloc_track((sz), (fl), \
					    __FUNCTION__, __LINE__, 1, nd)
# define kmem_free(ptr, sz)		kmem_free_track((ptr), (sz))

# define vmem_alloc(sz, fl)		vmem_alloc_track((sz), (fl), \
					    __FUNCTION__, __LINE__)
# define vmem_zalloc(sz, fl)		vmem_alloc_track((sz), (fl)|__GFP_ZERO,\
					    __FUNCTION__, __LINE__)
# define vmem_free(ptr, sz)		vmem_free_track((ptr), (sz))

/*
 * Parameter names mirror how the macros above call these functions:
 * the caller's __FUNCTION__ and __LINE__, then (kmem only) a 0/1 flag
 * that is 1 for kmem_alloc_node() followed by the requested node.
 */
extern void *kmem_alloc_track(size_t size, int flags, const char *func,
    int line, int node_alloc, int node);
extern void kmem_free_track(void *ptr, size_t size);
extern void *vmem_alloc_track(size_t size, int flags, const char *func,
    int line);
extern void vmem_free_track(void *ptr, size_t size);
a0f6da3d 221
222# else /* DEBUG_KMEM_TRACKING */
10129680
BB
/*
 * DEBUG_KMEM && !DEBUG_KMEM_TRACKING
 *
 * The default build will set DEBUG_KMEM.  This provides basic memory
 * accounting with little to no impact on performance.  When the module
 * is unloaded, if any memory was leaked the total number of leaked bytes
 * will be reported on the console.  To disable this basic accounting
 * pass the --disable-debug-kmem option to configure.
 */
# define kmem_alloc(sz, fl)		kmem_alloc_debug((sz), (fl), \
					    __FUNCTION__, __LINE__, 0, 0)
# define kmem_zalloc(sz, fl)		kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
					    __FUNCTION__, __LINE__, 0, 0)
# define kmem_alloc_node(sz, fl, nd)	kmem_alloc_debug((sz), (fl), \
					    __FUNCTION__, __LINE__, 1, nd)
# define kmem_free(ptr, sz)		kmem_free_debug((ptr), (sz))

# define vmem_alloc(sz, fl)		vmem_alloc_debug((sz), (fl), \
					    __FUNCTION__, __LINE__)
# define vmem_zalloc(sz, fl)		vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
					    __FUNCTION__, __LINE__)
# define vmem_free(ptr, sz)		vmem_free_debug((ptr), (sz))

/*
 * Parameter names mirror how the macros above call these functions:
 * the caller's __FUNCTION__ and __LINE__, then (kmem only) a 0/1 flag
 * that is 1 for kmem_alloc_node() followed by the requested node.
 */
extern void *kmem_alloc_debug(size_t size, int flags, const char *func,
    int line, int node_alloc, int node);
extern void kmem_free_debug(void *ptr, size_t size);
extern void *vmem_alloc_debug(size_t size, int flags, const char *func,
    int line);
extern void vmem_free_debug(void *ptr, size_t size);
a0f6da3d 250
251# endif /* DEBUG_KMEM_TRACKING */
c6dc93d6 252#else /* DEBUG_KMEM */
10129680
BB
/*
 * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
 *
 * All debugging is disabled.  There will be no overhead even for
 * minimal memory accounting.  To enable basic accounting pass the
 * --enable-debug-kmem option to configure.
 *
 * The free macros still evaluate 'sz' (and discard it) before calling
 * kfree()/vfree() on 'ptr'.
 */
# define kmem_alloc(sz, fl)		kmalloc_nofail((sz), (fl))
# define kmem_zalloc(sz, fl)		kzalloc_nofail((sz), (fl))
# define kmem_alloc_node(sz, fl, nd)	kmalloc_node_nofail((sz), (fl), (nd))
# define kmem_free(ptr, sz)		((void)(sz), kfree(ptr))

# define vmem_alloc(sz, fl)		vmalloc_nofail((sz), (fl))
# define vmem_zalloc(sz, fl)		vzalloc_nofail((sz), (fl))
# define vmem_free(ptr, sz)		((void)(sz), vfree(ptr))
79b31f36 268
f1ca4da6 269#endif /* DEBUG_KMEM */
270
10129680
BB
/*
 * Miscellaneous kmem helpers implemented outside this header.
 * NOTE(review): presumably the strings returned by kmem_vasprintf(),
 * kmem_asprintf() and strdup() are released with strfree() -- confirm
 * against the implementation.
 */
extern int kmem_debugging(void);
extern char *kmem_vasprintf(const char *fmt, va_list ap);
extern char *kmem_asprintf(const char *fmt, ...);
extern char *strdup(const char *str);
extern void strfree(char *str);
276
277
/*
 * Slab allocation interfaces.  The SPL slab differs from the standard
 * Linux SLAB or SLUB primarily in that each cache may be backed by slabs
 * allocated from the physical or virtual memory address space.  The virtual
 * slabs allow for good behavior when allocating large objects of identical
 * size.  This slab implementation also supports both constructors and
 * destructors which the Linux slab does not.
 */

/* Bit positions of the cache flags; the KMC_* masks below are 1 << bit. */
enum {
	KMC_BIT_NOTOUCH		= 0,	/* Don't update ages */
	KMC_BIT_NODEBUG		= 1,	/* Default behavior */
	KMC_BIT_NOMAGAZINE	= 2,	/* XXX: Unsupported */
	KMC_BIT_NOHASH		= 3,	/* XXX: Unsupported */
	KMC_BIT_QCACHE		= 4,	/* XXX: Unsupported */
	KMC_BIT_KMEM		= 5,	/* Use kmem cache */
	KMC_BIT_VMEM		= 6,	/* Use vmem cache */
	KMC_BIT_OFFSLAB		= 7,	/* Objects not on slab */
	KMC_BIT_REAPING		= 16,	/* Reaping in progress */
	KMC_BIT_DESTROY		= 17,	/* Destroy in progress */
};

#define KMC_NOTOUCH		(1 << KMC_BIT_NOTOUCH)
#define KMC_NODEBUG		(1 << KMC_BIT_NODEBUG)
#define KMC_NOMAGAZINE		(1 << KMC_BIT_NOMAGAZINE)
#define KMC_NOHASH		(1 << KMC_BIT_NOHASH)
#define KMC_QCACHE		(1 << KMC_BIT_QCACHE)
#define KMC_KMEM		(1 << KMC_BIT_KMEM)
#define KMC_VMEM		(1 << KMC_BIT_VMEM)
#define KMC_OFFSLAB		(1 << KMC_BIT_OFFSLAB)
#define KMC_REAPING		(1 << KMC_BIT_REAPING)
#define KMC_DESTROY		(1 << KMC_BIT_DESTROY)

#define KMC_REAP_CHUNK		INT_MAX
#define KMC_DEFAULT_SEEKS	1
f1ca4da6 312
/*
 * Cache list head and the rw_semaphore declared alongside it.
 * NOTE(review): presumably caches are linked here through skc_list and
 * the semaphore guards the list -- confirm against the implementation.
 */
extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;
2fb9b26a 315
/* Sanity magic values, one per structure type (see the *_magic fields). */
#define SKM_MAGIC			0x2e2e2e2e
#define SKO_MAGIC			0x20202020
#define SKS_MAGIC			0x22222222
#define SKC_MAGIC			0x2c2c2c2c

/* Cache tunables. */
#define SPL_KMEM_CACHE_DELAY		15	/* Minimum slab release age */
#define SPL_KMEM_CACHE_REAP		0	/* Default reap everything */
#define SPL_KMEM_CACHE_OBJ_PER_SLAB	32	/* Target objects per slab */
#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN	8	/* Minimum objects per slab */
#define SPL_KMEM_CACHE_ALIGN		8	/* Default object alignment */
2fb9b26a 326
/*
 * Callback types accepted by spl_kmem_cache_create().
 * NOTE(review): argument meanings are not visible in this header;
 * presumably (object, private data[, flags]) for the constructor and
 * destructor and (private data) for reclaim -- confirm.
 */
typedef int (*spl_kmem_ctor_t)(void *, void *, int);
typedef void (*spl_kmem_dtor_t)(void *, void *);
typedef void (*spl_kmem_reclaim_t)(void *);
330
4afaaefa 331typedef struct spl_kmem_magazine {
9b1b8e4c 332 uint32_t skm_magic; /* Sanity magic */
4afaaefa 333 uint32_t skm_avail; /* Available objects */
334 uint32_t skm_size; /* Magazine size */
335 uint32_t skm_refill; /* Batch refill size */
9b1b8e4c
BB
336 struct spl_kmem_cache *skm_cache; /* Owned by cache */
337 struct delayed_work skm_work; /* Magazine reclaim work */
4afaaefa 338 unsigned long skm_age; /* Last cache access */
339 void *skm_objs[0]; /* Object pointers */
340} spl_kmem_magazine_t;
341
2fb9b26a 342typedef struct spl_kmem_obj {
343 uint32_t sko_magic; /* Sanity magic */
2fb9b26a 344 void *sko_addr; /* Buffer address */
345 struct spl_kmem_slab *sko_slab; /* Owned by slab */
346 struct list_head sko_list; /* Free object list linkage */
2fb9b26a 347} spl_kmem_obj_t;
348
349typedef struct spl_kmem_slab {
350 uint32_t sks_magic; /* Sanity magic */
351 uint32_t sks_objs; /* Objects per slab */
352 struct spl_kmem_cache *sks_cache; /* Owned by cache */
353 struct list_head sks_list; /* Slab list linkage */
354 struct list_head sks_free_list; /* Free object list */
355 unsigned long sks_age; /* Last modify jiffie */
4afaaefa 356 uint32_t sks_ref; /* Ref count used objects */
2fb9b26a 357} spl_kmem_slab_t;
358
359typedef struct spl_kmem_cache {
ea3e6ca9
BB
360 uint32_t skc_magic; /* Sanity magic */
361 uint32_t skc_name_size; /* Name length */
362 char *skc_name; /* Name string */
4afaaefa 363 spl_kmem_magazine_t *skc_mag[NR_CPUS]; /* Per-CPU warm cache */
364 uint32_t skc_mag_size; /* Magazine size */
365 uint32_t skc_mag_refill; /* Magazine refill count */
ea3e6ca9
BB
366 spl_kmem_ctor_t skc_ctor; /* Constructor */
367 spl_kmem_dtor_t skc_dtor; /* Destructor */
368 spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */
369 void *skc_private; /* Private data */
370 void *skc_vmp; /* Unused */
31a033ec 371 unsigned long skc_flags; /* Flags */
2fb9b26a 372 uint32_t skc_obj_size; /* Object size */
48e0606a 373 uint32_t skc_obj_align; /* Object alignment */
a1502d76 374 uint32_t skc_slab_objs; /* Objects per slab */
ea3e6ca9
BB
375 uint32_t skc_slab_size; /* Slab size */
376 uint32_t skc_delay; /* Slab reclaim interval */
37db7d8c 377 uint32_t skc_reap; /* Slab reclaim count */
ea3e6ca9
BB
378 atomic_t skc_ref; /* Ref count callers */
379 struct delayed_work skc_work; /* Slab reclaim work */
ea3e6ca9 380 struct list_head skc_list; /* List of caches linkage */
2fb9b26a 381 struct list_head skc_complete_list;/* Completely alloc'ed */
382 struct list_head skc_partial_list; /* Partially alloc'ed */
d46630e0 383 spinlock_t skc_lock; /* Cache lock */
2fb9b26a 384 uint64_t skc_slab_fail; /* Slab alloc failures */
385 uint64_t skc_slab_create;/* Slab creates */
386 uint64_t skc_slab_destroy;/* Slab destroys */
d46630e0 387 uint64_t skc_slab_total; /* Slab total current */
ea3e6ca9 388 uint64_t skc_slab_alloc; /* Slab alloc current */
d46630e0 389 uint64_t skc_slab_max; /* Slab max historic */
390 uint64_t skc_obj_total; /* Obj total current */
391 uint64_t skc_obj_alloc; /* Obj alloc current */
392 uint64_t skc_obj_max; /* Obj max historic */
2fb9b26a 393} spl_kmem_cache_t;
7afde631 394#define kmem_cache_t spl_kmem_cache_t
2fb9b26a 395
396extern spl_kmem_cache_t *
397spl_kmem_cache_create(char *name, size_t size, size_t align,
398 spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
f1ca4da6 399 void *priv, void *vmp, int flags);
400
2fb9b26a 401extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
402extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
403extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
404extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc);
405extern void spl_kmem_reap(void);
f1ca4da6 406
d1ff2312 407int spl_kmem_init_kallsyms_lookup(void);
2fb9b26a 408int spl_kmem_init(void);
409void spl_kmem_fini(void);
5d86345d 410
/* Solaris-named kmem_cache_*() entry points forwarded to the SPL ones. */
#define kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags) \
        spl_kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags)
#define kmem_cache_destroy(skc)		spl_kmem_cache_destroy(skc)
#define kmem_cache_alloc(skc, flags)	spl_kmem_cache_alloc(skc, flags)
#define kmem_cache_free(skc, obj)	spl_kmem_cache_free(skc, obj)
#define kmem_cache_reap_now(skc)	spl_kmem_cache_reap_now(skc)
#define kmem_reap()			spl_kmem_reap()

/*
 * True when 'ptr' lies in [VMALLOC_START, VMALLOC_END).  'ptr' is
 * evaluated twice, so do not pass an expression with side effects.
 */
#define kmem_virt(ptr)			(((ptr) >= (void *)VMALLOC_START) && \
					 ((ptr) <  (void *)VMALLOC_END))
f1ca4da6 420
09b414e8 421#endif /* _SPL_KMEM_H */