/*
 * This file is part of the SPL: Solaris Porting Layer.
 *
 * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory
 * Written by:
 *         Brian Behlendorf <behlendorf1@llnl.gov>,
 *         Herb Wartens <wartens2@llnl.gov>,
 *         Jim Garlick <garlick@llnl.gov>
 * UCRL-CODE-235197
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);
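
/*
 * Illustrative sketch only, not part of the original source: assuming
 * the flag gates the shim's allocation warnings as its name suggests,
 * a caller expecting an unusually large kmem_alloc() could bracket the
 * call to keep the log quiet (big_size is a hypothetical variable):
 *
 *   kmem_set_warning(0);                     - silence shim warnings
 *   buf = kmem_alloc(big_size, KM_SLEEP);
 *   kmem_set_warning(1);                     - restore the default
 */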

/*
 * Slab allocation interfaces
 *
 * While the Linux slab implementation was inspired by Solaris, it has
 * made some changes to the API which complicate this shim layer. For
 * one thing the same symbol names are used with different arguments in
 * the prototypes. To deal with this we must use the preprocessor to
 * re-order arguments. Happily for us standard C says, "macros appearing
 * in their own expansion are not re-expanded", so this does not result
 * in infinite recursion. Additionally, the function pointers registered
 * by Solaris differ from those used by Linux, so a lookup and mapping
 * from Linux style callbacks to Solaris style callbacks is needed.
 * There is some overhead in this operation which isn't horrible, but
 * it needs to be kept in mind.
 */
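/*
 * A minimal sketch of the argument re-ordering idiom described above,
 * under the assumption that sys/kmem.h remaps the identically named
 * Solaris entry points onto the shim with something along these lines
 * (the exact macros live in the header, not here):
 *
 *   #define kmem_cache_create(name, size, align, ctor, dtor,       \
 *                             reclaim, priv, vmp, flags)           \
 *           __kmem_cache_create(name, size, align, ctor, dtor,     \
 *                               reclaim, priv, vmp, flags)
 *
 * The self-expansion rule matters for any mapping whose right hand
 * side mentions the macro's own name; the #undef block later in this
 * file lets us reach the native Linux functions regardless.
 */
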
#define KCC_MAGIC               0x7a7a7a7a
#define KCC_POISON              0x77

typedef struct kmem_cache_cb {
        int                     kcc_magic;
        struct list_head        kcc_list;
        kmem_cache_t *          kcc_cache;
        kmem_constructor_t      kcc_constructor;
        kmem_destructor_t       kcc_destructor;
        kmem_reclaim_t          kcc_reclaim;
        void *                  kcc_private;
        void *                  kcc_vmp;
        atomic_t                kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        /* Check for NULL before the list_del() dereference, not after */
        if (kcc == NULL)
                return;

        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* Poison the callback record to help catch any use after free */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        ASSERT(flags & SLAB_CTOR_CONSTRUCTOR);

        /* Ensure constructor verify calls are not passed to the registered
         * constructors. This may not be safe because the Solaris constructor
         * is not aware of how to handle the SLAB_CTOR_VERIFY flag.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        if (flags & SLAB_CTOR_ATOMIC)
                flags = KM_NOSLEEP;
        else
                flags = KM_SLEEP;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructor has no return code, silently eat it */
}

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* No valid destructor flags */
        ASSERT(flags == 0);

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* Solaris destructor takes no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under linux a shrinker is not tightly coupled with a slab
         * cache. In fact linux always systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached. Because of this we only register one shrinker
         * function in the shim layer for all slab caches. And we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question. If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running. In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target. I do not pass these args on because reclaim
                 * policy is entirely up to the owner under solaris. We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under linux we should return the remaining number of entries in
         * the cache. Unfortunately, I don't see an easy way to safely
         * emulate this behavior so I'm returning one entry per cache which
         * was registered with the generic shrinker. This should fake out
         * the linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}
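
/*
 * For reference only, not part of the original source: the registration
 * lifecycle assumed by this file's old-style shrinker API looks roughly
 * like the following. Under memory pressure the VM repeatedly invokes
 * the callback until it is satisfied with the reported totals:
 *
 *   struct shrinker *s;
 *   s = set_shrinker(KMC_DEFAULT_SEEKS, kmem_cache_generic_shrinker);
 *   ...
 *   remove_shrinker(s);
 */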

/* Ensure the kmem_cache_create/destroy/alloc macros are removed here
 * to prevent a recursive substitution; we want to call the native
 * linux versions below.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX - Options currently unsupported by the shim layer */
        ASSERT(!vmp);
        ASSERT(flags == 0);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL) {
                kfree(cache_name);
                RETURN(NULL);
        }

        /* Register shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        kfree(cache_name);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }

                /* Record that this call registered the shared shrinker so
                 * it can be removed again on the error path below. */
                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* Newly registered shrinker must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                kfree(cache_name);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
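
/*
 * Hypothetical consumer-side sketch, not part of the original file:
 * with the remapping macros in sys/kmem.h a caller sees the Solaris
 * style API. The cache and callback names below are illustrative only,
 * and kmem_cache_free() is assumed to map straight onto its Linux twin:
 *
 *   kmem_cache_t *cp;
 *   cp = kmem_cache_create("my_cache", sizeof(my_obj_t), 0,
 *                          my_ctor, my_dtor, my_reclaim, NULL, NULL, 0);
 *   obj = kmem_cache_alloc(cp, KM_SLEEP);
 *   ...
 *   kmem_cache_free(cp, obj);
 *   kmem_cache_destroy(cp);
 */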

/* Return code provided despite Solaris's void return. There should be no
 * harm here since the Solaris versions will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister generic shrinker on removal of all caches */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory. Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding. This is
 * not the correct fix, or even a good one. But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into linux to force all the registered
         * shrinkers to run we just run the one registered for this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);
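
/*
 * Illustrative only, under the assumption that the header maps the
 * Solaris kmem_reap() onto __kmem_reap(): a consumer noticing memory
 * pressure (the predicate below is hypothetical) would call it to
 * nudge every cache registered above into running its reclaim callback:
 *
 *   if (memory_pressure_detected())
 *           kmem_reap();
 */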

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */
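
/*
 * Worked example of the helper above, with illustrative values: a
 * leaked buffer holding mostly printable text is reported with that
 * text in the data column, while a non-printable buffer falls back to
 * a hex dump such as "7a7a7a7a7a7a7a7a". Note that the hex path, as
 * written, samples every other byte of the first 16 (offsets 0, 2,
 * ..., 14) rather than the first 8 consecutive bytes.
 */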

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging. Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}