/*
 *  This file is part of the SPL: Solaris Porting Layer.
 *
 *  Copyright (c) 2008 Lawrence Livermore National Security, LLC.
 *  Produced at Lawrence Livermore National Laboratory
 *
 *          Brian Behlendorf <behlendorf1@llnl.gov>,
 *          Herb Wartens <wartens2@llnl.gov>,
 *          Jim Garlick <garlick@llnl.gov>
 *
 *  This is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifdef DEBUG_SUBSYSTEM
# undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM
/*
 * Memory allocation interfaces and debugging for basic kmem_*
 * and vmem_* style memory allocation.  When DEBUG_KMEM is enabled
 * all allocations will be tracked when they are allocated and
 * freed.  When the SPL module is unloaded a list of all leaked
 * addresses and where they were allocated will be dumped to the
 * console.  Enabling this feature has a significant impact on
 * performance but it makes finding memory leaks straightforward.
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used = ATOMIC64_INIT(0);
unsigned long long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used = ATOMIC64_INIT(0);
unsigned long long vmem_alloc_max = 0;
int kmem_warning_flag = 1;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);
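/*
 * Accounting pattern used throughout this file (an illustrative sketch of
 * the code below, not an additional code path): each shim allocation bumps
 * the *_alloc_used counter atomically and then updates the *_alloc_max
 * high-water mark with an unlocked read-then-store, so the maximum is
 * approximate by design.  The matching free path simply subtracts:
 *
 *   atomic64_add(size, &kmem_alloc_used);
 *   if (unlikely(atomic64_read(&kmem_alloc_used) > kmem_alloc_max))
 *           kmem_alloc_max = atomic64_read(&kmem_alloc_used);
 *   ...
 *   atomic64_sub(size, &kmem_alloc_used);
 */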
# ifdef DEBUG_KMEM_TRACKING

/* XXX - Not too surprisingly, with debugging enabled the xmem_locks are very
 * highly contended particularly on xfree().  If we want to run with this
 * detailed debugging enabled for anything other than debugging we need to
 * minimize the contention by moving to a lock per xmem_table entry model.
 */
# define KMEM_HASH_BITS          10
# define KMEM_TABLE_SIZE         (1 << KMEM_HASH_BITS)

# define VMEM_HASH_BITS          10
# define VMEM_TABLE_SIZE         (1 << VMEM_HASH_BITS)
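/*
 * A possible shape for the lock-per-entry model suggested above (a sketch
 * only, nothing below implements it): pairing each hash bucket with its
 * own spinlock so that xfree() calls on unrelated addresses no longer
 * serialize on a single xmem_lock.  The kmem_bucket_t name is made up
 * for illustration.
 *
 *   typedef struct kmem_bucket {
 *           spinlock_t        kb_lock;
 *           struct hlist_head kb_head;
 *   } kmem_bucket_t;
 *
 *   static kmem_bucket_t kmem_buckets[KMEM_TABLE_SIZE];
 */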
typedef struct kmem_debug {
	struct hlist_node kd_hlist;     /* Hash node linkage */
	struct list_head kd_list;       /* List of all allocations */
	void *kd_addr;                  /* Allocation pointer */
	size_t kd_size;                 /* Allocation size */
	const char *kd_func;            /* Allocation function */
	int kd_line;                    /* Allocation line */
} kmem_debug_t;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;
EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);
# endif /* DEBUG_KMEM_TRACKING */

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif /* DEBUG_KMEM */
EXPORT_SYMBOL(kmem_set_warning);
/*
 * Slab allocation interfaces
 *
 * While the Linux slab implementation was inspired by the Solaris
 * implementation I cannot use it to emulate the Solaris APIs.  I
 * require two features which are not provided by the Linux slab.
 *
 * 1) Constructors AND destructors.  Recent versions of the Linux
 *    kernel have removed support for destructors.  This is a deal
 *    breaker for the SPL which contains particularly expensive
 *    initializers for mutexes, condition variables, etc.  We also
 *    require a minimal level of cleanup for these data types unlike
 *    many Linux data types which do not need to be explicitly destroyed.
 *
 * 2) Virtual address space backed slab.  Callers of the Solaris slab
 *    expect it to work well for both small and very large allocations.
 *    Because of memory fragmentation the Linux slab which is backed
 *    by kmalloc'ed memory performs very badly when confronted with
 *    large numbers of large allocations.  Basing the slab on the
 *    virtual address space removes the need for contiguous pages
 *    and greatly improves performance for large allocations.
 *
 * For these reasons, the SPL has its own slab implementation with
 * the needed features.  It is not as highly optimized as either the
 * Solaris or Linux slabs, but it should get me most of what is
 * needed until it can be optimized or obsoleted by another approach.
 *
 * One serious concern I do have about this method is the relatively
 * small virtual address space on 32bit arches.  This will seriously
 * constrain the size of the slab caches and their performance.
 *
 * XXX: Implement work requests to keep an eye on each cache and
 *      shrink them via spl_slab_reclaim() when they are wasting lots
 *      of space.  Currently this process is driven by the reapers.
 *
 * XXX: Improve the partial slab list by carefully maintaining a
 *      strict ordering of fullest to emptiest slabs based on
 *      the slab reference count.  This guarantees that when freeing
 *      slabs back to the system we need only linearly traverse the
 *      last N slabs in the list to discover all the freeable slabs.
 *
 * XXX: NUMA awareness for optionally allocating memory close to a
 *      particular core.  This can be advantageous if you know the slab
 *      object will be short lived and primarily accessed from one core.
 *
 * XXX: Slab coloring may also yield performance improvements and would
 *      be desirable to implement.
 */
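/*
 * Typical consumer usage of the cache interfaces implemented below (an
 * illustrative sketch; my_obj_t, my_ctor and my_dtor are made-up names,
 * not part of the SPL):
 *
 *   static void my_ctor(void *obj, void *priv, int flags) { ... }
 *   static void my_dtor(void *obj, void *priv) { ... }
 *
 *   spl_kmem_cache_t *cache;
 *   my_obj_t *obj;
 *
 *   cache = spl_kmem_cache_create("my_cache", sizeof(my_obj_t), 0,
 *                                 my_ctor, my_dtor, NULL, NULL, NULL, 0);
 *   obj = spl_kmem_cache_alloc(cache, KM_SLEEP);
 *   ...
 *   spl_kmem_cache_free(cache, obj);
 *   spl_kmem_cache_destroy(cache);
 */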
struct list_head spl_kmem_cache_list;   /* List of caches */
struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */

static int spl_cache_flush(spl_kmem_cache_t *skc,
                           spl_kmem_magazine_t *skm, int flush);

#ifdef HAVE_SET_SHRINKER
static struct shrinker *spl_kmem_cache_shrinker;
#else
static int spl_kmem_cache_generic_shrinker(int nr_to_scan,
                                           unsigned int gfp_mask);
static struct shrinker spl_kmem_cache_shrinker = {
	.shrink = spl_kmem_cache_generic_shrinker,
	.seeks = KMC_DEFAULT_SEEKS,
};
#endif
#ifdef DEBUG_KMEM
# ifdef DEBUG_KMEM_TRACKING

static kmem_debug_t *
kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, void *addr)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kmem_debug *p;
	unsigned long flags;

	spin_lock_irqsave(lock, flags);

	head = &table[hash_ptr(addr, bits)];
	hlist_for_each_entry_rcu(p, node, head, kd_hlist) {
		if (p->kd_addr == addr) {
			hlist_del_init(&p->kd_hlist);
			list_del_init(&p->kd_list);
			spin_unlock_irqrestore(lock, flags);
			return (p);
		}
	}

	spin_unlock_irqrestore(lock, flags);

	return (NULL);
}
201 kmem_alloc_track(size_t size
, int flags
, const char *func
, int line
,
202 int node_alloc
, int node
)
206 unsigned long irq_flags
;
209 dptr
= (kmem_debug_t
*) kmalloc(sizeof(kmem_debug_t
),
210 flags
& ~__GFP_ZERO
);
213 CWARN("kmem_alloc(%ld, 0x%x) debug failed\n",
214 sizeof(kmem_debug_t
), flags
);
	/* Marked unlikely because we should never be doing this,
	 * we tolerate up to 2 pages but a single page is best. */
218 if (unlikely((size
) > (PAGE_SIZE
* 2)) && kmem_warning_flag
)
219 CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n",
220 (unsigned long long) size
, flags
,
221 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
223 /* We use kstrdup() below because the string pointed to by
224 * __FUNCTION__ might not be available by the time we want
225 * to print it since the module might have been unloaded. */
226 dptr
->kd_func
= kstrdup(func
, flags
& ~__GFP_ZERO
);
227 if (unlikely(dptr
->kd_func
== NULL
)) {
229 CWARN("kstrdup() failed in kmem_alloc(%llu, 0x%x) "
230 "(%lld/%llu)\n", (unsigned long long) size
, flags
,
231 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
235 /* Use the correct allocator */
237 ASSERT(!(flags
& __GFP_ZERO
));
238 ptr
= kmalloc_node(size
, flags
, node
);
239 } else if (flags
& __GFP_ZERO
) {
240 ptr
= kzalloc(size
, flags
& ~__GFP_ZERO
);
242 ptr
= kmalloc(size
, flags
);
245 if (unlikely(ptr
== NULL
)) {
246 kfree(dptr
->kd_func
);
248 CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n",
249 (unsigned long long) size
, flags
,
250 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
254 atomic64_add(size
, &kmem_alloc_used
);
255 if (unlikely(atomic64_read(&kmem_alloc_used
) >
258 atomic64_read(&kmem_alloc_used
);
260 INIT_HLIST_NODE(&dptr
->kd_hlist
);
261 INIT_LIST_HEAD(&dptr
->kd_list
);
264 dptr
->kd_size
= size
;
265 dptr
->kd_line
= line
;
267 spin_lock_irqsave(&kmem_lock
, irq_flags
);
268 hlist_add_head_rcu(&dptr
->kd_hlist
,
269 &kmem_table
[hash_ptr(ptr
, KMEM_HASH_BITS
)]);
270 list_add_tail(&dptr
->kd_list
, &kmem_list
);
271 spin_unlock_irqrestore(&kmem_lock
, irq_flags
);
273 CDEBUG_LIMIT(D_INFO
, "kmem_alloc(%llu, 0x%x) = %p "
274 "(%lld/%llu)\n", (unsigned long long) size
, flags
,
275 ptr
, atomic64_read(&kmem_alloc_used
),
281 EXPORT_SYMBOL(kmem_alloc_track
);
284 kmem_free_track(void *ptr
, size_t size
)
289 ASSERTF(ptr
|| size
> 0, "ptr: %p, size: %llu", ptr
,
290 (unsigned long long) size
);
292 dptr
= kmem_del_init(&kmem_lock
, kmem_table
, KMEM_HASH_BITS
, ptr
);
294 ASSERT(dptr
); /* Must exist in hash due to kmem_alloc() */
296 /* Size must match */
297 ASSERTF(dptr
->kd_size
== size
, "kd_size (%llu) != size (%llu), "
298 "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr
->kd_size
,
299 (unsigned long long) size
, dptr
->kd_func
, dptr
->kd_line
);
301 atomic64_sub(size
, &kmem_alloc_used
);
303 CDEBUG_LIMIT(D_INFO
, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr
,
304 (unsigned long long) size
, atomic64_read(&kmem_alloc_used
),
307 kfree(dptr
->kd_func
);
309 memset(dptr
, 0x5a, sizeof(kmem_debug_t
));
312 memset(ptr
, 0x5a, size
);
317 EXPORT_SYMBOL(kmem_free_track
);
320 vmem_alloc_track(size_t size
, int flags
, const char *func
, int line
)
324 unsigned long irq_flags
;
327 ASSERT(flags
& KM_SLEEP
);
329 dptr
= (kmem_debug_t
*) kmalloc(sizeof(kmem_debug_t
), flags
);
331 CWARN("vmem_alloc(%ld, 0x%x) debug failed\n",
332 sizeof(kmem_debug_t
), flags
);
334 /* We use kstrdup() below because the string pointed to by
335 * __FUNCTION__ might not be available by the time we want
336 * to print it, since the module might have been unloaded. */
337 dptr
->kd_func
= kstrdup(func
, flags
& ~__GFP_ZERO
);
338 if (unlikely(dptr
->kd_func
== NULL
)) {
340 CWARN("kstrdup() failed in vmem_alloc(%llu, 0x%x) "
341 "(%lld/%llu)\n", (unsigned long long) size
, flags
,
342 atomic64_read(&vmem_alloc_used
), vmem_alloc_max
);
346 ptr
= __vmalloc(size
, (flags
| __GFP_HIGHMEM
) & ~__GFP_ZERO
,
349 if (unlikely(ptr
== NULL
)) {
350 kfree(dptr
->kd_func
);
352 CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n",
353 (unsigned long long) size
, flags
,
354 atomic64_read(&vmem_alloc_used
), vmem_alloc_max
);
358 if (flags
& __GFP_ZERO
)
359 memset(ptr
, 0, size
);
361 atomic64_add(size
, &vmem_alloc_used
);
362 if (unlikely(atomic64_read(&vmem_alloc_used
) >
365 atomic64_read(&vmem_alloc_used
);
367 INIT_HLIST_NODE(&dptr
->kd_hlist
);
368 INIT_LIST_HEAD(&dptr
->kd_list
);
371 dptr
->kd_size
= size
;
372 dptr
->kd_line
= line
;
374 spin_lock_irqsave(&vmem_lock
, irq_flags
);
375 hlist_add_head_rcu(&dptr
->kd_hlist
,
376 &vmem_table
[hash_ptr(ptr
, VMEM_HASH_BITS
)]);
377 list_add_tail(&dptr
->kd_list
, &vmem_list
);
378 spin_unlock_irqrestore(&vmem_lock
, irq_flags
);
380 CDEBUG_LIMIT(D_INFO
, "vmem_alloc(%llu, 0x%x) = %p "
381 "(%lld/%llu)\n", (unsigned long long) size
, flags
,
382 ptr
, atomic64_read(&vmem_alloc_used
),
388 EXPORT_SYMBOL(vmem_alloc_track
);
391 vmem_free_track(void *ptr
, size_t size
)
396 ASSERTF(ptr
|| size
> 0, "ptr: %p, size: %llu", ptr
,
397 (unsigned long long) size
);
399 dptr
= kmem_del_init(&vmem_lock
, vmem_table
, VMEM_HASH_BITS
, ptr
);
400 ASSERT(dptr
); /* Must exist in hash due to vmem_alloc() */
402 /* Size must match */
403 ASSERTF(dptr
->kd_size
== size
, "kd_size (%llu) != size (%llu), "
404 "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr
->kd_size
,
405 (unsigned long long) size
, dptr
->kd_func
, dptr
->kd_line
);
407 atomic64_sub(size
, &vmem_alloc_used
);
408 CDEBUG_LIMIT(D_INFO
, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr
,
409 (unsigned long long) size
, atomic64_read(&vmem_alloc_used
),
412 kfree(dptr
->kd_func
);
414 memset(dptr
, 0x5a, sizeof(kmem_debug_t
));
417 memset(ptr
, 0x5a, size
);
422 EXPORT_SYMBOL(vmem_free_track
);
424 # else /* DEBUG_KMEM_TRACKING */
427 kmem_alloc_debug(size_t size
, int flags
, const char *func
, int line
,
428 int node_alloc
, int node
)
	/* Marked unlikely because we should never be doing this,
	 * we tolerate up to 2 pages but a single page is best. */
435 if (unlikely(size
> (PAGE_SIZE
* 2)) && kmem_warning_flag
)
436 CWARN("Large kmem_alloc(%llu, 0x%x) (%lld/%llu)\n",
437 (unsigned long long) size
, flags
,
438 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
440 /* Use the correct allocator */
442 ASSERT(!(flags
& __GFP_ZERO
));
443 ptr
= kmalloc_node(size
, flags
, node
);
444 } else if (flags
& __GFP_ZERO
) {
445 ptr
= kzalloc(size
, flags
& (~__GFP_ZERO
));
447 ptr
= kmalloc(size
, flags
);
451 CWARN("kmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n",
452 (unsigned long long) size
, flags
,
453 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
455 atomic64_add(size
, &kmem_alloc_used
);
456 if (unlikely(atomic64_read(&kmem_alloc_used
) > kmem_alloc_max
))
457 kmem_alloc_max
= atomic64_read(&kmem_alloc_used
);
459 CDEBUG_LIMIT(D_INFO
, "kmem_alloc(%llu, 0x%x) = %p "
460 "(%lld/%llu)\n", (unsigned long long) size
, flags
, ptr
,
461 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
465 EXPORT_SYMBOL(kmem_alloc_debug
);
468 kmem_free_debug(void *ptr
, size_t size
)
472 ASSERTF(ptr
|| size
> 0, "ptr: %p, size: %llu", ptr
,
473 (unsigned long long) size
);
475 atomic64_sub(size
, &kmem_alloc_used
);
477 CDEBUG_LIMIT(D_INFO
, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr
,
478 (unsigned long long) size
, atomic64_read(&kmem_alloc_used
),
481 memset(ptr
, 0x5a, size
);
486 EXPORT_SYMBOL(kmem_free_debug
);
489 vmem_alloc_debug(size_t size
, int flags
, const char *func
, int line
)
494 ASSERT(flags
& KM_SLEEP
);
496 ptr
= __vmalloc(size
, (flags
| __GFP_HIGHMEM
) & ~__GFP_ZERO
,
499 CWARN("vmem_alloc(%llu, 0x%x) failed (%lld/%llu)\n",
500 (unsigned long long) size
, flags
,
501 atomic64_read(&vmem_alloc_used
), vmem_alloc_max
);
503 if (flags
& __GFP_ZERO
)
504 memset(ptr
, 0, size
);
506 atomic64_add(size
, &vmem_alloc_used
);
508 if (unlikely(atomic64_read(&vmem_alloc_used
) > vmem_alloc_max
))
509 vmem_alloc_max
= atomic64_read(&vmem_alloc_used
);
511 CDEBUG_LIMIT(D_INFO
, "vmem_alloc(%llu, 0x%x) = %p "
512 "(%lld/%llu)\n", (unsigned long long) size
, flags
, ptr
,
513 atomic64_read(&vmem_alloc_used
), vmem_alloc_max
);
518 EXPORT_SYMBOL(vmem_alloc_debug
);
521 vmem_free_debug(void *ptr
, size_t size
)
525 ASSERTF(ptr
|| size
> 0, "ptr: %p, size: %llu", ptr
,
526 (unsigned long long) size
);
528 atomic64_sub(size
, &vmem_alloc_used
);
530 CDEBUG_LIMIT(D_INFO
, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr
,
531 (unsigned long long) size
, atomic64_read(&vmem_alloc_used
),
534 memset(ptr
, 0x5a, size
);
539 EXPORT_SYMBOL(vmem_free_debug
);
541 # endif /* DEBUG_KMEM_TRACKING */
542 #endif /* DEBUG_KMEM */
545 kv_alloc(spl_kmem_cache_t
*skc
, int size
, int flags
)
549 if (skc
->skc_flags
& KMC_KMEM
) {
550 if (size
> (2 * PAGE_SIZE
)) {
551 ptr
= (void *)__get_free_pages(flags
, get_order(size
));
553 ptr
= kmem_alloc(size
, flags
);
555 ptr
= vmem_alloc(size
, flags
);
562 kv_free(spl_kmem_cache_t
*skc
, void *ptr
, int size
)
564 if (skc
->skc_flags
& KMC_KMEM
) {
565 if (size
> (2 * PAGE_SIZE
))
566 free_pages((unsigned long)ptr
, get_order(size
));
568 kmem_free(ptr
, size
);
570 vmem_free(ptr
, size
);
/* It's important that we pack the spl_kmem_obj_t structure and the
 * actual objects into one large address space to minimize the number
 * of calls to the allocator.  It is far better to do a few large
 * allocations and then subdivide them ourselves.  Now which allocator
 * we use requires balancing a few trade-offs.
 *
 * For small objects we use kmem_alloc() because as long as you are
 * only requesting a small number of pages (ideally just one) it's cheap.
 * However, when you start requesting multiple pages with kmem_alloc()
 * it gets increasingly expensive since it requires contiguous pages.
 * For this reason we shift to vmem_alloc() for slabs of large objects
 * which removes the need for contiguous pages.  We do not use
 * vmem_alloc() in all cases because there is significant locking
 * overhead in __get_vm_area_node().  This function takes a single
 * global lock when acquiring an available virtual address range which
 * serializes all vmem_alloc()'s for all slab caches.  Using slightly
 * different allocation functions for small and large objects should
 * give us the best of both worlds.
 *
 * KMC_ONSLAB                       KMC_OFFSLAB
 *
 * +------------------------+       +-----------------+
 * | spl_kmem_slab_t --+-+  |       | spl_kmem_slab_t |---+-+
 * | skc_obj_size    <-+ |  |       +-----------------+   | |
 * | spl_kmem_obj_t      |  |                             | |
 * | skc_obj_size    <---+  |       +-----------------+   | |
 * | spl_kmem_obj_t      |  |       | skc_obj_size    | <-+ |
 * | ...                 v  |       | spl_kmem_obj_t  |     |
 * +------------------------+       +-----------------+     v
 */
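/*
 * Worked example of the per-object sizing used by spl_slab_alloc() below
 * (the numbers are hypothetical): with skc_obj_size = 96, skc_obj_align = 8
 * and sizeof(spl_kmem_obj_t) = 32, each object consumes
 *
 *   size = P2ROUNDUP(96, 8) + P2ROUNDUP(32, 8) = 96 + 32 = 128
 *
 * bytes, and the spl_kmem_obj_t bookkeeping header is located from the
 * object address as obj + P2ROUNDUP(skc->skc_obj_size, align).
 */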
604 static spl_kmem_slab_t
*
605 spl_slab_alloc(spl_kmem_cache_t
*skc
, int flags
)
607 spl_kmem_slab_t
*sks
;
608 spl_kmem_obj_t
*sko
, *n
;
610 int i
, align
, size
, rc
= 0;
612 base
= kv_alloc(skc
, skc
->skc_slab_size
, flags
);
616 sks
= (spl_kmem_slab_t
*)base
;
617 sks
->sks_magic
= SKS_MAGIC
;
618 sks
->sks_objs
= skc
->skc_slab_objs
;
619 sks
->sks_age
= jiffies
;
620 sks
->sks_cache
= skc
;
621 INIT_LIST_HEAD(&sks
->sks_list
);
622 INIT_LIST_HEAD(&sks
->sks_free_list
);
625 align
= skc
->skc_obj_align
;
626 size
= P2ROUNDUP(skc
->skc_obj_size
, align
) +
627 P2ROUNDUP(sizeof(spl_kmem_obj_t
), align
);
629 for (i
= 0; i
< sks
->sks_objs
; i
++) {
630 if (skc
->skc_flags
& KMC_OFFSLAB
) {
631 obj
= kv_alloc(skc
, size
, flags
);
633 GOTO(out
, rc
= -ENOMEM
);
636 P2ROUNDUP(sizeof(spl_kmem_slab_t
), align
) +
640 sko
= obj
+ P2ROUNDUP(skc
->skc_obj_size
, align
);
642 sko
->sko_magic
= SKO_MAGIC
;
644 INIT_LIST_HEAD(&sko
->sko_list
);
645 list_add_tail(&sko
->sko_list
, &sks
->sks_free_list
);
648 list_for_each_entry(sko
, &sks
->sks_free_list
, sko_list
)
650 skc
->skc_ctor(sko
->sko_addr
, skc
->skc_private
, flags
);
653 if (skc
->skc_flags
& KMC_OFFSLAB
)
654 list_for_each_entry_safe(sko
, n
, &sks
->sks_free_list
,
656 kv_free(skc
, sko
->sko_addr
, size
);
658 kv_free(skc
, base
, skc
->skc_slab_size
);
/* Removes slab from complete or partial list, so it must
 * be called with the 'skc->skc_lock' held.
 */
669 spl_slab_free(spl_kmem_slab_t
*sks
) {
670 spl_kmem_cache_t
*skc
;
671 spl_kmem_obj_t
*sko
, *n
;
675 ASSERT(sks
->sks_magic
== SKS_MAGIC
);
676 ASSERT(sks
->sks_ref
== 0);
678 skc
= sks
->sks_cache
;
679 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
680 ASSERT(spin_is_locked(&skc
->skc_lock
));
682 skc
->skc_obj_total
-= sks
->sks_objs
;
683 skc
->skc_slab_total
--;
684 list_del(&sks
->sks_list
);
685 size
= P2ROUNDUP(skc
->skc_obj_size
, skc
->skc_obj_align
) +
686 P2ROUNDUP(sizeof(spl_kmem_obj_t
), skc
->skc_obj_align
);
	/* Run destructors as the slab is being released */
689 list_for_each_entry_safe(sko
, n
, &sks
->sks_free_list
, sko_list
) {
690 ASSERT(sko
->sko_magic
== SKO_MAGIC
);
693 skc
->skc_dtor(sko
->sko_addr
, skc
->skc_private
);
695 if (skc
->skc_flags
& KMC_OFFSLAB
)
696 kv_free(skc
, sko
->sko_addr
, size
);
699 kv_free(skc
, sks
, skc
->skc_slab_size
);
704 __spl_slab_reclaim(spl_kmem_cache_t
*skc
)
706 spl_kmem_slab_t
*sks
, *m
;
710 ASSERT(spin_is_locked(&skc
->skc_lock
));
	/*
	 * Free empty slabs which have not been touched in skc_delay
	 * seconds.  This delay time is important to avoid thrashing.
	 * Empty slabs will be at the end of the skc_partial_list.
	 */
716 list_for_each_entry_safe_reverse(sks
, m
, &skc
->skc_partial_list
,
718 if (sks
->sks_ref
> 0)
721 if (time_after(jiffies
, sks
->sks_age
+ skc
->skc_delay
* HZ
)) {
727 /* Returns number of slabs reclaimed */
732 spl_slab_reclaim(spl_kmem_cache_t
*skc
)
737 spin_lock(&skc
->skc_lock
);
738 rc
= __spl_slab_reclaim(skc
);
739 spin_unlock(&skc
->skc_lock
);
744 /* Size slabs properly to ensure they are not too large */
746 spl_slab_size(spl_kmem_cache_t
*skc
, uint32_t *objs
, uint32_t *size
)
748 int max
= ((uint64_t)1 << (MAX_ORDER
- 1)) * PAGE_SIZE
;
749 int align
= skc
->skc_obj_align
;
751 *objs
= SPL_KMEM_CACHE_OBJ_PER_SLAB
;
753 if (skc
->skc_flags
& KMC_OFFSLAB
) {
754 *size
= sizeof(spl_kmem_slab_t
);
757 *size
= P2ROUNDUP(sizeof(spl_kmem_slab_t
), align
) +
758 *objs
* (P2ROUNDUP(skc
->skc_obj_size
, align
) +
759 P2ROUNDUP(sizeof(spl_kmem_obj_t
), align
));
762 GOTO(resize
, *objs
= *objs
- 1);
767 ASSERTF(*size
<= max
, "%d < %d\n", *size
, max
);
772 spl_magazine_size(spl_kmem_cache_t
*skc
)
774 int size
, align
= skc
->skc_obj_align
;
777 /* Guesses for reasonable magazine sizes, they
778 * should really adapt based on observed usage. */
779 if (P2ROUNDUP(skc
->skc_obj_size
, align
) > (PAGE_SIZE
* 256))
781 else if (P2ROUNDUP(skc
->skc_obj_size
, align
) > (PAGE_SIZE
* 32))
783 else if (P2ROUNDUP(skc
->skc_obj_size
, align
) > (PAGE_SIZE
))
785 else if (P2ROUNDUP(skc
->skc_obj_size
, align
) > (PAGE_SIZE
/ 4))
793 static spl_kmem_magazine_t
*
794 spl_magazine_alloc(spl_kmem_cache_t
*skc
, int node
)
796 spl_kmem_magazine_t
*skm
;
797 int size
= sizeof(spl_kmem_magazine_t
) +
798 sizeof(void *) * skc
->skc_mag_size
;
801 skm
= kmem_alloc_node(size
, GFP_KERNEL
, node
);
803 skm
->skm_magic
= SKM_MAGIC
;
805 skm
->skm_size
= skc
->skc_mag_size
;
806 skm
->skm_refill
= skc
->skc_mag_refill
;
807 if (!(skc
->skc_flags
& KMC_NOTOUCH
))
808 skm
->skm_age
= jiffies
;
815 spl_magazine_free(spl_kmem_magazine_t
*skm
)
817 int size
= sizeof(spl_kmem_magazine_t
) +
818 sizeof(void *) * skm
->skm_size
;
821 ASSERT(skm
->skm_magic
== SKM_MAGIC
);
822 ASSERT(skm
->skm_avail
== 0);
824 kmem_free(skm
, size
);
829 spl_magazine_create(spl_kmem_cache_t
*skc
)
834 skc
->skc_mag_size
= spl_magazine_size(skc
);
835 skc
->skc_mag_refill
= (skc
->skc_mag_size
+ 1) / 2;
837 for_each_online_cpu(i
) {
838 skc
->skc_mag
[i
] = spl_magazine_alloc(skc
, cpu_to_node(i
));
839 if (!skc
->skc_mag
[i
]) {
840 for (i
--; i
>= 0; i
--)
841 spl_magazine_free(skc
->skc_mag
[i
]);
851 spl_magazine_destroy(spl_kmem_cache_t
*skc
)
853 spl_kmem_magazine_t
*skm
;
857 for_each_online_cpu(i
) {
858 skm
= skc
->skc_mag
[i
];
859 (void)spl_cache_flush(skc
, skm
, skm
->skm_avail
);
860 spl_magazine_free(skm
);
867 spl_kmem_cache_create(char *name
, size_t size
, size_t align
,
868 spl_kmem_ctor_t ctor
,
869 spl_kmem_dtor_t dtor
,
870 spl_kmem_reclaim_t reclaim
,
871 void *priv
, void *vmp
, int flags
)
873 spl_kmem_cache_t
*skc
;
874 int rc
, kmem_flags
= KM_SLEEP
;
877 ASSERTF(!(flags
& KMC_NOMAGAZINE
), "Bad KMC_NOMAGAZINE (%x)\n", flags
);
878 ASSERTF(!(flags
& KMC_NOHASH
), "Bad KMC_NOHASH (%x)\n", flags
);
879 ASSERTF(!(flags
& KMC_QCACHE
), "Bad KMC_QCACHE (%x)\n", flags
);
	/* We may be called when there is a non-zero preempt_count or
	 * interrupts are disabled in which case we must not sleep.
	 */
885 if (current_thread_info()->preempt_count
|| irqs_disabled())
886 kmem_flags
= KM_NOSLEEP
;
888 /* Allocate new cache memory and initialize. */
889 skc
= (spl_kmem_cache_t
*)kmem_zalloc(sizeof(*skc
), kmem_flags
);
893 skc
->skc_magic
= SKC_MAGIC
;
894 skc
->skc_name_size
= strlen(name
) + 1;
895 skc
->skc_name
= (char *)kmem_alloc(skc
->skc_name_size
, kmem_flags
);
896 if (skc
->skc_name
== NULL
) {
897 kmem_free(skc
, sizeof(*skc
));
900 strncpy(skc
->skc_name
, name
, skc
->skc_name_size
);
902 skc
->skc_ctor
= ctor
;
903 skc
->skc_dtor
= dtor
;
904 skc
->skc_reclaim
= reclaim
;
905 skc
->skc_private
= priv
;
907 skc
->skc_flags
= flags
;
908 skc
->skc_obj_size
= size
;
909 skc
->skc_obj_align
= SPL_KMEM_CACHE_ALIGN
;
910 skc
->skc_delay
= SPL_KMEM_CACHE_DELAY
;
912 INIT_LIST_HEAD(&skc
->skc_list
);
913 INIT_LIST_HEAD(&skc
->skc_complete_list
);
914 INIT_LIST_HEAD(&skc
->skc_partial_list
);
915 spin_lock_init(&skc
->skc_lock
);
916 skc
->skc_slab_fail
= 0;
917 skc
->skc_slab_create
= 0;
918 skc
->skc_slab_destroy
= 0;
919 skc
->skc_slab_total
= 0;
920 skc
->skc_slab_alloc
= 0;
921 skc
->skc_slab_max
= 0;
922 skc
->skc_obj_total
= 0;
923 skc
->skc_obj_alloc
= 0;
924 skc
->skc_obj_max
= 0;
927 ASSERT((align
& (align
- 1)) == 0); /* Power of two */
928 ASSERT(align
>= SPL_KMEM_CACHE_ALIGN
); /* Minimum size */
929 skc
->skc_obj_align
= align
;
932 /* If none passed select a cache type based on object size */
933 if (!(skc
->skc_flags
& (KMC_KMEM
| KMC_VMEM
))) {
934 if (P2ROUNDUP(skc
->skc_obj_size
, skc
->skc_obj_align
) <
936 skc
->skc_flags
|= KMC_KMEM
;
938 skc
->skc_flags
|= KMC_VMEM
;
942 rc
= spl_slab_size(skc
, &skc
->skc_slab_objs
, &skc
->skc_slab_size
);
946 rc
= spl_magazine_create(skc
);
950 down_write(&spl_kmem_cache_sem
);
951 list_add_tail(&skc
->skc_list
, &spl_kmem_cache_list
);
952 up_write(&spl_kmem_cache_sem
);
956 kmem_free(skc
->skc_name
, skc
->skc_name_size
);
957 kmem_free(skc
, sizeof(*skc
));
960 EXPORT_SYMBOL(spl_kmem_cache_create
);
963 spl_kmem_cache_destroy(spl_kmem_cache_t
*skc
)
965 spl_kmem_slab_t
*sks
, *m
;
968 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
970 down_write(&spl_kmem_cache_sem
);
971 list_del_init(&skc
->skc_list
);
972 up_write(&spl_kmem_cache_sem
);
974 spl_magazine_destroy(skc
);
975 spin_lock(&skc
->skc_lock
);
977 /* Validate there are no objects in use and free all the
978 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
979 ASSERT(list_empty(&skc
->skc_complete_list
));
980 ASSERT(skc
->skc_slab_alloc
== 0);
981 ASSERT(skc
->skc_obj_alloc
== 0);
983 list_for_each_entry_safe(sks
, m
, &skc
->skc_partial_list
, sks_list
)
986 ASSERT(skc
->skc_slab_total
== 0);
987 ASSERT(skc
->skc_obj_total
== 0);
989 kmem_free(skc
->skc_name
, skc
->skc_name_size
);
990 spin_unlock(&skc
->skc_lock
);
992 kmem_free(skc
, sizeof(*skc
));
996 EXPORT_SYMBOL(spl_kmem_cache_destroy
);
999 spl_cache_obj(spl_kmem_cache_t
*skc
, spl_kmem_slab_t
*sks
)
1001 spl_kmem_obj_t
*sko
;
1003 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1004 ASSERT(sks
->sks_magic
== SKS_MAGIC
);
1005 ASSERT(spin_is_locked(&skc
->skc_lock
));
1007 sko
= list_entry(sks
->sks_free_list
.next
, spl_kmem_obj_t
, sko_list
);
1008 ASSERT(sko
->sko_magic
== SKO_MAGIC
);
1009 ASSERT(sko
->sko_addr
!= NULL
);
1011 /* Remove from sks_free_list */
1012 list_del_init(&sko
->sko_list
);
1014 sks
->sks_age
= jiffies
;
1016 skc
->skc_obj_alloc
++;
1018 /* Track max obj usage statistics */
1019 if (skc
->skc_obj_alloc
> skc
->skc_obj_max
)
1020 skc
->skc_obj_max
= skc
->skc_obj_alloc
;
1022 /* Track max slab usage statistics */
1023 if (sks
->sks_ref
== 1) {
1024 skc
->skc_slab_alloc
++;
1026 if (skc
->skc_slab_alloc
> skc
->skc_slab_max
)
1027 skc
->skc_slab_max
= skc
->skc_slab_alloc
;
1030 return sko
->sko_addr
;
/* No objects are available, so create a new slab.  Since this is an
 * expensive operation we do it without holding the spinlock and only
 * briefly acquire it when we link in the fully allocated and
 * constructed slab.
 */
1038 static spl_kmem_slab_t
*
1039 spl_cache_grow(spl_kmem_cache_t
*skc
, int flags
)
1041 spl_kmem_slab_t
*sks
;
1044 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1046 if (flags
& __GFP_WAIT
) {
1047 flags
|= __GFP_NOFAIL
;
1052 sks
= spl_slab_alloc(skc
, flags
);
1054 if (flags
& __GFP_WAIT
)
1055 local_irq_disable();
1060 if (flags
& __GFP_WAIT
)
1061 local_irq_disable();
1063 /* Link the new empty slab in to the end of skc_partial_list */
1064 spin_lock(&skc
->skc_lock
);
1065 skc
->skc_slab_total
++;
1066 skc
->skc_obj_total
+= sks
->sks_objs
;
1067 list_add_tail(&sks
->sks_list
, &skc
->skc_partial_list
);
1068 spin_unlock(&skc
->skc_lock
);
1074 spl_cache_refill(spl_kmem_cache_t
*skc
, spl_kmem_magazine_t
*skm
, int flags
)
1076 spl_kmem_slab_t
*sks
;
1080 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1081 ASSERT(skm
->skm_magic
== SKM_MAGIC
);
1083 /* XXX: Check for refill bouncing by age perhaps */
1084 refill
= MIN(skm
->skm_refill
, skm
->skm_size
- skm
->skm_avail
);
1086 spin_lock(&skc
->skc_lock
);
1088 while (refill
> 0) {
1089 /* No slabs available we must grow the cache */
1090 if (list_empty(&skc
->skc_partial_list
)) {
1091 spin_unlock(&skc
->skc_lock
);
1093 sks
= spl_cache_grow(skc
, flags
);
			/* Rescheduled to a different CPU, skm is not local */
1098 if (skm
!= skc
->skc_mag
[smp_processor_id()])
			/* Potentially rescheduled to the same CPU but
			 * allocations may have occurred from this CPU while
			 * we were sleeping so recalculate max refill. */
1104 refill
= MIN(refill
, skm
->skm_size
- skm
->skm_avail
);
1106 spin_lock(&skc
->skc_lock
);
1110 /* Grab the next available slab */
1111 sks
= list_entry((&skc
->skc_partial_list
)->next
,
1112 spl_kmem_slab_t
, sks_list
);
1113 ASSERT(sks
->sks_magic
== SKS_MAGIC
);
1114 ASSERT(sks
->sks_ref
< sks
->sks_objs
);
1115 ASSERT(!list_empty(&sks
->sks_free_list
));
1117 /* Consume as many objects as needed to refill the requested
1118 * cache. We must also be careful not to overfill it. */
1119 while (sks
->sks_ref
< sks
->sks_objs
&& refill
-- > 0 && ++rc
) {
1120 ASSERT(skm
->skm_avail
< skm
->skm_size
);
1121 ASSERT(rc
< skm
->skm_size
);
1122 skm
->skm_objs
[skm
->skm_avail
++]=spl_cache_obj(skc
,sks
);
1125 /* Move slab to skc_complete_list when full */
1126 if (sks
->sks_ref
== sks
->sks_objs
) {
1127 list_del(&sks
->sks_list
);
1128 list_add(&sks
->sks_list
, &skc
->skc_complete_list
);
1132 spin_unlock(&skc
->skc_lock
);
1134 /* Returns the number of entries added to cache */
1139 spl_cache_shrink(spl_kmem_cache_t
*skc
, void *obj
)
1141 spl_kmem_slab_t
*sks
= NULL
;
1142 spl_kmem_obj_t
*sko
= NULL
;
1145 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1146 ASSERT(spin_is_locked(&skc
->skc_lock
));
1148 sko
= obj
+ P2ROUNDUP(skc
->skc_obj_size
, skc
->skc_obj_align
);
1149 ASSERT(sko
->sko_magic
== SKO_MAGIC
);
1151 sks
= sko
->sko_slab
;
1152 ASSERT(sks
->sks_magic
== SKS_MAGIC
);
1153 ASSERT(sks
->sks_cache
== skc
);
1154 list_add(&sko
->sko_list
, &sks
->sks_free_list
);
1156 sks
->sks_age
= jiffies
;
1158 skc
->skc_obj_alloc
--;
	/* Move slab to skc_partial_list when no longer full.  Slabs
	 * are added to the head to keep the partial list in quasi-full
	 * sorted order.  Fuller at the head, emptier at the tail. */
1163 if (sks
->sks_ref
== (sks
->sks_objs
- 1)) {
1164 list_del(&sks
->sks_list
);
1165 list_add(&sks
->sks_list
, &skc
->skc_partial_list
);
	/* Move empty slabs to the end of the partial list so
	 * they can be easily found and freed during reclamation. */
1170 if (sks
->sks_ref
== 0) {
1171 list_del(&sks
->sks_list
);
1172 list_add_tail(&sks
->sks_list
, &skc
->skc_partial_list
);
1173 skc
->skc_slab_alloc
--;
1180 spl_cache_flush(spl_kmem_cache_t
*skc
, spl_kmem_magazine_t
*skm
, int flush
)
1182 int i
, count
= MIN(flush
, skm
->skm_avail
);
1185 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1186 ASSERT(skm
->skm_magic
== SKM_MAGIC
);
1188 spin_lock(&skc
->skc_lock
);
1190 for (i
= 0; i
< count
; i
++)
1191 spl_cache_shrink(skc
, skm
->skm_objs
[i
]);
1193 // __spl_slab_reclaim(skc);
1194 skm
->skm_avail
-= count
;
1195 memmove(skm
->skm_objs
, &(skm
->skm_objs
[count
]),
1196 sizeof(void *) * skm
->skm_avail
);
1198 spin_unlock(&skc
->skc_lock
);
1204 spl_kmem_cache_alloc(spl_kmem_cache_t
*skc
, int flags
)
1206 spl_kmem_magazine_t
*skm
;
1207 unsigned long irq_flags
;
1211 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1212 ASSERT(flags
& KM_SLEEP
); /* XXX: KM_NOSLEEP not yet supported */
1213 local_irq_save(irq_flags
);
	/* Safe to update per-cpu structure without lock, but
	 * in the restart case we must be careful to reacquire
	 * the local magazine since this may have changed
	 * when we need to grow the cache. */
1220 skm
= skc
->skc_mag
[smp_processor_id()];
1221 ASSERTF(skm
->skm_magic
== SKM_MAGIC
, "%x != %x: %s/%p/%p %x/%x/%x\n",
1222 skm
->skm_magic
, SKM_MAGIC
, skc
->skc_name
, skc
, skm
,
1223 skm
->skm_size
, skm
->skm_refill
, skm
->skm_avail
);
1225 if (likely(skm
->skm_avail
)) {
1226 /* Object available in CPU cache, use it */
1227 obj
= skm
->skm_objs
[--skm
->skm_avail
];
1228 if (!(skc
->skc_flags
& KMC_NOTOUCH
))
1229 skm
->skm_age
= jiffies
;
1231 /* Per-CPU cache empty, directly allocate from
1232 * the slab and refill the per-CPU cache. */
1233 (void)spl_cache_refill(skc
, skm
, flags
);
1234 GOTO(restart
, obj
= NULL
);
1237 local_irq_restore(irq_flags
);
1239 ASSERT(((unsigned long)(obj
) % skc
->skc_obj_align
) == 0);
1241 /* Pre-emptively migrate object to CPU L1 cache */
1246 EXPORT_SYMBOL(spl_kmem_cache_alloc
);
1249 spl_kmem_cache_free(spl_kmem_cache_t
*skc
, void *obj
)
1251 spl_kmem_magazine_t
*skm
;
1252 unsigned long flags
;
1255 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1256 local_irq_save(flags
);
	/* Safe to update per-cpu structure without lock, but since
	 * no remote memory allocation tracking is being performed
	 * it is entirely possible to allocate an object from one
	 * CPU cache and return it to another. */
1262 skm
= skc
->skc_mag
[smp_processor_id()];
1263 ASSERT(skm
->skm_magic
== SKM_MAGIC
);
1265 /* Per-CPU cache full, flush it to make space */
1266 if (unlikely(skm
->skm_avail
>= skm
->skm_size
))
1267 (void)spl_cache_flush(skc
, skm
, skm
->skm_refill
);
1269 /* Available space in cache, use it */
1270 skm
->skm_objs
[skm
->skm_avail
++] = obj
;
1272 local_irq_restore(flags
);
1276 EXPORT_SYMBOL(spl_kmem_cache_free
);
1279 spl_kmem_cache_generic_shrinker(int nr_to_scan
, unsigned int gfp_mask
)
1281 spl_kmem_cache_t
*skc
;
	/* Under linux a shrinker is not tightly coupled with a slab
	 * cache.  In fact linux always systematically tries calling all
	 * registered shrinker callbacks until its target reclamation level
	 * is reached.  Because of this we only register one shrinker
	 * function in the shim layer for all slab caches.  We always
	 * attempt to shrink all caches when this generic shrinker is called.
	 */
1290 down_read(&spl_kmem_cache_sem
);
1292 list_for_each_entry(skc
, &spl_kmem_cache_list
, skc_list
)
1293 spl_kmem_cache_reap_now(skc
);
1295 up_read(&spl_kmem_cache_sem
);
	/* XXX: Under linux a shrinker should return the remaining number
	 * of entries in the cache; we should do this as well.
	 */
1304 spl_kmem_cache_reap_now(spl_kmem_cache_t
*skc
)
1306 spl_kmem_magazine_t
*skm
;
1310 ASSERT(skc
->skc_magic
== SKC_MAGIC
);
1312 if (skc
->skc_reclaim
)
1313 skc
->skc_reclaim(skc
->skc_private
);
1315 /* Ensure per-CPU caches which are idle gradually flush */
1316 for_each_online_cpu(i
) {
1317 skm
= skc
->skc_mag
[i
];
1319 if (time_after(jiffies
, skm
->skm_age
+ skc
->skc_delay
* HZ
))
1320 (void)spl_cache_flush(skc
, skm
, skm
->skm_refill
);
1323 spl_slab_reclaim(skc
);
1327 EXPORT_SYMBOL(spl_kmem_cache_reap_now
);
1332 spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK
, GFP_KERNEL
);
1334 EXPORT_SYMBOL(spl_kmem_reap
);
1336 #if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
1338 spl_sprintf_addr(kmem_debug_t
*kd
, char *str
, int len
, int min
)
1340 int size
= ((len
- 1) < kd
->kd_size
) ? (len
- 1) : kd
->kd_size
;
1343 ASSERT(str
!= NULL
&& len
>= 17);
1344 memset(str
, 0, len
);
1346 /* Check for a fully printable string, and while we are at
1347 * it place the printable characters in the passed buffer. */
1348 for (i
= 0; i
< size
; i
++) {
1349 str
[i
] = ((char *)(kd
->kd_addr
))[i
];
1350 if (isprint(str
[i
])) {
1353 /* Minimum number of printable characters found
1354 * to make it worthwhile to print this as ascii. */
1364 sprintf(str
, "%02x%02x%02x%02x%02x%02x%02x%02x",
1365 *((uint8_t *)kd
->kd_addr
),
1366 *((uint8_t *)kd
->kd_addr
+ 2),
1367 *((uint8_t *)kd
->kd_addr
+ 4),
1368 *((uint8_t *)kd
->kd_addr
+ 6),
1369 *((uint8_t *)kd
->kd_addr
+ 8),
1370 *((uint8_t *)kd
->kd_addr
+ 10),
1371 *((uint8_t *)kd
->kd_addr
+ 12),
1372 *((uint8_t *)kd
->kd_addr
+ 14));
1379 spl_kmem_init_tracking(struct list_head
*list
, spinlock_t
*lock
, int size
)
1384 spin_lock_init(lock
);
1385 INIT_LIST_HEAD(list
);
1387 for (i
= 0; i
< size
; i
++)
1388 INIT_HLIST_HEAD(&kmem_table
[i
]);
1394 spl_kmem_fini_tracking(struct list_head
*list
, spinlock_t
*lock
)
1396 unsigned long flags
;
1401 spin_lock_irqsave(lock
, flags
);
1402 if (!list_empty(list
))
1403 printk(KERN_WARNING
"%-16s %-5s %-16s %s:%s\n", "address",
1404 "size", "data", "func", "line");
1406 list_for_each_entry(kd
, list
, kd_list
)
1407 printk(KERN_WARNING
"%p %-5d %-16s %s:%d\n", kd
->kd_addr
,
1408 (int)kd
->kd_size
, spl_sprintf_addr(kd
, str
, 17, 8),
1409 kd
->kd_func
, kd
->kd_line
);
1411 spin_unlock_irqrestore(lock
, flags
);
1414 #else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
1415 #define spl_kmem_init_tracking(list, lock, size)
1416 #define spl_kmem_fini_tracking(list, lock)
1417 #endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
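/*
 * For reference, the leak report printed by spl_kmem_fini_tracking() above
 * looks roughly like the following (the addresses, sizes and call sites are
 * hypothetical examples, not captured output):
 *
 *   address          size  data             func:line
 *   ffff88001b4a3c00 64    ................ vn_alloc:88
 *   ffff88001b4a3d80 128   my_module_name   kmem_zalloc:132
 */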
1425 init_rwsem(&spl_kmem_cache_sem
);
1426 INIT_LIST_HEAD(&spl_kmem_cache_list
);
1428 #ifdef HAVE_SET_SHRINKER
1429 spl_kmem_cache_shrinker
= set_shrinker(KMC_DEFAULT_SEEKS
,
1430 spl_kmem_cache_generic_shrinker
);
1431 if (spl_kmem_cache_shrinker
== NULL
)
1432 RETURN(rc
= -ENOMEM
);
1434 register_shrinker(&spl_kmem_cache_shrinker
);
1438 atomic64_set(&kmem_alloc_used
, 0);
1439 atomic64_set(&vmem_alloc_used
, 0);
1441 spl_kmem_init_tracking(&kmem_list
, &kmem_lock
, KMEM_TABLE_SIZE
);
1442 spl_kmem_init_tracking(&vmem_list
, &vmem_lock
, VMEM_TABLE_SIZE
);
1451 /* Display all unreclaimed memory addresses, including the
1452 * allocation size and the first few bytes of what's located
1453 * at that address to aid in debugging. Performance is not
1454 * a serious concern here since it is module unload time. */
1455 if (atomic64_read(&kmem_alloc_used
) != 0)
1456 CWARN("kmem leaked %ld/%ld bytes\n",
1457 atomic64_read(&kmem_alloc_used
), kmem_alloc_max
);
1460 if (atomic64_read(&vmem_alloc_used
) != 0)
1461 CWARN("vmem leaked %ld/%ld bytes\n",
1462 atomic64_read(&vmem_alloc_used
), vmem_alloc_max
);
1464 spl_kmem_fini_tracking(&kmem_list
, &kmem_lock
);
1465 spl_kmem_fini_tracking(&vmem_list
, &vmem_lock
);
1466 #endif /* DEBUG_KMEM */
1469 #ifdef HAVE_SET_SHRINKER
1470 remove_shrinker(spl_kmem_cache_shrinker
);
1472 unregister_shrinker(&spl_kmem_cache_shrinker
);