]>
Commit | Line | Data |
---|---|---|
715f6251 BB |
1 | /* |
2 | * This file is part of the SPL: Solaris Porting Layer. | |
3 | * | |
4 | * Copyright (c) 2008 Lawrence Livermore National Security, LLC. | |
5 | * Produced at Lawrence Livermore National Laboratory | |
6 | * Written by: | |
7 | * Brian Behlendorf <behlendorf1@llnl.gov>, | |
8 | * Herb Wartens <wartens2@llnl.gov>, | |
9 | * Jim Garlick <garlick@llnl.gov> | |
10 | * UCRL-CODE-235197 | |
11 | * | |
12 | * This is free software; you can redistribute it and/or modify it | |
13 | * under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
16 | * | |
17 | * This is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
20 | * for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License along | |
23 | * with this program; if not, write to the Free Software Foundation, Inc., | |
24 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
25 | */ | |
26 | ||
f4b37741 | 27 | #include <sys/kmem.h> |
f1ca4da6 | 28 | |
937879f1 BB |
29 | #ifdef DEBUG_SUBSYSTEM |
30 | #undef DEBUG_SUBSYSTEM | |
31 | #endif | |
32 | ||
33 | #define DEBUG_SUBSYSTEM S_KMEM | |
34 | ||
f1ca4da6 | 35 | /* |
2fb9b26a BB |
 * Memory allocation interfaces and debugging for basic kmem_*
 * and vmem_* style memory allocation.  When DEBUG_KMEM is enabled
 * all allocations will be tracked when they are allocated and
 * freed.  When the SPL module is unloaded a list of all leaked
 * addresses and where they were allocated will be dumped to the
 * console.  Enabling this feature has a significant impact on
 * performance but it makes finding memory leaks straightforward.
f1ca4da6 BB |
43 | */ |
44 | #ifdef DEBUG_KMEM | |
45 | /* Shim layer memory accounting */ | |
c19c06f3 BB |
46 | atomic64_t kmem_alloc_used; |
47 | unsigned long kmem_alloc_max = 0; | |
48 | atomic64_t vmem_alloc_used; | |
49 | unsigned long vmem_alloc_max = 0; | |
50 | int kmem_warning_flag = 1; | |
5c2bb9b2 | 51 | atomic64_t kmem_cache_alloc_failed; |
79b31f36 | 52 | |
d6a26c6a BB |
53 | spinlock_t kmem_lock; |
54 | struct hlist_head kmem_table[KMEM_TABLE_SIZE]; | |
55 | struct list_head kmem_list; | |
56 | ||
13cdca65 BB |
57 | spinlock_t vmem_lock; |
58 | struct hlist_head vmem_table[VMEM_TABLE_SIZE]; | |
59 | struct list_head vmem_list; | |
60 | ||
79b31f36 BB |
61 | EXPORT_SYMBOL(kmem_alloc_used); |
62 | EXPORT_SYMBOL(kmem_alloc_max); | |
63 | EXPORT_SYMBOL(vmem_alloc_used); | |
64 | EXPORT_SYMBOL(vmem_alloc_max); | |
c19c06f3 BB |
65 | EXPORT_SYMBOL(kmem_warning_flag); |
66 | ||
d6a26c6a BB |
67 | EXPORT_SYMBOL(kmem_lock); |
68 | EXPORT_SYMBOL(kmem_table); | |
69 | EXPORT_SYMBOL(kmem_list); | |
70 | ||
13cdca65 BB |
71 | EXPORT_SYMBOL(vmem_lock); |
72 | EXPORT_SYMBOL(vmem_table); | |
73 | EXPORT_SYMBOL(vmem_list); | |
74 | ||
c19c06f3 BB |
75 | int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); } |
76 | #else | |
77 | int kmem_set_warning(int flag) { return 0; } | |
f1ca4da6 | 78 | #endif |
c19c06f3 | 79 | EXPORT_SYMBOL(kmem_set_warning); |
f1ca4da6 BB |
80 | |
81 | /* | |
82 | * Slab allocation interfaces | |
83 | * | |
2fb9b26a BB |
 * While the Linux slab implementation was inspired by the Solaris
 * implementation I cannot use it to emulate the Solaris APIs.  I
 * require two features which are not provided by the Linux slab.
 *
 * 1) Constructors AND destructors.  Recent versions of the Linux
 *    kernel have removed support for destructors.  This is a deal
 *    breaker for the SPL which contains particularly expensive
 *    initializers for mutexes, condition variables, etc.  We also
 *    require a minimal level of cleanup for these data types unlike
 *    many Linux data types which do not need to be explicitly
 *    destroyed.
 *
 * 2) Virtual address backed slab.  Callers of the Solaris slab
 *    expect it to work well for both small and very large allocations.
 *    Because of memory fragmentation the Linux slab which is backed
 *    by kmalloc'ed memory performs very badly when confronted with
 *    large numbers of large allocations.  Basing the slab on the
 *    virtual address space removes the need for contiguous pages
 *    and greatly improves performance for large allocations.
102 | * | |
103 | * For these reasons, the SPL has its own slab implementation with | |
104 | * the needed features. It is not as highly optimized as either the | |
105 | * Solaris or Linux slabs, but it should get me most of what is | |
106 | * needed until it can be optimized or obsoleted by another approach. | |
107 | * | |
108 | * One serious concern I do have about this method is the relatively | |
109 | * small virtual address space on 32bit arches. This will seriously | |
110 | * constrain the size of the slab caches and their performance. | |
111 | * | |
2fb9b26a BB |
112 | * XXX: Implement SPL proc interface to export full per cache stats. |
113 | * | |
114 | * XXX: Implement work requests to keep an eye on each cache and | |
4afaaefa | 115 | * shrink them via spl_slab_reclaim() when they are wasting lots |
2fb9b26a BB |
116 | * of space. Currently this process is driven by the reapers. |
117 | * | |
118 | * XXX: Implement proper small cache object support by embedding | |
119 | * the spl_kmem_slab_t, spl_kmem_obj_t's, and objects in the | |
120 | * allocated for a particular slab. | |
121 | * | |
122 | * XXX: Implement a resizable used object hash. Currently the hash | |
123 | * is statically sized for thousands of objects but it should | |
124 | * grow based on observed worst case slab depth. | |
125 | * | |
 * XXX: Improve the partial slab list by carefully maintaining a
 *      strict ordering of fullest to emptiest slabs based on
 *      the slab reference count.  This guarantees that when freeing
 *      slabs back to the system we need only linearly traverse the
 *      last N slabs in the list to discover all the freeable slabs.
 *
 * XXX: NUMA awareness for optionally allocating memory close to a
 *      particular core.  This can be advantageous if you know the slab
 *      object will be short lived and primarily accessed from one core.
135 | * | |
136 | * XXX: Slab coloring may also yield performance improvements and would | |
137 | * be desirable to implement. | |
4afaaefa BB |
138 | * |
139 | * XXX: Proper hardware cache alignment would be good too. | |
f1ca4da6 | 140 | */ |
2fb9b26a BB |
141 | |
142 | /* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are | |
143 | * removed here to prevent a recursive substitution, we want to call | |
144 | * the native linux version. | |
145 | */ | |
146 | #undef kmem_cache_t | |
147 | #undef kmem_cache_create | |
148 | #undef kmem_cache_destroy | |
149 | #undef kmem_cache_alloc | |
150 | #undef kmem_cache_free | |
151 | ||
152 | static struct list_head spl_kmem_cache_list; /* List of caches */ | |
153 | static struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ | |
154 | static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */ | |
155 | static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */ | |
c30df9c8 | 156 | |
4afaaefa BB |
157 | static int spl_cache_flush(spl_kmem_cache_t *skc, |
158 | spl_kmem_magazine_t *skm, int flush); | |
159 | ||
57d86234 | 160 | #ifdef HAVE_SET_SHRINKER |
2fb9b26a | 161 | static struct shrinker *spl_kmem_cache_shrinker; |
57d86234 | 162 | #else |
4afaaefa BB |
163 | static int spl_kmem_cache_generic_shrinker(int nr_to_scan, |
164 | unsigned int gfp_mask); | |
2fb9b26a | 165 | static struct shrinker spl_kmem_cache_shrinker = { |
4afaaefa | 166 | .shrink = spl_kmem_cache_generic_shrinker, |
57d86234 BB |
167 | .seeks = KMC_DEFAULT_SEEKS, |
168 | }; | |
169 | #endif | |
f1ca4da6 | 170 | |
2fb9b26a | 171 | static spl_kmem_slab_t * |
4afaaefa | 172 | spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { |
2fb9b26a BB |
173 | spl_kmem_slab_t *sks; |
174 | spl_kmem_obj_t *sko, *n; | |
175 | int i; | |
176 | ENTRY; | |
f1ca4da6 | 177 | |
2fb9b26a BB |
178 | sks = kmem_cache_alloc(spl_slab_cache, flags); |
179 | if (sks == NULL) | |
180 | RETURN(sks); | |
181 | ||
182 | sks->sks_magic = SKS_MAGIC; | |
183 | sks->sks_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; | |
184 | sks->sks_age = jiffies; | |
185 | sks->sks_cache = skc; | |
186 | INIT_LIST_HEAD(&sks->sks_list); | |
187 | INIT_LIST_HEAD(&sks->sks_free_list); | |
4afaaefa | 188 | sks->sks_ref = 0; |
2fb9b26a BB |
189 | |
190 | for (i = 0; i < sks->sks_objs; i++) { | |
191 | sko = kmem_cache_alloc(spl_obj_cache, flags); | |
192 | if (sko == NULL) { | |
193 | out_alloc: | |
194 | /* Unable to fully construct slab, objects, | |
195 | * and object data buffers unwind everything. | |
196 | */ | |
197 | list_for_each_entry_safe(sko, n, &sks->sks_free_list, | |
198 | sko_list) { | |
199 | ASSERT(sko->sko_magic == SKO_MAGIC); | |
200 | vmem_free(sko->sko_addr, skc->skc_obj_size); | |
201 | list_del(&sko->sko_list); | |
202 | kmem_cache_free(spl_obj_cache, sko); | |
203 | } | |
204 | ||
205 | kmem_cache_free(spl_slab_cache, sks); | |
206 | GOTO(out, sks = NULL); | |
207 | } | |
f1ca4da6 | 208 | |
2fb9b26a BB |
209 | sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags); |
210 | if (sko->sko_addr == NULL) { | |
211 | kmem_cache_free(spl_obj_cache, sko); | |
212 | GOTO(out_alloc, sks = NULL); | |
213 | } | |
f1ca4da6 | 214 | |
2fb9b26a BB |
215 | sko->sko_magic = SKO_MAGIC; |
216 | sko->sko_flags = 0; | |
217 | sko->sko_slab = sks; | |
218 | INIT_LIST_HEAD(&sko->sko_list); | |
219 | INIT_HLIST_NODE(&sko->sko_hlist); | |
220 | list_add(&sko->sko_list, &sks->sks_free_list); | |
d6a26c6a | 221 | } |
2fb9b26a BB |
222 | out: |
223 | RETURN(sks); | |
f1ca4da6 BB |
224 | } |
225 | ||
2fb9b26a | 226 | /* Removes slab from complete or partial list, so it must |
d46630e0 | 227 | * be called with the 'skc->skc_lock' held. |
2fb9b26a | 228 | * */ |
f1ca4da6 | 229 | static void |
4afaaefa | 230 | spl_slab_free(spl_kmem_slab_t *sks) { |
2fb9b26a BB |
231 | spl_kmem_cache_t *skc; |
232 | spl_kmem_obj_t *sko, *n; | |
233 | int i = 0; | |
234 | ENTRY; | |
57d86234 | 235 | |
2fb9b26a | 236 | ASSERT(sks->sks_magic == SKS_MAGIC); |
4afaaefa | 237 | ASSERT(sks->sks_ref == 0); |
2fb9b26a BB |
238 | skc = sks->sks_cache; |
239 | skc->skc_obj_total -= sks->sks_objs; | |
240 | skc->skc_slab_total--; | |
d6a26c6a | 241 | |
d46630e0 | 242 | ASSERT(spin_is_locked(&skc->skc_lock)); |
f1ca4da6 | 243 | |
2fb9b26a BB |
244 | list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { |
245 | ASSERT(sko->sko_magic == SKO_MAGIC); | |
937879f1 | 246 | |
2fb9b26a BB |
247 | /* Run destructors for being freed */ |
248 | if (skc->skc_dtor) | |
249 | skc->skc_dtor(sko->sko_addr, skc->skc_private); | |
0a6fd143 | 250 | |
2fb9b26a BB |
251 | vmem_free(sko->sko_addr, skc->skc_obj_size); |
252 | list_del(&sko->sko_list); | |
253 | kmem_cache_free(spl_obj_cache, sko); | |
254 | i++; | |
255 | } | |
d61e12af | 256 | |
2fb9b26a BB |
257 | ASSERT(sks->sks_objs == i); |
258 | list_del(&sks->sks_list); | |
259 | kmem_cache_free(spl_slab_cache, sks); | |
d61e12af | 260 | |
2fb9b26a BB |
261 | EXIT; |
262 | } | |
d6a26c6a | 263 | |
2fb9b26a | 264 | static int |
4afaaefa | 265 | __spl_slab_reclaim(spl_kmem_cache_t *skc) |
2fb9b26a BB |
266 | { |
267 | spl_kmem_slab_t *sks, *m; | |
268 | int rc = 0; | |
269 | ENTRY; | |
270 | ||
d46630e0 | 271 | ASSERT(spin_is_locked(&skc->skc_lock)); |
2fb9b26a BB |
272 | /* |
273 | * Free empty slabs which have not been touched in skc_delay | |
274 | * seconds. This delay time is important to avoid thrashing. | |
275 | * Empty slabs will be at the end of the skc_partial_list. | |
276 | */ | |
277 | list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, | |
278 | sks_list) { | |
4afaaefa | 279 | if (sks->sks_ref > 0) |
2fb9b26a BB |
280 | break; |
281 | ||
282 | if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) { | |
4afaaefa | 283 | spl_slab_free(sks); |
2fb9b26a BB |
284 | rc++; |
285 | } | |
286 | } | |
287 | ||
288 | /* Returns number of slabs reclaimed */ | |
289 | RETURN(rc); | |
f1ca4da6 BB |
290 | } |
291 | ||
2fb9b26a | 292 | static int |
4afaaefa | 293 | spl_slab_reclaim(spl_kmem_cache_t *skc) |
f1ca4da6 | 294 | { |
2fb9b26a BB |
295 | int rc; |
296 | ENTRY; | |
f1ca4da6 | 297 | |
d46630e0 | 298 | spin_lock(&skc->skc_lock); |
4afaaefa | 299 | rc = __spl_slab_reclaim(skc); |
d46630e0 | 300 | spin_unlock(&skc->skc_lock); |
4efd4118 | 301 | |
2fb9b26a BB |
302 | RETURN(rc); |
303 | } | |
f1ca4da6 | 304 | |
4afaaefa BB |
305 | static int |
306 | spl_magazine_size(spl_kmem_cache_t *skc) | |
307 | { | |
308 | int size; | |
309 | ENTRY; | |
310 | ||
311 | /* Guesses for reasonable magazine sizes, they | |
312 | * should really adapt based on observed usage. */ | |
313 | if (skc->skc_obj_size > (PAGE_SIZE * 256)) | |
314 | size = 1; | |
315 | else if (skc->skc_obj_size > (PAGE_SIZE * 32)) | |
316 | size = 4; | |
317 | else if (skc->skc_obj_size > (PAGE_SIZE)) | |
318 | size = 16; | |
319 | else if (skc->skc_obj_size > (PAGE_SIZE / 4)) | |
320 | size = 32; | |
321 | else if (skc->skc_obj_size > (PAGE_SIZE / 16)) | |
e9d7a2be | 322 | size = 48; |
4afaaefa | 323 | else |
e9d7a2be | 324 | size = 64; |
4afaaefa BB |
325 | |
326 | RETURN(size); | |
327 | } | |
328 | ||
329 | static spl_kmem_magazine_t * | |
330 | spl_magazine_alloc(spl_kmem_cache_t *skc, int node) | |
331 | { | |
332 | spl_kmem_magazine_t *skm; | |
333 | int size = sizeof(spl_kmem_magazine_t) + | |
334 | sizeof(void *) * skc->skc_mag_size; | |
335 | ENTRY; | |
336 | ||
337 | skm = kmalloc_node(size, GFP_KERNEL, node); | |
338 | if (skm) { | |
339 | skm->skm_magic = SKM_MAGIC; | |
340 | skm->skm_avail = 0; | |
341 | skm->skm_size = skc->skc_mag_size; | |
342 | skm->skm_refill = skc->skc_mag_refill; | |
343 | skm->skm_age = jiffies; | |
344 | } | |
345 | ||
346 | RETURN(skm); | |
347 | } | |
348 | ||
349 | static void | |
350 | spl_magazine_free(spl_kmem_magazine_t *skm) | |
351 | { | |
352 | ENTRY; | |
353 | ASSERT(skm->skm_magic == SKM_MAGIC); | |
354 | ASSERT(skm->skm_avail == 0); | |
355 | kfree(skm); | |
356 | EXIT; | |
357 | } | |
358 | ||
359 | static int | |
360 | spl_magazine_create(spl_kmem_cache_t *skc) | |
361 | { | |
362 | int i; | |
363 | ENTRY; | |
364 | ||
365 | skc->skc_mag_size = spl_magazine_size(skc); | |
366 | skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; | |
367 | ||
368 | for_each_online_cpu(i) { | |
369 | skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i)); | |
370 | if (!skc->skc_mag[i]) { | |
371 | for (i--; i >= 0; i--) | |
372 | spl_magazine_free(skc->skc_mag[i]); | |
373 | ||
374 | RETURN(-ENOMEM); | |
375 | } | |
376 | } | |
377 | ||
378 | RETURN(0); | |
379 | } | |
380 | ||
381 | static void | |
382 | spl_magazine_destroy(spl_kmem_cache_t *skc) | |
383 | { | |
384 | spl_kmem_magazine_t *skm; | |
385 | int i; | |
386 | ENTRY; | |
387 | ||
388 | for_each_online_cpu(i) { | |
389 | skm = skc->skc_mag[i]; | |
390 | (void)spl_cache_flush(skc, skm, skm->skm_avail); | |
391 | spl_magazine_free(skm); | |
392 | } | |
393 | ||
394 | EXIT; | |
395 | } | |
396 | ||
2fb9b26a BB |
397 | spl_kmem_cache_t * |
398 | spl_kmem_cache_create(char *name, size_t size, size_t align, | |
399 | spl_kmem_ctor_t ctor, | |
400 | spl_kmem_dtor_t dtor, | |
401 | spl_kmem_reclaim_t reclaim, | |
402 | void *priv, void *vmp, int flags) | |
403 | { | |
404 | spl_kmem_cache_t *skc; | |
4afaaefa | 405 | int i, rc, kmem_flags = KM_SLEEP; |
2fb9b26a | 406 | ENTRY; |
937879f1 | 407 | |
2fb9b26a BB |
408 | /* We may be called when there is a non-zero preempt_count or |
409 | * interrupts are disabled is which case we must not sleep. | |
410 | */ | |
e9d7a2be | 411 | if (current_thread_info()->preempt_count || irqs_disabled()) |
2fb9b26a | 412 | kmem_flags = KM_NOSLEEP; |
0a6fd143 | 413 | |
2fb9b26a | 414 | /* Allocate new cache memory and initialize. */ |
e9d7a2be BB |
415 | skc = (spl_kmem_cache_t *)kmem_alloc(sizeof(*skc), kmem_flags); |
416 | if (skc == NULL) | |
2fb9b26a | 417 | RETURN(NULL); |
d61e12af | 418 | |
2fb9b26a | 419 | skc->skc_magic = SKC_MAGIC; |
2fb9b26a BB |
420 | skc->skc_name_size = strlen(name) + 1; |
421 | skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags); | |
422 | if (skc->skc_name == NULL) { | |
423 | kmem_free(skc, sizeof(*skc)); | |
424 | RETURN(NULL); | |
425 | } | |
426 | strncpy(skc->skc_name, name, skc->skc_name_size); | |
427 | ||
e9d7a2be BB |
428 | skc->skc_ctor = ctor; |
429 | skc->skc_dtor = dtor; | |
430 | skc->skc_reclaim = reclaim; | |
2fb9b26a BB |
431 | skc->skc_private = priv; |
432 | skc->skc_vmp = vmp; | |
433 | skc->skc_flags = flags; | |
434 | skc->skc_obj_size = size; | |
435 | skc->skc_chunk_size = 0; /* XXX: Needed only when implementing */ | |
436 | skc->skc_slab_size = 0; /* small slab object optimizations */ | |
437 | skc->skc_max_chunks = 0; /* which are yet supported. */ | |
438 | skc->skc_delay = SPL_KMEM_CACHE_DELAY; | |
439 | ||
440 | skc->skc_hash_bits = SPL_KMEM_CACHE_HASH_BITS; | |
441 | skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE; | |
442 | skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS; | |
443 | skc->skc_hash = (struct hlist_head *) | |
444 | kmem_alloc(skc->skc_hash_size, kmem_flags); | |
445 | if (skc->skc_hash == NULL) { | |
446 | kmem_free(skc->skc_name, skc->skc_name_size); | |
447 | kmem_free(skc, sizeof(*skc)); | |
4afaaefa | 448 | RETURN(NULL); |
2fb9b26a BB |
449 | } |
450 | ||
451 | for (i = 0; i < skc->skc_hash_elts; i++) | |
452 | INIT_HLIST_HEAD(&skc->skc_hash[i]); | |
453 | ||
454 | INIT_LIST_HEAD(&skc->skc_list); | |
455 | INIT_LIST_HEAD(&skc->skc_complete_list); | |
456 | INIT_LIST_HEAD(&skc->skc_partial_list); | |
d46630e0 | 457 | spin_lock_init(&skc->skc_lock); |
e9d7a2be BB |
458 | skc->skc_slab_fail = 0; |
459 | skc->skc_slab_create = 0; | |
460 | skc->skc_slab_destroy = 0; | |
2fb9b26a BB |
461 | skc->skc_slab_total = 0; |
462 | skc->skc_slab_alloc = 0; | |
463 | skc->skc_slab_max = 0; | |
464 | skc->skc_obj_total = 0; | |
465 | skc->skc_obj_alloc = 0; | |
466 | skc->skc_obj_max = 0; | |
467 | skc->skc_hash_depth = 0; | |
4afaaefa BB |
468 | skc->skc_hash_count = 0; |
469 | ||
470 | rc = spl_magazine_create(skc); | |
471 | if (rc) { | |
472 | kmem_free(skc->skc_hash, skc->skc_hash_size); | |
473 | kmem_free(skc->skc_name, skc->skc_name_size); | |
474 | kmem_free(skc, sizeof(*skc)); | |
475 | RETURN(NULL); | |
476 | } | |
2fb9b26a BB |
477 | |
478 | down_write(&spl_kmem_cache_sem); | |
e9d7a2be | 479 | list_add_tail(&skc->skc_list, &spl_kmem_cache_list); |
2fb9b26a BB |
480 | up_write(&spl_kmem_cache_sem); |
481 | ||
e9d7a2be | 482 | RETURN(skc); |
f1ca4da6 | 483 | } |
2fb9b26a | 484 | EXPORT_SYMBOL(spl_kmem_cache_create); |
f1ca4da6 | 485 | |
2fb9b26a | 486 | /* The caller must ensure there are no racing calls to |
4afaaefa | 487 | * spl_kmem_cache_alloc() for this spl_kmem_cache_t. |
2fb9b26a BB |
488 | */ |
489 | void | |
490 | spl_kmem_cache_destroy(spl_kmem_cache_t *skc) | |
f1ca4da6 | 491 | { |
2fb9b26a BB |
492 | spl_kmem_slab_t *sks, *m; |
493 | ENTRY; | |
f1ca4da6 | 494 | |
e9d7a2be BB |
495 | ASSERT(skc->skc_magic == SKC_MAGIC); |
496 | ||
497 | down_write(&spl_kmem_cache_sem); | |
498 | list_del_init(&skc->skc_list); | |
499 | up_write(&spl_kmem_cache_sem); | |
2fb9b26a | 500 | |
4afaaefa | 501 | spl_magazine_destroy(skc); |
d46630e0 | 502 | spin_lock(&skc->skc_lock); |
d6a26c6a | 503 | |
2fb9b26a | 504 | /* Validate there are no objects in use and free all the |
4afaaefa | 505 | * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ |
2fb9b26a | 506 | ASSERT(list_empty(&skc->skc_complete_list)); |
4afaaefa BB |
507 | ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n", |
508 | skc->skc_hash_count); | |
d6a26c6a | 509 | |
e9d7a2be | 510 | list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) |
4afaaefa | 511 | spl_slab_free(sks); |
2fb9b26a BB |
512 | |
513 | kmem_free(skc->skc_hash, skc->skc_hash_size); | |
514 | kmem_free(skc->skc_name, skc->skc_name_size); | |
d46630e0 | 515 | spin_unlock(&skc->skc_lock); |
4afaaefa | 516 | kmem_free(skc, sizeof(*skc)); |
2fb9b26a BB |
517 | |
518 | EXIT; | |
f1ca4da6 | 519 | } |
2fb9b26a | 520 | EXPORT_SYMBOL(spl_kmem_cache_destroy); |
f1ca4da6 | 521 | |
2fb9b26a BB |
522 | /* The kernel provided hash_ptr() function behaves exceptionally badly |
523 | * when all the addresses are page aligned which is likely the case | |
524 | * here. To avoid this issue shift off the low order non-random bits. | |
f1ca4da6 | 525 | */ |
2fb9b26a BB |
526 | static unsigned long |
527 | spl_hash_ptr(void *ptr, unsigned int bits) | |
528 | { | |
529 | return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits); | |
530 | } | |
f1ca4da6 | 531 | |
4afaaefa BB |
532 | static spl_kmem_obj_t * |
533 | spl_hash_obj(spl_kmem_cache_t *skc, void *obj) | |
534 | { | |
e9d7a2be | 535 | struct hlist_node *node; |
4afaaefa BB |
536 | spl_kmem_obj_t *sko = NULL; |
537 | unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits); | |
538 | int i = 0; | |
5cbd57fa | 539 | |
e9d7a2be | 540 | ASSERT(skc->skc_magic == SKC_MAGIC); |
4afaaefa BB |
541 | ASSERT(spin_is_locked(&skc->skc_lock)); |
542 | ||
e9d7a2be | 543 | hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) { |
4afaaefa BB |
544 | |
545 | if (unlikely((++i) > skc->skc_hash_depth)) | |
546 | skc->skc_hash_depth = i; | |
547 | ||
e9d7a2be | 548 | if (sko->sko_addr == obj) { |
4afaaefa BB |
549 | ASSERT(sko->sko_magic == SKO_MAGIC); |
550 | RETURN(sko); | |
551 | } | |
552 | } | |
553 | ||
554 | RETURN(NULL); | |
555 | } | |
556 | ||
557 | static void * | |
558 | spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) | |
f1ca4da6 | 559 | { |
2fb9b26a | 560 | spl_kmem_obj_t *sko; |
2fb9b26a | 561 | unsigned long key; |
f1ca4da6 | 562 | |
e9d7a2be BB |
563 | ASSERT(skc->skc_magic == SKC_MAGIC); |
564 | ASSERT(sks->sks_magic == SKS_MAGIC); | |
4afaaefa | 565 | ASSERT(spin_is_locked(&skc->skc_lock)); |
2fb9b26a | 566 | |
4afaaefa BB |
567 | sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list); |
568 | ASSERT(sko->sko_magic == SKO_MAGIC); | |
569 | ASSERT(sko->sko_addr != NULL); | |
2fb9b26a | 570 | |
4afaaefa BB |
571 | /* Remove from sks_free_list and add to used hash */ |
572 | list_del_init(&sko->sko_list); | |
573 | key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits); | |
574 | hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]); | |
2fb9b26a | 575 | |
4afaaefa BB |
576 | sks->sks_age = jiffies; |
577 | sks->sks_ref++; | |
578 | skc->skc_obj_alloc++; | |
579 | skc->skc_hash_count++; | |
2fb9b26a | 580 | |
4afaaefa BB |
581 | /* Track max obj usage statistics */ |
582 | if (skc->skc_obj_alloc > skc->skc_obj_max) | |
583 | skc->skc_obj_max = skc->skc_obj_alloc; | |
2fb9b26a | 584 | |
4afaaefa BB |
585 | /* Track max slab usage statistics */ |
586 | if (sks->sks_ref == 1) { | |
587 | skc->skc_slab_alloc++; | |
f1ca4da6 | 588 | |
4afaaefa BB |
589 | if (skc->skc_slab_alloc > skc->skc_slab_max) |
590 | skc->skc_slab_max = skc->skc_slab_alloc; | |
2fb9b26a BB |
591 | } |
592 | ||
4afaaefa BB |
593 | return sko->sko_addr; |
594 | } | |
c30df9c8 | 595 | |
4afaaefa BB |
596 | /* No available objects create a new slab. Since this is an |
597 | * expensive operation we do it without holding the spinlock | |
598 | * and only briefly aquire it when we link in the fully | |
599 | * allocated and constructed slab. | |
600 | */ | |
601 | static spl_kmem_slab_t * | |
602 | spl_cache_grow(spl_kmem_cache_t *skc, int flags) | |
603 | { | |
e9d7a2be | 604 | spl_kmem_slab_t *sks; |
4afaaefa BB |
605 | spl_kmem_obj_t *sko; |
606 | ENTRY; | |
f1ca4da6 | 607 | |
e9d7a2be BB |
608 | ASSERT(skc->skc_magic == SKC_MAGIC); |
609 | ||
610 | if (flags & __GFP_WAIT) { | |
611 | // flags |= __GFP_NOFAIL; /* XXX: Solaris assumes this */ | |
4afaaefa BB |
612 | might_sleep(); |
613 | local_irq_enable(); | |
614 | } | |
f1ca4da6 | 615 | |
4afaaefa BB |
616 | sks = spl_slab_alloc(skc, flags); |
617 | if (sks == NULL) { | |
618 | if (flags & __GFP_WAIT) | |
619 | local_irq_disable(); | |
620 | ||
621 | RETURN(NULL); | |
622 | } | |
2fb9b26a BB |
623 | |
624 | /* Run all the constructors now that the slab is fully allocated */ | |
625 | list_for_each_entry(sko, &sks->sks_free_list, sko_list) { | |
626 | ASSERT(sko->sko_magic == SKO_MAGIC); | |
f1ca4da6 | 627 | |
2fb9b26a BB |
628 | if (skc->skc_ctor) |
629 | skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); | |
630 | } | |
f1ca4da6 | 631 | |
e9d7a2be | 632 | if (flags & __GFP_WAIT) |
4afaaefa BB |
633 | local_irq_disable(); |
634 | ||
635 | /* Link the new empty slab in to the end of skc_partial_list */ | |
d46630e0 | 636 | spin_lock(&skc->skc_lock); |
2fb9b26a BB |
637 | skc->skc_slab_total++; |
638 | skc->skc_obj_total += sks->sks_objs; | |
639 | list_add_tail(&sks->sks_list, &skc->skc_partial_list); | |
d46630e0 | 640 | spin_unlock(&skc->skc_lock); |
4afaaefa BB |
641 | |
642 | RETURN(sks); | |
f1ca4da6 BB |
643 | } |
644 | ||
4afaaefa BB |
645 | static int |
646 | spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) | |
f1ca4da6 | 647 | { |
e9d7a2be BB |
648 | spl_kmem_slab_t *sks; |
649 | int rc = 0, refill; | |
937879f1 | 650 | ENTRY; |
f1ca4da6 | 651 | |
e9d7a2be BB |
652 | ASSERT(skc->skc_magic == SKC_MAGIC); |
653 | ASSERT(skm->skm_magic == SKM_MAGIC); | |
654 | ||
4afaaefa | 655 | /* XXX: Check for refill bouncing by age perhaps */ |
e9d7a2be | 656 | refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); |
4afaaefa | 657 | |
d46630e0 | 658 | spin_lock(&skc->skc_lock); |
4afaaefa BB |
659 | while (refill > 0) { |
660 | /* No slabs available we must grow the cache */ | |
661 | if (list_empty(&skc->skc_partial_list)) { | |
662 | spin_unlock(&skc->skc_lock); | |
663 | sks = spl_cache_grow(skc, flags); | |
664 | if (!sks) | |
e9d7a2be | 665 | GOTO(out, rc); |
4afaaefa BB |
666 | |
667 | /* Rescheduled to different CPU skm is not local */ | |
668 | if (skm != skc->skc_mag[smp_processor_id()]) | |
e9d7a2be BB |
669 | GOTO(out, rc); |
670 | ||
671 | /* Potentially rescheduled to the same CPU but | |
672 | * allocations may have occured from this CPU while | |
673 | * we were sleeping so recalculate max refill. */ | |
674 | refill = MIN(refill, skm->skm_size - skm->skm_avail); | |
4afaaefa BB |
675 | |
676 | spin_lock(&skc->skc_lock); | |
677 | continue; | |
678 | } | |
d46630e0 | 679 | |
4afaaefa BB |
680 | /* Grab the next available slab */ |
681 | sks = list_entry((&skc->skc_partial_list)->next, | |
682 | spl_kmem_slab_t, sks_list); | |
683 | ASSERT(sks->sks_magic == SKS_MAGIC); | |
684 | ASSERT(sks->sks_ref < sks->sks_objs); | |
685 | ASSERT(!list_empty(&sks->sks_free_list)); | |
d46630e0 | 686 | |
4afaaefa | 687 | /* Consume as many objects as needed to refill the requested |
e9d7a2be BB |
688 | * cache. We must also be careful not to overfill it. */ |
689 | while (sks->sks_ref < sks->sks_objs && refill-- > 0 && ++rc) { | |
690 | ASSERT(skm->skm_avail < skm->skm_size); | |
691 | ASSERT(rc < skm->skm_size); | |
4afaaefa | 692 | skm->skm_objs[skm->skm_avail++]=spl_cache_obj(skc,sks); |
e9d7a2be | 693 | } |
f1ca4da6 | 694 | |
4afaaefa BB |
695 | /* Move slab to skc_complete_list when full */ |
696 | if (sks->sks_ref == sks->sks_objs) { | |
697 | list_del(&sks->sks_list); | |
698 | list_add(&sks->sks_list, &skc->skc_complete_list); | |
2fb9b26a BB |
699 | } |
700 | } | |
57d86234 | 701 | |
4afaaefa BB |
702 | spin_unlock(&skc->skc_lock); |
703 | out: | |
704 | /* Returns the number of entries added to cache */ | |
e9d7a2be | 705 | RETURN(rc); |
4afaaefa BB |
706 | } |
707 | ||
708 | static void | |
709 | spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) | |
710 | { | |
e9d7a2be | 711 | spl_kmem_slab_t *sks = NULL; |
4afaaefa BB |
712 | spl_kmem_obj_t *sko = NULL; |
713 | ENTRY; | |
714 | ||
e9d7a2be | 715 | ASSERT(skc->skc_magic == SKC_MAGIC); |
4afaaefa BB |
716 | ASSERT(spin_is_locked(&skc->skc_lock)); |
717 | ||
718 | sko = spl_hash_obj(skc, obj); | |
719 | ASSERTF(sko, "Obj %p missing from in-use hash (%d) for cache %s\n", | |
720 | obj, skc->skc_hash_count, skc->skc_name); | |
721 | ||
722 | sks = sko->sko_slab; | |
723 | ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n", | |
724 | obj, sko, skc->skc_name); | |
725 | ||
2fb9b26a BB |
726 | ASSERT(sks->sks_cache == skc); |
727 | hlist_del_init(&sko->sko_hlist); | |
728 | list_add(&sko->sko_list, &sks->sks_free_list); | |
d6a26c6a | 729 | |
2fb9b26a | 730 | sks->sks_age = jiffies; |
4afaaefa | 731 | sks->sks_ref--; |
2fb9b26a | 732 | skc->skc_obj_alloc--; |
4afaaefa | 733 | skc->skc_hash_count--; |
f1ca4da6 | 734 | |
2fb9b26a | 735 | /* Move slab to skc_partial_list when no longer full. Slabs |
4afaaefa BB |
736 | * are added to the head to keep the partial list is quasi-full |
737 | * sorted order. Fuller at the head, emptier at the tail. */ | |
738 | if (sks->sks_ref == (sks->sks_objs - 1)) { | |
2fb9b26a BB |
739 | list_del(&sks->sks_list); |
740 | list_add(&sks->sks_list, &skc->skc_partial_list); | |
741 | } | |
f1ca4da6 | 742 | |
2fb9b26a | 743 | /* Move emply slabs to the end of the partial list so |
4afaaefa BB |
744 | * they can be easily found and freed during reclamation. */ |
745 | if (sks->sks_ref == 0) { | |
2fb9b26a BB |
746 | list_del(&sks->sks_list); |
747 | list_add_tail(&sks->sks_list, &skc->skc_partial_list); | |
748 | skc->skc_slab_alloc--; | |
749 | } | |
750 | ||
4afaaefa BB |
751 | EXIT; |
752 | } | |
753 | ||
754 | static int | |
755 | spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) | |
756 | { | |
757 | int i, count = MIN(flush, skm->skm_avail); | |
758 | ENTRY; | |
759 | ||
e9d7a2be BB |
760 | ASSERT(skc->skc_magic == SKC_MAGIC); |
761 | ASSERT(skm->skm_magic == SKM_MAGIC); | |
4afaaefa BB |
762 | |
763 | spin_lock(&skc->skc_lock); | |
764 | for (i = 0; i < count; i++) | |
765 | spl_cache_shrink(skc, skm->skm_objs[i]); | |
766 | ||
e9d7a2be BB |
767 | // __spl_slab_reclaim(skc); |
768 | skm->skm_avail -= count; | |
769 | memmove(skm->skm_objs, &(skm->skm_objs[count]), | |
4afaaefa BB |
770 | sizeof(void *) * skm->skm_avail); |
771 | ||
d46630e0 | 772 | spin_unlock(&skc->skc_lock); |
4afaaefa BB |
773 | |
774 | RETURN(count); | |
775 | } | |
776 | ||
777 | void * | |
778 | spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) | |
779 | { | |
780 | spl_kmem_magazine_t *skm; | |
781 | unsigned long irq_flags; | |
782 | void *obj = NULL; | |
e9d7a2be | 783 | int id; |
4afaaefa BB |
784 | ENTRY; |
785 | ||
e9d7a2be BB |
786 | ASSERT(skc->skc_magic == SKC_MAGIC); |
787 | ASSERT(flags & KM_SLEEP); /* XXX: KM_NOSLEEP not yet supported */ | |
4afaaefa BB |
788 | local_irq_save(irq_flags); |
789 | ||
790 | restart: | |
791 | /* Safe to update per-cpu structure without lock, but | |
792 | * in the restart case we must be careful to reaquire | |
793 | * the local magazine since this may have changed | |
794 | * when we need to grow the cache. */ | |
e9d7a2be BB |
795 | id = smp_processor_id(); |
796 | ASSERTF(id < 4, "cache=%p smp_processor_id=%d\n", skc, id); | |
4afaaefa | 797 | skm = skc->skc_mag[smp_processor_id()]; |
e9d7a2be BB |
798 | ASSERTF(skm->skm_magic == SKM_MAGIC, "%x != %x: %s/%p/%p %x/%x/%x\n", |
799 | skm->skm_magic, SKM_MAGIC, skc->skc_name, skc, skm, | |
800 | skm->skm_size, skm->skm_refill, skm->skm_avail); | |
4afaaefa BB |
801 | |
802 | if (likely(skm->skm_avail)) { | |
803 | /* Object available in CPU cache, use it */ | |
804 | obj = skm->skm_objs[--skm->skm_avail]; | |
805 | skm->skm_age = jiffies; | |
806 | } else { | |
807 | /* Per-CPU cache empty, directly allocate from | |
808 | * the slab and refill the per-CPU cache. */ | |
809 | (void)spl_cache_refill(skc, skm, flags); | |
810 | GOTO(restart, obj = NULL); | |
811 | } | |
812 | ||
813 | local_irq_restore(irq_flags); | |
814 | ||
815 | /* Pre-emptively migrate object to CPU L1 cache */ | |
816 | prefetchw(obj); | |
817 | ||
818 | RETURN(obj); | |
819 | } | |
820 | EXPORT_SYMBOL(spl_kmem_cache_alloc); | |
821 | ||
822 | void | |
823 | spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) | |
824 | { | |
825 | spl_kmem_magazine_t *skm; | |
826 | unsigned long flags; | |
827 | ENTRY; | |
828 | ||
e9d7a2be | 829 | ASSERT(skc->skc_magic == SKC_MAGIC); |
4afaaefa BB |
830 | local_irq_save(flags); |
831 | ||
832 | /* Safe to update per-cpu structure without lock, but | |
833 | * no remote memory allocation tracking is being performed | |
834 | * it is entirely possible to allocate an object from one | |
835 | * CPU cache and return it to another. */ | |
836 | skm = skc->skc_mag[smp_processor_id()]; | |
e9d7a2be | 837 | ASSERT(skm->skm_magic == SKM_MAGIC); |
4afaaefa BB |
838 | |
839 | /* Per-CPU cache full, flush it to make space */ | |
840 | if (unlikely(skm->skm_avail >= skm->skm_size)) | |
841 | (void)spl_cache_flush(skc, skm, skm->skm_refill); | |
e9d7a2be | 842 | (void)spl_cache_flush(skc, skm, 1); |
4afaaefa BB |
843 | |
844 | /* Available space in cache, use it */ | |
845 | skm->skm_objs[skm->skm_avail++] = obj; | |
846 | ||
847 | local_irq_restore(flags); | |
848 | ||
849 | EXIT; | |
f1ca4da6 | 850 | } |
2fb9b26a | 851 | EXPORT_SYMBOL(spl_kmem_cache_free); |
5c2bb9b2 | 852 | |
2fb9b26a | 853 | static int |
4afaaefa | 854 | spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask) |
2fb9b26a | 855 | { |
e9d7a2be | 856 | spl_kmem_cache_t *skc; |
5c2bb9b2 | 857 | |
2fb9b26a BB |
858 | /* Under linux a shrinker is not tightly coupled with a slab |
859 | * cache. In fact linux always systematically trys calling all | |
860 | * registered shrinker callbacks until its target reclamation level | |
861 | * is reached. Because of this we only register one shrinker | |
862 | * function in the shim layer for all slab caches. And we always | |
863 | * attempt to shrink all caches when this generic shrinker is called. | |
c30df9c8 | 864 | */ |
e9d7a2be | 865 | down_read(&spl_kmem_cache_sem); |
57d86234 | 866 | |
e9d7a2be | 867 | list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) |
2fb9b26a BB |
868 | spl_kmem_cache_reap_now(skc); |
869 | ||
e9d7a2be | 870 | up_read(&spl_kmem_cache_sem); |
2fb9b26a BB |
871 | |
872 | /* XXX: Under linux we should return the remaining number of | |
873 | * entries in the cache. We should do this as well. | |
874 | */ | |
875 | return 1; | |
5c2bb9b2 | 876 | } |
5c2bb9b2 | 877 | |
57d86234 | 878 | void |
2fb9b26a | 879 | spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) |
57d86234 | 880 | { |
4afaaefa BB |
881 | spl_kmem_magazine_t *skm; |
882 | int i; | |
2fb9b26a | 883 | ENTRY; |
e9d7a2be BB |
884 | |
885 | ASSERT(skc->skc_magic == SKC_MAGIC); | |
2fb9b26a BB |
886 | |
887 | if (skc->skc_reclaim) | |
888 | skc->skc_reclaim(skc->skc_private); | |
889 | ||
4afaaefa BB |
890 | /* Ensure per-CPU caches which are idle gradually flush */ |
891 | for_each_online_cpu(i) { | |
892 | skm = skc->skc_mag[i]; | |
893 | ||
894 | if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) | |
895 | (void)spl_cache_flush(skc, skm, skm->skm_refill); | |
896 | } | |
897 | ||
898 | spl_slab_reclaim(skc); | |
899 | ||
2fb9b26a | 900 | EXIT; |
57d86234 | 901 | } |
2fb9b26a | 902 | EXPORT_SYMBOL(spl_kmem_cache_reap_now); |
57d86234 | 903 | |
f1b59d26 | 904 | void |
2fb9b26a | 905 | spl_kmem_reap(void) |
937879f1 | 906 | { |
4afaaefa | 907 | spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL); |
f1ca4da6 | 908 | } |
2fb9b26a | 909 | EXPORT_SYMBOL(spl_kmem_reap); |
5d86345d BB |
910 | |
/*
 * Module-load initialization for the SPL kmem layer.  Sets up the
 * cache registration list/lock, creates the two internal kmem caches
 * used for slab and object headers, registers the generic shrinker,
 * and (with DEBUG_KMEM) zeroes the allocation-tracking state.
 *
 * Returns 0 on success or -ENOMEM, after destroying any cache that
 * was already created, on failure.
 */
int
spl_kmem_init(void)
{
	int rc = 0;
	ENTRY;

	init_rwsem(&spl_kmem_cache_sem);
	INIT_LIST_HEAD(&spl_kmem_cache_list);

	/* NULL these first so the out_cache error path can safely
	 * test which of the two caches were actually created. */
	spl_slab_cache = NULL;
	spl_obj_cache = NULL;

	/* Cache backing the spl_kmem_slab_t headers. */
	spl_slab_cache = __kmem_cache_create("spl_slab_cache",
					     sizeof(spl_kmem_slab_t),
					     0, 0, NULL, NULL);
	if (spl_slab_cache == NULL)
		GOTO(out_cache, rc = -ENOMEM);

	/* Cache backing the per-object spl_kmem_obj_t headers. */
	spl_obj_cache = __kmem_cache_create("spl_obj_cache",
					    sizeof(spl_kmem_obj_t),
					    0, 0, NULL, NULL);
	if (spl_obj_cache == NULL)
		GOTO(out_cache, rc = -ENOMEM);

	/* Older kernels return a shrinker handle from set_shrinker();
	 * newer ones register a static struct via register_shrinker(). */
#ifdef HAVE_SET_SHRINKER
	spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
					       spl_kmem_cache_generic_shrinker);
	if (spl_kmem_cache_shrinker == NULL)
		GOTO(out_cache, rc = -ENOMEM);
#else
	register_shrinker(&spl_kmem_cache_shrinker);
#endif

#ifdef DEBUG_KMEM
	/* Reset the kmem/vmem leak-tracking counters, lists and hash
	 * tables used to report unreclaimed allocations at unload. */
	{ int i;
	atomic64_set(&kmem_alloc_used, 0);
	atomic64_set(&vmem_alloc_used, 0);
	atomic64_set(&kmem_cache_alloc_failed, 0);

	spin_lock_init(&kmem_lock);
	INIT_LIST_HEAD(&kmem_list);

	for (i = 0; i < KMEM_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&kmem_table[i]);

	spin_lock_init(&vmem_lock);
	INIT_LIST_HEAD(&vmem_list);

	for (i = 0; i < VMEM_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&vmem_table[i]);
	}
#endif
	RETURN(rc);

out_cache:
	/* Tear down whichever caches were created before the failure. */
	if (spl_obj_cache)
		(void)kmem_cache_destroy(spl_obj_cache);

	if (spl_slab_cache)
		(void)kmem_cache_destroy(spl_slab_cache);

	RETURN(rc);
}
974 | ||
c6dc93d6 BB |
#ifdef DEBUG_KMEM
/*
 * Format a preview of the data at kd->kd_addr into 'str' for the leak
 * report.  If at least 'min' leading bytes are printable the data is
 * rendered as ascii (truncated to len-1 bytes); otherwise it is
 * rendered as a fixed 16-character hex sample.
 *
 * kd:  allocation-tracking record (kd_addr/kd_size are read).
 * str: caller-supplied buffer; must hold at least 17 bytes.
 * len: size of 'str' in bytes.
 * min: minimum printable prefix length to prefer the ascii form.
 *
 * Returns 'str' for direct use in the caller's print statement.
 */
static char *
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
	int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
	int i, flag = 1;

	ASSERT(str != NULL && len >= 17);
	memset(str, 0, len);

	/* Check for a fully printable string, and while we are at
	 * it place the printable characters in the passed buffer. */
	for (i = 0; i < size; i++) {
		str[i] = ((char *)(kd->kd_addr))[i];
		if (isprint(str[i]))
			continue;

		/* Minimum number of printable characters found
		 * to make it worthwhile to print this as ascii. */
		if (i > min) {
			/* Fix: the non-printable byte was already copied
			 * into str[i]; terminate before it so only
			 * printable characters are returned. */
			str[i] = '\0';
			break;
		}

		flag = 0;
		break;
	}

	if (!flag) {
		/* NOTE(review): this samples every other byte of the
		 * first 16 (offsets 0,2,...,14) — confirm whether
		 * consecutive bytes were intended. */
		sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
			*((uint8_t *)kd->kd_addr),
			*((uint8_t *)kd->kd_addr + 2),
			*((uint8_t *)kd->kd_addr + 4),
			*((uint8_t *)kd->kd_addr + 6),
			*((uint8_t *)kd->kd_addr + 8),
			*((uint8_t *)kd->kd_addr + 10),
			*((uint8_t *)kd->kd_addr + 12),
			*((uint8_t *)kd->kd_addr + 14));
	}

	return str;
}
#endif /* DEBUG_KMEM */
d6a26c6a | 1017 | |
5d86345d | 1018 | void |
2fb9b26a | 1019 | spl_kmem_fini(void) |
5d86345d BB |
1020 | { |
1021 | #ifdef DEBUG_KMEM | |
2fb9b26a BB |
1022 | unsigned long flags; |
1023 | kmem_debug_t *kd; | |
1024 | char str[17]; | |
1025 | ||
1026 | /* Display all unreclaimed memory addresses, including the | |
1027 | * allocation size and the first few bytes of what's located | |
1028 | * at that address to aid in debugging. Performance is not | |
1029 | * a serious concern here since it is module unload time. */ | |
1030 | if (atomic64_read(&kmem_alloc_used) != 0) | |
1031 | CWARN("kmem leaked %ld/%ld bytes\n", | |
1032 | atomic_read(&kmem_alloc_used), kmem_alloc_max); | |
1033 | ||
1034 | spin_lock_irqsave(&kmem_lock, flags); | |
1035 | if (!list_empty(&kmem_list)) | |
1036 | CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n", | |
1037 | "address", "size", "data", "func", "line"); | |
1038 | ||
1039 | list_for_each_entry(kd, &kmem_list, kd_list) | |
1040 | CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", | |
1041 | kd->kd_addr, kd->kd_size, | |
4afaaefa | 1042 | spl_sprintf_addr(kd, str, 17, 8), |
2fb9b26a BB |
1043 | kd->kd_func, kd->kd_line); |
1044 | ||
1045 | spin_unlock_irqrestore(&kmem_lock, flags); | |
1046 | ||
1047 | if (atomic64_read(&vmem_alloc_used) != 0) | |
1048 | CWARN("vmem leaked %ld/%ld bytes\n", | |
1049 | atomic_read(&vmem_alloc_used), vmem_alloc_max); | |
1050 | ||
1051 | spin_lock_irqsave(&vmem_lock, flags); | |
1052 | if (!list_empty(&vmem_list)) | |
1053 | CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n", | |
1054 | "address", "size", "data", "func", "line"); | |
1055 | ||
1056 | list_for_each_entry(kd, &vmem_list, kd_list) | |
1057 | CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", | |
1058 | kd->kd_addr, kd->kd_size, | |
4afaaefa | 1059 | spl_sprintf_addr(kd, str, 17, 8), |
2fb9b26a BB |
1060 | kd->kd_func, kd->kd_line); |
1061 | ||
1062 | spin_unlock_irqrestore(&vmem_lock, flags); | |
1063 | #endif | |
1064 | ENTRY; | |
1065 | ||
1066 | #ifdef HAVE_SET_SHRINKER | |
1067 | remove_shrinker(spl_kmem_cache_shrinker); | |
1068 | #else | |
1069 | unregister_shrinker(&spl_kmem_cache_shrinker); | |
5d86345d | 1070 | #endif |
2fb9b26a | 1071 | |
e9d7a2be BB |
1072 | (void)kmem_cache_destroy(spl_obj_cache); |
1073 | (void)kmem_cache_destroy(spl_slab_cache); | |
2fb9b26a | 1074 | |
937879f1 | 1075 | EXIT; |
5d86345d | 1076 | } |