/* modules/spl/spl-kmem.c */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);

/*
 * Slab allocation interfaces
 *
 * While the linux slab implementation was inspired by solaris, it has
 * made some changes to the API which complicate this shim layer.  For
 * one thing the same symbol names are used with different arguments in
 * the prototypes.  To deal with this we must use the preprocessor to
 * re-order arguments.  Happily for us, standard C says "macros appearing
 * in their own expansion are not reexpanded", so this does not result in
 * infinite recursion.  Additionally, the function pointers registered by
 * solaris differ from those used by linux, so a lookup and mapping from
 * a linux style callback to a solaris style callback is needed.  There
 * is some overhead in this operation which isn't horrible, but it needs
 * to be kept in mind.
 */
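
/*
 * For illustration only (not part of the shim): a minimal sketch of the
 * standard C rule quoted above.  The names below are hypothetical; the
 * real Solaris-to-Linux macro mappings live in the shim headers, not in
 * this file.  A function-like macro may expand to a call of the very
 * function it shadows, because a macro name found inside its own
 * replacement text is not expanded again.
 */
#if 0
/* Hypothetical native prototype, arguments ordered (dst, src, len). */
extern void *copy_buf(void *dst, const void *src, size_t len);

/* Adapt callers written against a (src, dst, len) convention.  The
 * "copy_buf" inside the expansion refers to the real function above
 * and is not re-expanded by the preprocessor. */
#define copy_buf(src, dst, len)         copy_buf((dst), (src), (len))
#endif
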
#define KCC_MAGIC          0x7a7a7a7a
#define KCC_POISON         0x77

typedef struct kmem_cache_cb {
        int                 kcc_magic;
        struct list_head    kcc_list;
        kmem_cache_t *      kcc_cache;
        kmem_constructor_t  kcc_constructor;
        kmem_destructor_t   kcc_destructor;
        kmem_reclaim_t      kcc_reclaim;
        void *              kcc_private;
        void *              kcc_vmp;
        atomic_t            kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        if (kcc == NULL)
                return;

        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* Poison the callback entry to catch any use after free */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        ASSERT(flags & SLAB_CTOR_CONSTRUCTOR);

        /* Ensure constructor verify calls are not passed to the registered
         * constructors.  This may not be safe because the Solaris
         * constructors have no notion of the SLAB_CTOR_VERIFY flag and
         * would not know how to handle it.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        if (flags & SLAB_CTOR_ATOMIC)
                flags = KM_NOSLEEP;
        else
                flags = KM_SLEEP;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructors have no return code, so any error from the
         * registered constructor is silently eaten here. */
}
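
/*
 * For illustration only: a sketch of the Solaris-style callbacks that the
 * generic adapters in this file invoke.  The object type and names are
 * hypothetical; the signatures are simply those implied by the call sites
 * above and below (the constructor receives the object, the registered
 * private data, and a KM_SLEEP/KM_NOSLEEP flag; the destructor receives
 * only the object and the private data).
 */
#if 0
typedef struct my_obj {
        int mo_state;
} my_obj_t;

static int
my_obj_constructor(void *buf, void *priv, int kmflags)
{
        my_obj_t *obj = buf;

        /* May run with interrupts disabled, so it must never sleep */
        obj->mo_state = 0;
        return 0;
}

static void
my_obj_destructor(void *buf, void *priv)
{
        /* Nothing to tear down in this sketch */
}
#endif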

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* No valid destructor flags */
        ASSERT(flags == 0);

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* Solaris destructors take no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under linux a shrinker is not tightly coupled with a slab
         * cache.  In fact linux always systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached.  Because of this we only register one shrinker
         * function in the shim layer for all slab caches, and we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question.  If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running.  In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target.  I do not pass these args on because reclaim
                 * policy is entirely up to the owner under solaris.  We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under linux we should return the remaining number of entries in
         * the cache.  Unfortunately, I don't see an easy way to safely
         * emulate this behavior so I'm returning one entry per cache which
         * was registered with the generic shrinker.  This should fake out
         * the linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}
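
/*
 * For illustration only: a hypothetical Solaris-style reclaim callback of
 * the kind the generic shrinker above invokes.  Only the pre-registered
 * private data is passed through; the Linux nr_to_scan/gfp_mask hints are
 * deliberately dropped, so all reclaim policy lives in the callback.  The
 * void return is an assumption here; the shrinker ignores any result.
 */
#if 0
static void
my_obj_reclaim(void *priv)
{
        /* e.g. walk a private idle list and free objects back to the cache */
}
#endif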

/* Ensure the kmem_cache_create/kmem_cache_destroy/kmem_cache_alloc wrapper
 * macros are removed here to prevent a recursive substitution; we want to
 * call the native linux versions from this point on.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX: - Options currently unsupported by the shim layer */
        ASSERT(!vmp);
        ASSERT(flags == 0);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL) {
                kfree(cache_name);
                RETURN(NULL);
        }

        /* Register the shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        kfree(cache_name);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }

                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* New shrinker registered must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                kfree(cache_name);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
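
/*
 * For illustration only: how a consumer might create and tear down a cache
 * through this shim.  Real consumers normally call the Solaris-style
 * kmem_cache_create() wrapper macro from the shim headers (undefined above
 * in this file), which resolves to __kmem_cache_create(); the callback and
 * type names reuse the hypothetical sketches shown earlier.
 */
#if 0
static kmem_cache_t *my_obj_cache;

static int
my_obj_cache_init(void)
{
        my_obj_cache = __kmem_cache_create((char *)"my_obj_cache",
                                           sizeof(my_obj_t), 0,
                                           my_obj_constructor,
                                           my_obj_destructor,
                                           my_obj_reclaim,
                                           NULL, NULL, 0);
        return (my_obj_cache == NULL) ? -ENOMEM : 0;
}

static void
my_obj_cache_fini(void)
{
        __kmem_cache_destroy(my_obj_cache);
}
#endif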

/* Return code provided despite Solaris's void return.  There should be no
 * harm here since the Solaris callers will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister generic shrinker on removal of all caches */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory.  Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding.  This is
 * not the correct fix, or even a good one.  But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into linux to force all the registered
         * shrinkers to run, we just run the one registered by this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging.  Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}