/* modules/spl/spl-kmem.c */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);

/*
 * Slab allocation interfaces
 *
 * While the Linux slab implementation was inspired by Solaris, its API
 * has diverged in ways that complicate this shim layer.  For one thing
 * the same symbol names are used with different arguments in the
 * prototypes.  To deal with this we must use the preprocessor to
 * re-order arguments.  Happily for us, standard C says "macros
 * appearing in their own expansion are not reexpanded", so this does
 * not result in infinite recursion.  Additionally, the function
 * pointers registered by Solaris consumers differ from those used by
 * Linux, so a lookup and mapping from the Linux-style callbacks to the
 * Solaris-style callbacks is needed.  There is some overhead in this
 * operation which isn't horrible, but it needs to be kept in mind.
 */
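
/*
 * Illustrative sketch only: the real mappings live in <sys/kmem.h>, not
 * in this file, and the helper name kmem_flags_convert() below is
 * hypothetical.  Because a macro appearing in its own expansion is not
 * re-expanded, a Solaris-style call can be rewritten in place onto the
 * Linux prototype that shares the same name, e.g.
 *
 *   #define kmem_cache_alloc(cache, kmflags) \
 *           kmem_cache_alloc(cache, kmem_flags_convert(kmflags))
 *
 * The inner kmem_cache_alloc token is left alone by the preprocessor
 * and resolves to the native Linux function at compile time.
 */
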
#define KCC_MAGIC       0x7a7a7a7a
#define KCC_POISON      0x77

typedef struct kmem_cache_cb {
        int                     kcc_magic;
        struct list_head        kcc_list;
        kmem_cache_t *          kcc_cache;
        kmem_constructor_t      kcc_constructor;
        kmem_destructor_t       kcc_destructor;
        kmem_reclaim_t          kcc_reclaim;
        void *                  kcc_private;
        void *                  kcc_vmp;
        atomic_t                kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* kcc has already been dereferenced above, so a NULL check here
         * would be redundant; poison and free it unconditionally. */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        /* Ensure constructor verify passes are not forwarded to the
         * registered constructors.  It may not be safe to do so because
         * the Solaris constructors are not aware of the SLAB_CTOR_VERIFY
         * flag and would not know how to handle it.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with the Linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructors have no return code, silently eat it */
}

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with the Linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* The Solaris destructor takes no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under Linux a shrinker is not tightly coupled with a slab
         * cache.  In fact Linux systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached.  Because of this we only register one shrinker
         * function in the shim layer for all slab caches, and we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question.  If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running.  In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under Linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target.  I do not pass these args on because reclaim
                 * policy is entirely up to the owner under Solaris.  We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under Linux we should return the remaining number of entries in
         * the cache.  Unfortunately, I don't see an easy way to safely
         * emulate this behavior, so I'm returning one entry per cache which
         * was registered with the generic shrinker.  This should fake out
         * the Linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}

/* Ensure the kmem_cache_create/kmem_cache_destroy/kmem_cache_alloc
 * macros are removed here to prevent a recursive substitution; we want
 * to call the native Linux versions below.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX - Option currently unsupported by the shim layer */
        ASSERT(!vmp);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL)
                RETURN(NULL);

        /* Register the shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }
                /* Remember that we registered the shrinker so it can be
                 * removed again if the callback allocation below fails. */
                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* Newly registered shrinker must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
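
/*
 * Minimal usage sketch (illustrative only, not compiled here), assuming the
 * Solaris-style kmem_cache_create()/kmem_cache_destroy() macros in
 * <sys/kmem.h> map directly onto the __kmem_cache_create() and
 * __kmem_cache_destroy() shim functions above.  The object type and the
 * callback bodies are hypothetical.
 *
 *   typedef struct my_obj { int mo_busy; } my_obj_t;
 *
 *   static int my_ctor(void *buf, void *priv, int kmflags)
 *   {
 *           ((my_obj_t *)buf)->mo_busy = 0;
 *           return 0;
 *   }
 *
 *   static void my_dtor(void *buf, void *priv) { }
 *
 *   kmem_cache_t *mc = kmem_cache_create("my_obj_cache", sizeof(my_obj_t), 0,
 *                                        my_ctor, my_dtor, NULL,
 *                                        NULL, NULL, 0);
 *   ...
 *   kmem_cache_destroy(mc);
 */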

/* Return code provided despite Solaris's void return.  There should be no
 * harm here since the Solaris versions will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister the generic shrinker on removal of the last cache */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory.  Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding.  This is
 * not the correct fix, or even a good one.  But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);
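
/*
 * Usage sketch (illustrative only, not compiled here): with KM_SLEEP the
 * retry loop above keeps requesting memory, so a caller may treat the
 * return value as never NULL, while KM_NOSLEEP-style requests can still
 * fail and must be checked.  The my_obj_t type is hypothetical, and
 * kmem_cache_free() is assumed to be mapped by <sys/kmem.h> onto the
 * native Linux slab free.
 *
 *   my_obj_t *obj = kmem_cache_alloc(mc, KM_SLEEP);
 *   ASSERT(obj != NULL);
 *   ...
 *   kmem_cache_free(mc, obj);
 */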

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into Linux to force all the registered
         * shrinkers to run, we just run the one registered for this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        /* Not printable enough; dump a hex sample of the allocation
         * instead (every other byte of the first 16 bytes). */
        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging.  Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}