/* modules/spl/spl-kmem.c */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);

/*
 * Slab allocation interfaces
 *
 * While the Linux slab implementation was inspired by Solaris, its API
 * has diverged in ways that complicate this shim layer.  For one thing
 * the same symbol names are used with different arguments in the
 * prototypes.  To deal with this we must use the preprocessor to
 * re-order arguments.  Happily for us, standard C says "macros
 * appearing in their own expansion are not reexpanded", so this does
 * not result in infinite recursion.  Additionally, the function
 * pointers registered by Solaris consumers differ from those used by
 * Linux, so a lookup and mapping from the Linux-style callbacks to the
 * Solaris-style callbacks is needed.  There is some overhead in this
 * operation which isn't horrible, but it needs to be kept in mind.
 */
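
/*
 * Illustrative sketch only: the real mappings live in <sys/kmem.h>, not
 * in this file, and the helper name kmem_flags_convert() below is
 * hypothetical.  Because a macro appearing in its own expansion is not
 * re-expanded, a Solaris-style call can be rewritten in place onto the
 * Linux prototype that shares the same name, e.g.
 *
 *   #define kmem_cache_alloc(cache, kmflags) \
 *           kmem_cache_alloc(cache, kmem_flags_convert(kmflags))
 *
 * The inner kmem_cache_alloc token is left alone by the preprocessor
 * and resolves to the native Linux function at compile time.
 */
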
#define KCC_MAGIC       0x7a7a7a7a
#define KCC_POISON      0x77

typedef struct kmem_cache_cb {
        int                     kcc_magic;
        struct list_head        kcc_list;
        kmem_cache_t *          kcc_cache;
        kmem_constructor_t      kcc_constructor;
        kmem_destructor_t       kcc_destructor;
        kmem_reclaim_t          kcc_reclaim;
        void *                  kcc_private;
        void *                  kcc_vmp;
        atomic_t                kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* kcc has already been dereferenced above, so a NULL check here
         * would be redundant; poison and free it unconditionally. */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        /* Ensure constructor verify passes are not forwarded to the
         * registered constructors.  It may not be safe to do so because
         * the Solaris constructors are not aware of the SLAB_CTOR_VERIFY
         * flag and would not know how to handle it.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with the Linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructors have no return code, silently eat it */
}

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with the Linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* The Solaris destructor takes no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under Linux a shrinker is not tightly coupled with a slab
         * cache.  In fact Linux systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached.  Because of this we only register one shrinker
         * function in the shim layer for all slab caches, and we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question.  If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running.  In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under Linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target.  I do not pass these args on because reclaim
                 * policy is entirely up to the owner under Solaris.  We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under Linux we should return the remaining number of entries in
         * the cache.  Unfortunately, I don't see an easy way to safely
         * emulate this behavior, so I'm returning one entry per cache which
         * was registered with the generic shrinker.  This should fake out
         * the Linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}

/* Ensure the kmem_cache_create/kmem_cache_destroy/kmem_cache_alloc
 * macros are removed here to prevent a recursive substitution; we want
 * to call the native Linux versions below.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX - Option currently unsupported by the shim layer */
        ASSERT(!vmp);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL)
                RETURN(NULL);

        /* Register the shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }
                /* Remember that we registered the shrinker so it can be
                 * removed again if the callback allocation below fails. */
                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* Newly registered shrinker must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
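
/*
 * Minimal usage sketch (illustrative only, not compiled here), assuming the
 * Solaris-style kmem_cache_create()/kmem_cache_destroy() macros in
 * <sys/kmem.h> map directly onto the __kmem_cache_create() and
 * __kmem_cache_destroy() shim functions above.  The object type and the
 * callback bodies are hypothetical.
 *
 *   typedef struct my_obj { int mo_busy; } my_obj_t;
 *
 *   static int my_ctor(void *buf, void *priv, int kmflags)
 *   {
 *           ((my_obj_t *)buf)->mo_busy = 0;
 *           return 0;
 *   }
 *
 *   static void my_dtor(void *buf, void *priv) { }
 *
 *   kmem_cache_t *mc = kmem_cache_create("my_obj_cache", sizeof(my_obj_t), 0,
 *                                        my_ctor, my_dtor, NULL,
 *                                        NULL, NULL, 0);
 *   ...
 *   kmem_cache_destroy(mc);
 */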

/* Return code provided despite Solaris's void return.  There should be no
 * harm here since the Solaris versions will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister the generic shrinker on removal of the last cache */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory.  Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding.  This is
 * not the correct fix, or even a good one.  But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);
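
/*
 * Usage sketch (illustrative only, not compiled here): with KM_SLEEP the
 * retry loop above keeps requesting memory, so a caller may treat the
 * return value as never NULL, while KM_NOSLEEP-style requests can still
 * fail and must be checked.  The my_obj_t type is hypothetical, and
 * kmem_cache_free() is assumed to be mapped by <sys/kmem.h> onto the
 * native Linux slab free.
 *
 *   my_obj_t *obj = kmem_cache_alloc(mc, KM_SLEEP);
 *   ASSERT(obj != NULL);
 *   ...
 *   kmem_cache_free(mc, obj);
 */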

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into Linux to force all the registered
         * shrinkers to run, we just run the one registered for this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        /* Not printable enough; dump a hex sample of the allocation
         * instead (every other byte of the first 16 bytes). */
        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging.  Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}