/* modules/spl/spl-kmem.c */

#include <sys/kmem.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_KMEM

/*
 * Memory allocation interfaces
 */
#ifdef DEBUG_KMEM
/* Shim layer memory accounting */
atomic64_t kmem_alloc_used;
unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;

spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;

spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;

EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);

EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);

EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);

int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
int kmem_set_warning(int flag) { return 0; }
#endif
EXPORT_SYMBOL(kmem_set_warning);

/*
 * Slab allocation interfaces
 *
 * While the linux slab implementation was inspired by solaris, it has
 * made some changes to the API which complicate this shim layer.  For
 * one thing the same symbol names are used with different arguments in
 * the prototypes.  To deal with this we must use the preprocessor to
 * re-order arguments.  Happily for us, standard C says "macros appearing
 * in their own expansion are not reexpanded", so this does not result in
 * infinite recursion.  Additionally, the function pointers registered by
 * solaris differ from those used by linux, so a lookup and mapping from
 * a linux style callback to a solaris style callback is needed.  There
 * is some overhead in this operation which isn't horrible, but it needs
 * to be kept in mind.
 */
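
/*
 * For illustration only (not part of the shim): a minimal sketch of the
 * standard C rule quoted above.  The names below are hypothetical; the
 * real Solaris-to-Linux macro mappings live in the shim headers, not in
 * this file.  A function-like macro may expand to a call of the very
 * function it shadows, because a macro name found inside its own
 * replacement text is not expanded again.
 */
#if 0
/* Hypothetical native prototype, arguments ordered (dst, src, len). */
extern void *copy_buf(void *dst, const void *src, size_t len);

/* Adapt callers written against a (src, dst, len) convention.  The
 * "copy_buf" inside the expansion refers to the real function above
 * and is not re-expanded by the preprocessor. */
#define copy_buf(src, dst, len)         copy_buf((dst), (src), (len))
#endif
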
#define KCC_MAGIC          0x7a7a7a7a
#define KCC_POISON         0x77

typedef struct kmem_cache_cb {
        int                 kcc_magic;
        struct list_head    kcc_list;
        kmem_cache_t *      kcc_cache;
        kmem_constructor_t  kcc_constructor;
        kmem_destructor_t   kcc_destructor;
        kmem_reclaim_t      kcc_reclaim;
        void *              kcc_private;
        void *              kcc_vmp;
        atomic_t            kcc_ref;
} kmem_cache_cb_t;

static struct rw_semaphore kmem_cache_cb_sem;
static struct list_head kmem_cache_cb_list;
static struct shrinker *kmem_cache_shrinker;

/* Function must be called while holding the kmem_cache_cb_sem.
 * Because kmem_cache_t is an opaque datatype we're forced to
 * match pointers to identify specific cache entries.
 */
static kmem_cache_cb_t *
kmem_cache_find_cache_cb(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
        ASSERT(rwsem_is_locked(&kmem_cache_cb_sem));
#endif

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list)
                if (cache == kcc->kcc_cache)
                        return kcc;

        return NULL;
}

static kmem_cache_cb_t *
kmem_cache_add_cache_cb(kmem_cache_t *cache,
                        kmem_constructor_t constructor,
                        kmem_destructor_t destructor,
                        kmem_reclaim_t reclaim,
                        void *priv, void *vmp)
{
        kmem_cache_cb_t *kcc;

        kcc = (kmem_cache_cb_t *)kmalloc(sizeof(*kcc), GFP_KERNEL);
        if (kcc) {
                kcc->kcc_magic = KCC_MAGIC;
                kcc->kcc_cache = cache;
                kcc->kcc_constructor = constructor;
                kcc->kcc_destructor = destructor;
                kcc->kcc_reclaim = reclaim;
                kcc->kcc_private = priv;
                kcc->kcc_vmp = vmp;
                atomic_set(&kcc->kcc_ref, 0);
                down_write(&kmem_cache_cb_sem);
                list_add(&kcc->kcc_list, &kmem_cache_cb_list);
                up_write(&kmem_cache_cb_sem);
        }

        return kcc;
}

static void
kmem_cache_remove_cache_cb(kmem_cache_cb_t *kcc)
{
        if (kcc == NULL)
                return;

        down_write(&kmem_cache_cb_sem);
        ASSERT(atomic_read(&kcc->kcc_ref) == 0);
        list_del(&kcc->kcc_list);
        up_write(&kmem_cache_cb_sem);

        /* Poison the callback entry to catch any use after free */
        memset(kcc, KCC_POISON, sizeof(*kcc));
        kfree(kcc);
}

static void
kmem_cache_generic_constructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_constructor_t constructor;
        void *private;

        ASSERT(flags & SLAB_CTOR_CONSTRUCTOR);

        /* Ensure constructor verify calls are not passed to the registered
         * constructors.  This may not be safe because the Solaris
         * constructors have no notion of the SLAB_CTOR_VERIFY flag and
         * would not know how to handle it.
         */
        if (flags & SLAB_CTOR_VERIFY)
                return;

        if (flags & SLAB_CTOR_ATOMIC)
                flags = KM_NOSLEEP;
        else
                flags = KM_SLEEP;

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered constructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        constructor = kcc->kcc_constructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        if (constructor)
                constructor(ptr, private, (int)flags);

        atomic_dec(&kcc->kcc_ref);

        /* Linux constructors have no return code, so any error from the
         * registered constructor is silently eaten here. */
}
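
/*
 * For illustration only: a sketch of the Solaris-style callbacks that the
 * generic adapters in this file invoke.  The object type and names are
 * hypothetical; the signatures are simply those implied by the call sites
 * above and below (the constructor receives the object, the registered
 * private data, and a KM_SLEEP/KM_NOSLEEP flag; the destructor receives
 * only the object and the private data).
 */
#if 0
typedef struct my_obj {
        int mo_state;
} my_obj_t;

static int
my_obj_constructor(void *buf, void *priv, int kmflags)
{
        my_obj_t *obj = buf;

        /* May run with interrupts disabled, so it must never sleep */
        obj->mo_state = 0;
        return 0;
}

static void
my_obj_destructor(void *buf, void *priv)
{
        /* Nothing to tear down in this sketch */
}
#endif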

static void
kmem_cache_generic_destructor(void *ptr, kmem_cache_t *cache, unsigned long flags)
{
        kmem_cache_cb_t *kcc;
        kmem_destructor_t destructor;
        void *private;

        /* No valid destructor flags */
        ASSERT(flags == 0);

        /* We can be called with interrupts disabled so it is critical that
         * this function and the registered destructor never sleep.
         */
        while (!down_read_trylock(&kmem_cache_cb_sem));

        /* Callback list must be in sync with linux slab caches */
        kcc = kmem_cache_find_cache_cb(cache);
        ASSERT(kcc);
        ASSERT(kcc->kcc_magic == KCC_MAGIC);
        atomic_inc(&kcc->kcc_ref);

        destructor = kcc->kcc_destructor;
        private = kcc->kcc_private;

        up_read(&kmem_cache_cb_sem);

        /* Solaris destructors take no flags, silently eat them */
        if (destructor)
                destructor(ptr, private);

        atomic_dec(&kcc->kcc_ref);
}

/* XXX - Arguments are ignored */
static int
kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
{
        kmem_cache_cb_t *kcc;
        int total = 0;

        /* Under linux a shrinker is not tightly coupled with a slab
         * cache.  In fact linux always systematically tries calling all
         * registered shrinker callbacks until its target reclamation level
         * is reached.  Because of this we only register one shrinker
         * function in the shim layer for all slab caches, and we always
         * attempt to shrink all caches when this generic shrinker is called.
         */
        down_read(&kmem_cache_cb_sem);

        list_for_each_entry(kcc, &kmem_cache_cb_list, kcc_list) {
                ASSERT(kcc);
                ASSERT(kcc->kcc_magic == KCC_MAGIC);

                /* Take a reference on the cache in question.  If that
                 * cache is contended simply skip it; it may already be
                 * in the process of a reclaim, or the ctor/dtor may be
                 * running.  In either case it's best to skip it.
                 */
                atomic_inc(&kcc->kcc_ref);
                if (atomic_read(&kcc->kcc_ref) > 1) {
                        atomic_dec(&kcc->kcc_ref);
                        continue;
                }

                /* Under linux the desired number and gfp type of objects
                 * is passed to the reclaiming function as a suggested reclaim
                 * target.  I do not pass these args on because reclaim
                 * policy is entirely up to the owner under solaris.  We only
                 * pass on the pre-registered private data.
                 */
                if (kcc->kcc_reclaim)
                        kcc->kcc_reclaim(kcc->kcc_private);

                atomic_dec(&kcc->kcc_ref);
                total += 1;
        }

        /* Under linux we should return the remaining number of entries in
         * the cache.  Unfortunately, I don't see an easy way to safely
         * emulate this behavior so I'm returning one entry per cache which
         * was registered with the generic shrinker.  This should fake out
         * the linux VM when it attempts to shrink caches.
         */
        up_read(&kmem_cache_cb_sem);

        return total;
}
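
/*
 * For illustration only: a hypothetical Solaris-style reclaim callback of
 * the kind the generic shrinker above invokes.  Only the pre-registered
 * private data is passed through; the Linux nr_to_scan/gfp_mask hints are
 * deliberately dropped, so all reclaim policy lives in the callback.  The
 * void return is an assumption here; the shrinker ignores any result.
 */
#if 0
static void
my_obj_reclaim(void *priv)
{
        /* e.g. walk a private idle list and free objects back to the cache */
}
#endif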

/* Ensure the kmem_cache_create/kmem_cache_destroy/kmem_cache_alloc wrapper
 * macros are removed here to prevent a recursive substitution; we want to
 * call the native linux versions from this point on.
 */
#undef kmem_cache_create
#undef kmem_cache_destroy
#undef kmem_cache_alloc

kmem_cache_t *
__kmem_cache_create(char *name, size_t size, size_t align,
                    kmem_constructor_t constructor,
                    kmem_destructor_t destructor,
                    kmem_reclaim_t reclaim,
                    void *priv, void *vmp, int flags)
{
        kmem_cache_t *cache;
        kmem_cache_cb_t *kcc;
        int shrinker_flag = 0;
        char *cache_name;
        ENTRY;

        /* XXX: - Options currently unsupported by the shim layer */
        ASSERT(!vmp);
        ASSERT(flags == 0);

        cache_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
        if (cache_name == NULL)
                RETURN(NULL);

        strcpy(cache_name, name);
        cache = kmem_cache_create(cache_name, size, align, flags,
                                  kmem_cache_generic_constructor,
                                  kmem_cache_generic_destructor);
        if (cache == NULL) {
                kfree(cache_name);
                RETURN(NULL);
        }

        /* Register the shared shrinker function on initial cache create */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list)) {
                kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
                                                   kmem_cache_generic_shrinker);
                if (kmem_cache_shrinker == NULL) {
                        kmem_cache_destroy(cache);
                        kfree(cache_name);
                        up_read(&kmem_cache_cb_sem);
                        RETURN(NULL);
                }

                shrinker_flag = 1;
        }
        up_read(&kmem_cache_cb_sem);

        kcc = kmem_cache_add_cache_cb(cache, constructor, destructor,
                                      reclaim, priv, vmp);
        if (kcc == NULL) {
                if (shrinker_flag) /* New shrinker registered must be removed */
                        remove_shrinker(kmem_cache_shrinker);

                kmem_cache_destroy(cache);
                kfree(cache_name);
                RETURN(NULL);
        }

        RETURN(cache);
}
EXPORT_SYMBOL(__kmem_cache_create);
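
/*
 * For illustration only: how a consumer might create and tear down a cache
 * through this shim.  Real consumers normally call the Solaris-style
 * kmem_cache_create() wrapper macro from the shim headers (undefined above
 * in this file), which resolves to __kmem_cache_create(); the callback and
 * type names reuse the hypothetical sketches shown earlier.
 */
#if 0
static kmem_cache_t *my_obj_cache;

static int
my_obj_cache_init(void)
{
        my_obj_cache = __kmem_cache_create((char *)"my_obj_cache",
                                           sizeof(my_obj_t), 0,
                                           my_obj_constructor,
                                           my_obj_destructor,
                                           my_obj_reclaim,
                                           NULL, NULL, 0);
        return (my_obj_cache == NULL) ? -ENOMEM : 0;
}

static void
my_obj_cache_fini(void)
{
        __kmem_cache_destroy(my_obj_cache);
}
#endif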

/* Return code provided despite Solaris's void return.  There should be no
 * harm here since the Solaris callers will ignore it anyway. */
int
__kmem_cache_destroy(kmem_cache_t *cache)
{
        kmem_cache_cb_t *kcc;
        char *name;
        int rc;
        ENTRY;

        down_read(&kmem_cache_cb_sem);
        kcc = kmem_cache_find_cache_cb(cache);
        if (kcc == NULL) {
                up_read(&kmem_cache_cb_sem);
                RETURN(-EINVAL);
        }
        atomic_inc(&kcc->kcc_ref);
        up_read(&kmem_cache_cb_sem);

        name = (char *)kmem_cache_name(cache);
        rc = kmem_cache_destroy(cache);

        atomic_dec(&kcc->kcc_ref);
        kmem_cache_remove_cache_cb(kcc);
        kfree(name);

        /* Unregister generic shrinker on removal of all caches */
        down_read(&kmem_cache_cb_sem);
        if (list_empty(&kmem_cache_cb_list))
                remove_shrinker(kmem_cache_shrinker);

        up_read(&kmem_cache_cb_sem);
        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_destroy);

/* Under Solaris if the KM_SLEEP flag is passed we absolutely must
 * sleep until we are allocated the memory.  Under Linux you can still
 * get a memory allocation failure, so I'm forced to keep requesting
 * the memory even if the system is under substantial memory pressure
 * or fragmentation prevents the allocation from succeeding.  This is
 * not the correct fix, or even a good one.  But it will do for now.
 */
void *
__kmem_cache_alloc(kmem_cache_t *cache, gfp_t flags)
{
        void *rc;
        ENTRY;

restart:
        rc = kmem_cache_alloc(cache, flags);
        if ((rc == NULL) && (flags & KM_SLEEP)) {
#ifdef DEBUG_KMEM
                atomic64_inc(&kmem_cache_alloc_failed);
#endif /* DEBUG_KMEM */
                GOTO(restart, rc);
        }

        RETURN(rc);
}
EXPORT_SYMBOL(__kmem_cache_alloc);

void
__kmem_reap(void)
{
        ENTRY;
        /* Since there's no easy hook into linux to force all the registered
         * shrinkers to run, we just run the one registered by this shim */
        kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
        EXIT;
}
EXPORT_SYMBOL(__kmem_reap);

int
kmem_init(void)
{
        ENTRY;

        init_rwsem(&kmem_cache_cb_sem);
        INIT_LIST_HEAD(&kmem_cache_cb_list);
#ifdef DEBUG_KMEM
        {
                int i;
                atomic64_set(&kmem_alloc_used, 0);
                atomic64_set(&vmem_alloc_used, 0);

                spin_lock_init(&kmem_lock);
                INIT_LIST_HEAD(&kmem_list);

                for (i = 0; i < KMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&kmem_table[i]);

                spin_lock_init(&vmem_lock);
                INIT_LIST_HEAD(&vmem_list);

                for (i = 0; i < VMEM_TABLE_SIZE; i++)
                        INIT_HLIST_HEAD(&vmem_table[i]);

                atomic64_set(&kmem_cache_alloc_failed, 0);
        }
#endif
        RETURN(0);
}

#ifdef DEBUG_KMEM
static char *
sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
        int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
        int i, flag = 1;

        ASSERT(str != NULL && len >= 17);
        memset(str, 0, len);

        /* Check for a fully printable string, and while we are at
         * it place the printable characters in the passed buffer. */
        for (i = 0; i < size; i++) {
                str[i] = ((char *)(kd->kd_addr))[i];
                if (isprint(str[i])) {
                        continue;
                } else {
                        /* Minimum number of printable characters found
                         * to make it worthwhile to print this as ascii. */
                        if (i > min)
                                break;

                        flag = 0;
                        break;
                }
        }

        if (!flag) {
                sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
                        *((uint8_t *)kd->kd_addr),
                        *((uint8_t *)kd->kd_addr + 2),
                        *((uint8_t *)kd->kd_addr + 4),
                        *((uint8_t *)kd->kd_addr + 6),
                        *((uint8_t *)kd->kd_addr + 8),
                        *((uint8_t *)kd->kd_addr + 10),
                        *((uint8_t *)kd->kd_addr + 12),
                        *((uint8_t *)kd->kd_addr + 14));
        }

        return str;
}
#endif /* DEBUG_KMEM */

void
kmem_fini(void)
{
        ENTRY;
#ifdef DEBUG_KMEM
        {
                unsigned long flags;
                kmem_debug_t *kd;
                char str[17];

                /* Display all unreclaimed memory addresses, including the
                 * allocation size and the first few bytes of what's located
                 * at that address to aid in debugging.  Performance is not
                 * a serious concern here since it is module unload time. */
                if (atomic64_read(&kmem_alloc_used) != 0)
                        CWARN("kmem leaked %ld/%ld bytes\n",
                              atomic64_read(&kmem_alloc_used), kmem_alloc_max);

                spin_lock_irqsave(&kmem_lock, flags);
                if (!list_empty(&kmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &kmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&kmem_lock, flags);

                if (atomic64_read(&vmem_alloc_used) != 0)
                        CWARN("vmem leaked %ld/%ld bytes\n",
                              atomic64_read(&vmem_alloc_used), vmem_alloc_max);

                spin_lock_irqsave(&vmem_lock, flags);
                if (!list_empty(&vmem_list))
                        CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
                               "address", "size", "data", "func", "line");

                list_for_each_entry(kd, &vmem_list, kd_list)
                        CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                               kd->kd_addr, kd->kd_size,
                               sprintf_addr(kd, str, 17, 8),
                               kd->kd_func, kd->kd_line);

                spin_unlock_irqrestore(&vmem_lock, flags);
        }
#endif
        EXIT;
}