]> git.proxmox.com Git - mirror_spl-debian.git/blob - module/spl/spl-tsd.c
c63a552741c070de7d1cb1bb73deab478fe058f7
[mirror_spl-debian.git] / module / spl / spl-tsd.c
1 /*****************************************************************************\
2 * Copyright (C) 2010 Lawrence Livermore National Security, LLC.
3 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
4 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
5 * UCRL-CODE-235197
6 *
7 * This file is part of the SPL, Solaris Porting Layer.
8 * For details, see <http://github.com/behlendorf/spl/>.
9 *
10 * The SPL is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version.
14 *
15 * The SPL is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
22 *****************************************************************************
23 * Solaris Porting Layer (SPL) Thread Specific Data Implementation.
24 *
25 * Thread specific data has been implemented using a hash table, this avoids
26 * the need to add a member to the task structure and allows maximum
27 * portability between kernels. This implementation has been optimized
28 * to keep the tsd_set() and tsd_get() times as small as possible.
29 *
30 * The majority of the entries in the hash table are for specific tsd
31 * entries. These entries are hashed by the product of their key and
32 * pid because by design the key and pid are guaranteed to be unique.
33 * Their product also has the desirable property that it will be uniformly
34 * distributed over the hash bins providing neither the pid nor key is zero.
35 * Under linux the zero pid is always the init process and thus won't be
36 * used, and this implementation is careful to never assign a zero key.
37 * By default the hash table is sized to 512 bins which is expected to
38 * be sufficient for light to moderate usage of thread specific data.
39 *
40 * The hash table contains two additional types of entries. The first
41 * type of entry is called a 'key' entry and it is added to the hash during
42 * tsd_create(). It is used to store the address of the destructor function
43 * and it is used as an anchor point. All tsd entries which use the same
44 * key will be linked to this entry. This is used during tsd_destroy() to
45 * quickly call the destructor function for all tsd associated with the key.
46 * The 'key' entry may be looked up with tsd_hash_search() by passing the
47 * key you wish to lookup and DTOR_PID constant as the pid.
48 *
49 * The second type of entry is called a 'pid' entry and it is added to the
50 * hash the first time a process sets a key. The 'pid' entry is also used
51 * as an anchor and all tsd for the process will be linked to it. This
52 * list is used during tsd_exit() to ensure all registered destructors
53 * are run for the process. The 'pid' entry may be looked up with
54 * tsd_hash_search() by passing the PID_KEY constant as the key, and
55 * the process pid. Note that tsd_exit() is called by thread_exit()
56 * so if you're using the Solaris thread API you should not need to call
57 * tsd_exit() directly.
58 *
59 \*****************************************************************************/
60
61 #include <sys/kmem.h>
62 #include <sys/thread.h>
63 #include <sys/tsd.h>
64 #include <spl-debug.h>
65
/*
 * Drop any DEBUG_SUBSYSTEM definition picked up from the headers above
 * and redefine it as SS_TSD for the remainder of this file.
 */
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM SS_TSD
72
73 typedef struct tsd_hash_bin {
74 spinlock_t hb_lock;
75 struct hlist_head hb_head;
76 } tsd_hash_bin_t;
77
78 typedef struct tsd_hash_table {
79 spinlock_t ht_lock;
80 uint_t ht_bits;
81 uint_t ht_key;
82 tsd_hash_bin_t *ht_bins;
83 } tsd_hash_table_t;
84
85 typedef struct tsd_hash_entry {
86 uint_t he_key;
87 pid_t he_pid;
88 dtor_func_t he_dtor;
89 void *he_value;
90 struct hlist_node he_list;
91 struct list_head he_key_list;
92 struct list_head he_pid_list;
93 } tsd_hash_entry_t;
94
95 static tsd_hash_table_t *tsd_hash_table = NULL;
96
97
98 /*
99 * tsd_hash_search - searches hash table for tsd_hash_entry
100 * @table: hash table
101 * @key: search key
102 * @pid: search pid
103 */
104 static tsd_hash_entry_t *
105 tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
106 {
107 struct hlist_node *node;
108 tsd_hash_entry_t *entry;
109 tsd_hash_bin_t *bin;
110 ulong_t hash;
111 SENTRY;
112
113 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
114 bin = &table->ht_bins[hash];
115 spin_lock(&bin->hb_lock);
116 hlist_for_each_entry(entry, node, &bin->hb_head, he_list) {
117 if ((entry->he_key == key) && (entry->he_pid == pid)) {
118 spin_unlock(&bin->hb_lock);
119 SRETURN(entry);
120 }
121 }
122
123 spin_unlock(&bin->hb_lock);
124 SRETURN(NULL);
125 }
126
127 /*
128 * tsd_hash_dtor - call the destructor and free all entries on the list
129 * @work: list of hash entries
130 *
131 * For a list of entries which have all already been removed from the
132 * hash call their registered destructor then free the associated memory.
133 */
134 static void
135 tsd_hash_dtor(struct hlist_head *work)
136 {
137 tsd_hash_entry_t *entry;
138 SENTRY;
139
140 while (!hlist_empty(work)) {
141 entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
142 hlist_del(&entry->he_list);
143
144 if (entry->he_dtor && entry->he_pid != DTOR_PID)
145 entry->he_dtor(entry->he_value);
146
147 kmem_free(entry, sizeof(tsd_hash_entry_t));
148 }
149
150 SEXIT;
151 }
152
153 /*
154 * tsd_hash_add - adds an entry to hash table
155 * @table: hash table
156 * @key: search key
157 * @pid: search pid
158 *
159 * The caller is responsible for ensuring the unique key/pid do not
160 * already exist in the hash table. This possible because all entries
161 * are thread specific thus a concurrent thread will never attempt to
162 * add this key/pid. Because multiple bins must be checked to add
163 * links to the dtor and pid entries the entire table is locked.
164 */
165 static int
166 tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
167 {
168 tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
169 tsd_hash_bin_t *bin;
170 ulong_t hash;
171 int rc = 0;
172 SENTRY;
173
174 ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
175
176 /* New entry allocate structure, set value, and add to hash */
177 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
178 if (entry == NULL)
179 SRETURN(ENOMEM);
180
181 entry->he_key = key;
182 entry->he_pid = pid;
183 entry->he_value = value;
184 INIT_HLIST_NODE(&entry->he_list);
185 INIT_LIST_HEAD(&entry->he_key_list);
186 INIT_LIST_HEAD(&entry->he_pid_list);
187
188 spin_lock(&table->ht_lock);
189
190 /* Destructor entry must exist for all valid keys */
191 dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
192 ASSERT3P(dtor_entry, !=, NULL);
193 entry->he_dtor = dtor_entry->he_dtor;
194
195 /* Process entry must exist for all valid processes */
196 pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
197 ASSERT3P(pid_entry, !=, NULL);
198
199 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
200 bin = &table->ht_bins[hash];
201 spin_lock(&bin->hb_lock);
202
203 /* Add to the hash, key, and pid lists */
204 hlist_add_head(&entry->he_list, &bin->hb_head);
205 list_add(&entry->he_key_list, &dtor_entry->he_key_list);
206 list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
207
208 spin_unlock(&bin->hb_lock);
209 spin_unlock(&table->ht_lock);
210
211 SRETURN(rc);
212 }
213
214 /*
215 * tsd_hash_add_key - adds a destructor entry to the hash table
216 * @table: hash table
217 * @keyp: search key
218 * @dtor: key destructor
219 *
220 * For every unique key there is a single entry in the hash which is used
221 * as anchor. All other thread specific entries for this key are linked
222 * to this anchor via the 'he_key_list' list head. On return they keyp
223 * will be set to the next available key for the hash table.
224 */
225 static int
226 tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
227 {
228 tsd_hash_entry_t *tmp_entry, *entry;
229 tsd_hash_bin_t *bin;
230 ulong_t hash;
231 int keys_checked = 0;
232 SENTRY;
233
234 ASSERT3P(table, !=, NULL);
235
236 /* Allocate entry to be used as a destructor for this key */
237 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
238 if (entry == NULL)
239 SRETURN(ENOMEM);
240
241 /* Determine next available key value */
242 spin_lock(&table->ht_lock);
243 do {
244 /* Limited to TSD_KEYS_MAX concurrent unique keys */
245 if (table->ht_key++ > TSD_KEYS_MAX)
246 table->ht_key = 1;
247
248 /* Ensure failure when all TSD_KEYS_MAX keys are in use */
249 if (keys_checked++ >= TSD_KEYS_MAX) {
250 spin_unlock(&table->ht_lock);
251 SRETURN(ENOENT);
252 }
253
254 tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
255 } while (tmp_entry);
256
257 /* Add destructor entry in to hash table */
258 entry->he_key = *keyp = table->ht_key;
259 entry->he_pid = DTOR_PID;
260 entry->he_dtor = dtor;
261 entry->he_value = NULL;
262 INIT_HLIST_NODE(&entry->he_list);
263 INIT_LIST_HEAD(&entry->he_key_list);
264 INIT_LIST_HEAD(&entry->he_pid_list);
265
266 hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
267 bin = &table->ht_bins[hash];
268 spin_lock(&bin->hb_lock);
269
270 hlist_add_head(&entry->he_list, &bin->hb_head);
271
272 spin_unlock(&bin->hb_lock);
273 spin_unlock(&table->ht_lock);
274
275 SRETURN(0);
276 }
277
278 /*
279 * tsd_hash_add_pid - adds a process entry to the hash table
280 * @table: hash table
281 * @pid: search pid
282 *
283 * For every process these is a single entry in the hash which is used
284 * as anchor. All other thread specific entries for this process are
285 * linked to this anchor via the 'he_pid_list' list head.
286 */
287 static int
288 tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
289 {
290 tsd_hash_entry_t *entry;
291 tsd_hash_bin_t *bin;
292 ulong_t hash;
293 SENTRY;
294
295 /* Allocate entry to be used as the process reference */
296 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
297 if (entry == NULL)
298 SRETURN(ENOMEM);
299
300 spin_lock(&table->ht_lock);
301 entry->he_key = PID_KEY;
302 entry->he_pid = pid;
303 entry->he_dtor = NULL;
304 entry->he_value = NULL;
305 INIT_HLIST_NODE(&entry->he_list);
306 INIT_LIST_HEAD(&entry->he_key_list);
307 INIT_LIST_HEAD(&entry->he_pid_list);
308
309 hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
310 bin = &table->ht_bins[hash];
311 spin_lock(&bin->hb_lock);
312
313 hlist_add_head(&entry->he_list, &bin->hb_head);
314
315 spin_unlock(&bin->hb_lock);
316 spin_unlock(&table->ht_lock);
317
318 SRETURN(0);
319 }
320
321 /*
322 * tsd_hash_del - delete an entry from hash table, key, and pid lists
323 * @table: hash table
324 * @key: search key
325 * @pid: search pid
326 */
327 static void
328 tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
329 {
330 SENTRY;
331
332 ASSERT(spin_is_locked(&table->ht_lock));
333 hlist_del(&entry->he_list);
334 list_del_init(&entry->he_key_list);
335 list_del_init(&entry->he_pid_list);
336
337 SEXIT;
338 }
339
340 /*
341 * tsd_hash_table_init - allocate a hash table
342 * @bits: hash table size
343 *
344 * A hash table with 2^bits bins will be created, it may not be resized
345 * after the fact and must be free'd with tsd_hash_table_fini().
346 */
347 static tsd_hash_table_t *
348 tsd_hash_table_init(uint_t bits)
349 {
350 tsd_hash_table_t *table;
351 int hash, size = (1 << bits);
352 SENTRY;
353
354 table = kmem_zalloc(sizeof(tsd_hash_table_t), KM_SLEEP);
355 if (table == NULL)
356 SRETURN(NULL);
357
358 table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size,
359 KM_SLEEP | KM_NODEBUG);
360 if (table->ht_bins == NULL) {
361 kmem_free(table, sizeof(tsd_hash_table_t));
362 SRETURN(NULL);
363 }
364
365 for (hash = 0; hash < size; hash++) {
366 spin_lock_init(&table->ht_bins[hash].hb_lock);
367 INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
368 }
369
370 spin_lock_init(&table->ht_lock);
371 table->ht_bits = bits;
372 table->ht_key = 1;
373
374 SRETURN(table);
375 }
376
377 /*
378 * tsd_hash_table_fini - free a hash table
379 * @table: hash table
380 *
381 * Free a hash table allocated by tsd_hash_table_init(). If the hash
382 * table is not empty this function will call the proper destructor for
383 * all remaining entries before freeing the memory used by those entries.
384 */
385 static void
386 tsd_hash_table_fini(tsd_hash_table_t *table)
387 {
388 HLIST_HEAD(work);
389 tsd_hash_bin_t *bin;
390 tsd_hash_entry_t *entry;
391 int size, i;
392 SENTRY;
393
394 ASSERT3P(table, !=, NULL);
395 spin_lock(&table->ht_lock);
396 for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
397 bin = &table->ht_bins[i];
398 spin_lock(&bin->hb_lock);
399 while (!hlist_empty(&bin->hb_head)) {
400 entry = hlist_entry(bin->hb_head.first,
401 tsd_hash_entry_t, he_list);
402 tsd_hash_del(table, entry);
403 hlist_add_head(&entry->he_list, &work);
404 }
405 spin_unlock(&bin->hb_lock);
406 }
407 spin_unlock(&table->ht_lock);
408
409 tsd_hash_dtor(&work);
410 kmem_free(table->ht_bins, sizeof(tsd_hash_bin_t)*(1<<table->ht_bits));
411 kmem_free(table, sizeof(tsd_hash_table_t));
412
413 SEXIT;
414 }
415
416 /*
417 * tsd_set - set thread specific data
418 * @key: lookup key
419 * @value: value to set
420 *
421 * Caller must prevent racing tsd_create() or tsd_destroy(), protected
422 * from racing tsd_get() or tsd_set() because it is thread specific.
423 * This function has been optimized to be fast for the update case.
424 * When setting the tsd initially it will be slower due to additional
425 * required locking and potential memory allocations.
426 */
427 int
428 tsd_set(uint_t key, void *value)
429 {
430 tsd_hash_table_t *table;
431 tsd_hash_entry_t *entry;
432 pid_t pid;
433 int rc;
434 SENTRY;
435
436 table = tsd_hash_table;
437 pid = curthread->pid;
438 ASSERT3P(table, !=, NULL);
439
440 if ((key == 0) || (key > TSD_KEYS_MAX))
441 SRETURN(EINVAL);
442
443 /* Entry already exists in hash table update value */
444 entry = tsd_hash_search(table, key, pid);
445 if (entry) {
446 entry->he_value = value;
447 SRETURN(0);
448 }
449
450 /* Add a process entry to the hash if not yet exists */
451 entry = tsd_hash_search(table, PID_KEY, pid);
452 if (entry == NULL) {
453 rc = tsd_hash_add_pid(table, pid);
454 if (rc)
455 SRETURN(rc);
456 }
457
458 rc = tsd_hash_add(table, key, pid, value);
459 SRETURN(rc);
460 }
461 EXPORT_SYMBOL(tsd_set);
462
463 /*
464 * tsd_get - get thread specific data
465 * @key: lookup key
466 *
467 * Caller must prevent racing tsd_create() or tsd_destroy(). This
468 * implementation is designed to be fast and scalable, it does not
469 * lock the entire table only a single hash bin.
470 */
471 void *
472 tsd_get(uint_t key)
473 {
474 tsd_hash_entry_t *entry;
475 SENTRY;
476
477 ASSERT3P(tsd_hash_table, !=, NULL);
478
479 if ((key == 0) || (key > TSD_KEYS_MAX))
480 SRETURN(NULL);
481
482 entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
483 if (entry == NULL)
484 SRETURN(NULL);
485
486 SRETURN(entry->he_value);
487 }
488 EXPORT_SYMBOL(tsd_get);
489
490 /*
491 * tsd_create - create thread specific data key
492 * @keyp: lookup key address
493 * @dtor: destructor called during tsd_destroy() or tsd_exit()
494 *
495 * Provided key must be set to 0 or it assumed to be already in use.
496 * The dtor is allowed to be NULL in which case no additional cleanup
497 * for the data is performed during tsd_destroy() or tsd_exit().
498 *
499 * Caller must prevent racing tsd_set() or tsd_get(), this function is
500 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
501 */
502 void
503 tsd_create(uint_t *keyp, dtor_func_t dtor)
504 {
505 SENTRY;
506
507 ASSERT3P(keyp, !=, NULL);
508 if (*keyp) {
509 SEXIT;
510 return;
511 }
512
513 (void)tsd_hash_add_key(tsd_hash_table, keyp, dtor);
514
515 SEXIT;
516 }
517 EXPORT_SYMBOL(tsd_create);
518
519 /*
520 * tsd_destroy - destroy thread specific data
521 * @keyp: lookup key address
522 *
523 * Destroys the thread specific data on all threads which use this key.
524 *
525 * Caller must prevent racing tsd_set() or tsd_get(), this function is
526 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
527 */
528 void
529 tsd_destroy(uint_t *keyp)
530 {
531 HLIST_HEAD(work);
532 tsd_hash_table_t *table;
533 tsd_hash_entry_t *dtor_entry, *entry;
534 tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
535 ulong_t hash;
536 SENTRY;
537
538 table = tsd_hash_table;
539 ASSERT3P(table, !=, NULL);
540
541 spin_lock(&table->ht_lock);
542 dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
543 if (dtor_entry == NULL) {
544 spin_unlock(&table->ht_lock);
545 SEXIT;
546 return;
547 }
548
549 /*
550 * All threads which use this key must be linked off of the
551 * DTOR_PID entry. They are removed from the hash table and
552 * linked in to a private working list to be destroyed.
553 */
554 while (!list_empty(&dtor_entry->he_key_list)) {
555 entry = list_entry(dtor_entry->he_key_list.next,
556 tsd_hash_entry_t, he_key_list);
557 ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
558 ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
559
560 hash = hash_long((ulong_t)entry->he_key *
561 (ulong_t)entry->he_pid, table->ht_bits);
562 entry_bin = &table->ht_bins[hash];
563
564 spin_lock(&entry_bin->hb_lock);
565 tsd_hash_del(table, entry);
566 hlist_add_head(&entry->he_list, &work);
567 spin_unlock(&entry_bin->hb_lock);
568 }
569
570 hash = hash_long((ulong_t)dtor_entry->he_key *
571 (ulong_t)dtor_entry->he_pid, table->ht_bits);
572 dtor_entry_bin = &table->ht_bins[hash];
573
574 spin_lock(&dtor_entry_bin->hb_lock);
575 tsd_hash_del(table, dtor_entry);
576 hlist_add_head(&dtor_entry->he_list, &work);
577 spin_unlock(&dtor_entry_bin->hb_lock);
578 spin_unlock(&table->ht_lock);
579
580 tsd_hash_dtor(&work);
581 *keyp = 0;
582
583 SEXIT;
584 }
585 EXPORT_SYMBOL(tsd_destroy);
586
587 /*
588 * tsd_exit - destroys all thread specific data for this thread
589 *
590 * Destroys all the thread specific data for this thread.
591 *
592 * Caller must prevent racing tsd_set() or tsd_get(), this function is
593 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
594 */
595 void
596 tsd_exit(void)
597 {
598 HLIST_HEAD(work);
599 tsd_hash_table_t *table;
600 tsd_hash_entry_t *pid_entry, *entry;
601 tsd_hash_bin_t *pid_entry_bin, *entry_bin;
602 ulong_t hash;
603 SENTRY;
604
605 table = tsd_hash_table;
606 ASSERT3P(table, !=, NULL);
607
608 spin_lock(&table->ht_lock);
609 pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
610 if (pid_entry == NULL) {
611 spin_unlock(&table->ht_lock);
612 SEXIT;
613 return;
614 }
615
616 /*
617 * All keys associated with this pid must be linked off of the
618 * PID_KEY entry. They are removed from the hash table and
619 * linked in to a private working list to be destroyed.
620 */
621
622 while (!list_empty(&pid_entry->he_pid_list)) {
623 entry = list_entry(pid_entry->he_pid_list.next,
624 tsd_hash_entry_t, he_pid_list);
625 ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
626
627 hash = hash_long((ulong_t)entry->he_key *
628 (ulong_t)entry->he_pid, table->ht_bits);
629 entry_bin = &table->ht_bins[hash];
630
631 spin_lock(&entry_bin->hb_lock);
632 tsd_hash_del(table, entry);
633 hlist_add_head(&entry->he_list, &work);
634 spin_unlock(&entry_bin->hb_lock);
635 }
636
637 hash = hash_long((ulong_t)pid_entry->he_key *
638 (ulong_t)pid_entry->he_pid, table->ht_bits);
639 pid_entry_bin = &table->ht_bins[hash];
640
641 spin_lock(&pid_entry_bin->hb_lock);
642 tsd_hash_del(table, pid_entry);
643 hlist_add_head(&pid_entry->he_list, &work);
644 spin_unlock(&pid_entry_bin->hb_lock);
645 spin_unlock(&table->ht_lock);
646
647 tsd_hash_dtor(&work);
648
649 SEXIT;
650 }
651 EXPORT_SYMBOL(tsd_exit);
652
653 int
654 spl_tsd_init(void)
655 {
656 SENTRY;
657
658 tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
659 if (tsd_hash_table == NULL)
660 SRETURN(1);
661
662 SRETURN(0);
663 }
664
665 void
666 spl_tsd_fini(void)
667 {
668 SENTRY;
669 tsd_hash_table_fini(tsd_hash_table);
670 tsd_hash_table = NULL;
671 SEXIT;
672 }