]> git.proxmox.com Git - mirror_spl-debian.git/blob - module/spl/spl-tsd.c
Merge branch 'linux-3.9'
[mirror_spl-debian.git] / module / spl / spl-tsd.c
1 /*****************************************************************************\
2 * Copyright (C) 2010 Lawrence Livermore National Security, LLC.
3 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
4 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
5 * UCRL-CODE-235197
6 *
7 * This file is part of the SPL, Solaris Porting Layer.
8 * For details, see <http://zfsonlinux.org/>.
9 *
10 * The SPL is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version.
14 *
15 * The SPL is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
22 *****************************************************************************
23 * Solaris Porting Layer (SPL) Thread Specific Data Implementation.
24 *
25 * Thread specific data has been implemented using a hash table, this avoids
26 * the need to add a member to the task structure and allows maximum
27 * portability between kernels. This implementation has been optimized
28 * to keep the tsd_set() and tsd_get() times as small as possible.
29 *
30 * The majority of the entries in the hash table are for specific tsd
31 * entries. These entries are hashed by the product of their key and
32 * pid because by design the key and pid are guaranteed to be unique.
33 * Their product also has the desirable property that it will be uniformly
34 * distributed over the hash bins providing neither the pid nor key is zero.
35 * Under linux the zero pid is always the init process and thus won't be
36 * used, and this implementation is careful never to assign a zero key.
37 * By default the hash table is sized to 512 bins which is expected to
38 * be sufficient for light to moderate usage of thread specific data.
39 *
40 * The hash table contains two additional types of entries. The first
41 * type of entry is called a 'key' entry and it is added to the hash during
42 * tsd_create(). It is used to store the address of the destructor function
43 * and it is used as an anchor point. All tsd entries which use the same
44 * key will be linked to this entry. This is used during tsd_destroy() to
45 * quickly call the destructor function for all tsd associated with the key.
46 * The 'key' entry may be looked up with tsd_hash_search() by passing the
47 * key you wish to lookup and DTOR_PID constant as the pid.
48 *
49 * The second type of entry is called a 'pid' entry and it is added to the
50 * hash the first time a process sets a key. The 'pid' entry is also used
51 * as an anchor and all tsd for the process will be linked to it. This
52 * list is used during tsd_exit() to ensure all registered destructors
53 * are run for the process. The 'pid' entry may be looked up with
54 * tsd_hash_search() by passing the PID_KEY constant as the key, and
55 * the process pid. Note that tsd_exit() is called by thread_exit()
56 * so if you're using the Solaris thread API you should not need to call
57 * tsd_exit() directly.
58 *
59 \*****************************************************************************/
60
61 #include <sys/kmem.h>
62 #include <sys/thread.h>
63 #include <sys/tsd.h>
64 #include <spl-debug.h>
65
/*
 * Drop any DEBUG_SUBSYSTEM value inherited from the headers above and
 * select SS_TSD for this file.  The macro is defined exactly once.
 */
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM SS_TSD
72
73 typedef struct tsd_hash_bin {
74 spinlock_t hb_lock;
75 struct hlist_head hb_head;
76 } tsd_hash_bin_t;
77
78 typedef struct tsd_hash_table {
79 spinlock_t ht_lock;
80 uint_t ht_bits;
81 uint_t ht_key;
82 tsd_hash_bin_t *ht_bins;
83 } tsd_hash_table_t;
84
85 typedef struct tsd_hash_entry {
86 uint_t he_key;
87 pid_t he_pid;
88 dtor_func_t he_dtor;
89 void *he_value;
90 struct hlist_node he_list;
91 struct list_head he_key_list;
92 struct list_head he_pid_list;
93 } tsd_hash_entry_t;
94
95 static tsd_hash_table_t *tsd_hash_table = NULL;
96
97
98 /*
99 * tsd_hash_search - searches hash table for tsd_hash_entry
100 * @table: hash table
101 * @key: search key
102 * @pid: search pid
103 */
104 static tsd_hash_entry_t *
105 tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
106 {
107 struct hlist_node *node;
108 tsd_hash_entry_t *entry;
109 tsd_hash_bin_t *bin;
110 ulong_t hash;
111 SENTRY;
112
113 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
114 bin = &table->ht_bins[hash];
115 spin_lock(&bin->hb_lock);
116 hlist_for_each(node, &bin->hb_head) {
117 entry = list_entry(node, tsd_hash_entry_t, he_list);
118 if ((entry->he_key == key) && (entry->he_pid == pid)) {
119 spin_unlock(&bin->hb_lock);
120 SRETURN(entry);
121 }
122 }
123
124 spin_unlock(&bin->hb_lock);
125 SRETURN(NULL);
126 }
127
128 /*
129 * tsd_hash_dtor - call the destructor and free all entries on the list
130 * @work: list of hash entries
131 *
132 * For a list of entries which have all already been removed from the
133 * hash call their registered destructor then free the associated memory.
134 */
135 static void
136 tsd_hash_dtor(struct hlist_head *work)
137 {
138 tsd_hash_entry_t *entry;
139 SENTRY;
140
141 while (!hlist_empty(work)) {
142 entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
143 hlist_del(&entry->he_list);
144
145 if (entry->he_dtor && entry->he_pid != DTOR_PID)
146 entry->he_dtor(entry->he_value);
147
148 kmem_free(entry, sizeof(tsd_hash_entry_t));
149 }
150
151 SEXIT;
152 }
153
154 /*
155 * tsd_hash_add - adds an entry to hash table
156 * @table: hash table
157 * @key: search key
158 * @pid: search pid
159 *
160 * The caller is responsible for ensuring the unique key/pid do not
161 * already exist in the hash table. This possible because all entries
162 * are thread specific thus a concurrent thread will never attempt to
163 * add this key/pid. Because multiple bins must be checked to add
164 * links to the dtor and pid entries the entire table is locked.
165 */
166 static int
167 tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
168 {
169 tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
170 tsd_hash_bin_t *bin;
171 ulong_t hash;
172 int rc = 0;
173 SENTRY;
174
175 ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
176
177 /* New entry allocate structure, set value, and add to hash */
178 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
179 if (entry == NULL)
180 SRETURN(ENOMEM);
181
182 entry->he_key = key;
183 entry->he_pid = pid;
184 entry->he_value = value;
185 INIT_HLIST_NODE(&entry->he_list);
186 INIT_LIST_HEAD(&entry->he_key_list);
187 INIT_LIST_HEAD(&entry->he_pid_list);
188
189 spin_lock(&table->ht_lock);
190
191 /* Destructor entry must exist for all valid keys */
192 dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
193 ASSERT3P(dtor_entry, !=, NULL);
194 entry->he_dtor = dtor_entry->he_dtor;
195
196 /* Process entry must exist for all valid processes */
197 pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
198 ASSERT3P(pid_entry, !=, NULL);
199
200 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
201 bin = &table->ht_bins[hash];
202 spin_lock(&bin->hb_lock);
203
204 /* Add to the hash, key, and pid lists */
205 hlist_add_head(&entry->he_list, &bin->hb_head);
206 list_add(&entry->he_key_list, &dtor_entry->he_key_list);
207 list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
208
209 spin_unlock(&bin->hb_lock);
210 spin_unlock(&table->ht_lock);
211
212 SRETURN(rc);
213 }
214
215 /*
216 * tsd_hash_add_key - adds a destructor entry to the hash table
217 * @table: hash table
218 * @keyp: search key
219 * @dtor: key destructor
220 *
221 * For every unique key there is a single entry in the hash which is used
222 * as anchor. All other thread specific entries for this key are linked
223 * to this anchor via the 'he_key_list' list head. On return they keyp
224 * will be set to the next available key for the hash table.
225 */
226 static int
227 tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
228 {
229 tsd_hash_entry_t *tmp_entry, *entry;
230 tsd_hash_bin_t *bin;
231 ulong_t hash;
232 int keys_checked = 0;
233 SENTRY;
234
235 ASSERT3P(table, !=, NULL);
236
237 /* Allocate entry to be used as a destructor for this key */
238 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
239 if (entry == NULL)
240 SRETURN(ENOMEM);
241
242 /* Determine next available key value */
243 spin_lock(&table->ht_lock);
244 do {
245 /* Limited to TSD_KEYS_MAX concurrent unique keys */
246 if (table->ht_key++ > TSD_KEYS_MAX)
247 table->ht_key = 1;
248
249 /* Ensure failure when all TSD_KEYS_MAX keys are in use */
250 if (keys_checked++ >= TSD_KEYS_MAX) {
251 spin_unlock(&table->ht_lock);
252 SRETURN(ENOENT);
253 }
254
255 tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
256 } while (tmp_entry);
257
258 /* Add destructor entry in to hash table */
259 entry->he_key = *keyp = table->ht_key;
260 entry->he_pid = DTOR_PID;
261 entry->he_dtor = dtor;
262 entry->he_value = NULL;
263 INIT_HLIST_NODE(&entry->he_list);
264 INIT_LIST_HEAD(&entry->he_key_list);
265 INIT_LIST_HEAD(&entry->he_pid_list);
266
267 hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
268 bin = &table->ht_bins[hash];
269 spin_lock(&bin->hb_lock);
270
271 hlist_add_head(&entry->he_list, &bin->hb_head);
272
273 spin_unlock(&bin->hb_lock);
274 spin_unlock(&table->ht_lock);
275
276 SRETURN(0);
277 }
278
279 /*
280 * tsd_hash_add_pid - adds a process entry to the hash table
281 * @table: hash table
282 * @pid: search pid
283 *
284 * For every process these is a single entry in the hash which is used
285 * as anchor. All other thread specific entries for this process are
286 * linked to this anchor via the 'he_pid_list' list head.
287 */
288 static int
289 tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
290 {
291 tsd_hash_entry_t *entry;
292 tsd_hash_bin_t *bin;
293 ulong_t hash;
294 SENTRY;
295
296 /* Allocate entry to be used as the process reference */
297 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
298 if (entry == NULL)
299 SRETURN(ENOMEM);
300
301 spin_lock(&table->ht_lock);
302 entry->he_key = PID_KEY;
303 entry->he_pid = pid;
304 entry->he_dtor = NULL;
305 entry->he_value = NULL;
306 INIT_HLIST_NODE(&entry->he_list);
307 INIT_LIST_HEAD(&entry->he_key_list);
308 INIT_LIST_HEAD(&entry->he_pid_list);
309
310 hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
311 bin = &table->ht_bins[hash];
312 spin_lock(&bin->hb_lock);
313
314 hlist_add_head(&entry->he_list, &bin->hb_head);
315
316 spin_unlock(&bin->hb_lock);
317 spin_unlock(&table->ht_lock);
318
319 SRETURN(0);
320 }
321
322 /*
323 * tsd_hash_del - delete an entry from hash table, key, and pid lists
324 * @table: hash table
325 * @key: search key
326 * @pid: search pid
327 */
328 static void
329 tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
330 {
331 SENTRY;
332
333 ASSERT(spin_is_locked(&table->ht_lock));
334 hlist_del(&entry->he_list);
335 list_del_init(&entry->he_key_list);
336 list_del_init(&entry->he_pid_list);
337
338 SEXIT;
339 }
340
341 /*
342 * tsd_hash_table_init - allocate a hash table
343 * @bits: hash table size
344 *
345 * A hash table with 2^bits bins will be created, it may not be resized
346 * after the fact and must be free'd with tsd_hash_table_fini().
347 */
348 static tsd_hash_table_t *
349 tsd_hash_table_init(uint_t bits)
350 {
351 tsd_hash_table_t *table;
352 int hash, size = (1 << bits);
353 SENTRY;
354
355 table = kmem_zalloc(sizeof(tsd_hash_table_t), KM_SLEEP);
356 if (table == NULL)
357 SRETURN(NULL);
358
359 table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size,
360 KM_SLEEP | KM_NODEBUG);
361 if (table->ht_bins == NULL) {
362 kmem_free(table, sizeof(tsd_hash_table_t));
363 SRETURN(NULL);
364 }
365
366 for (hash = 0; hash < size; hash++) {
367 spin_lock_init(&table->ht_bins[hash].hb_lock);
368 INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
369 }
370
371 spin_lock_init(&table->ht_lock);
372 table->ht_bits = bits;
373 table->ht_key = 1;
374
375 SRETURN(table);
376 }
377
378 /*
379 * tsd_hash_table_fini - free a hash table
380 * @table: hash table
381 *
382 * Free a hash table allocated by tsd_hash_table_init(). If the hash
383 * table is not empty this function will call the proper destructor for
384 * all remaining entries before freeing the memory used by those entries.
385 */
386 static void
387 tsd_hash_table_fini(tsd_hash_table_t *table)
388 {
389 HLIST_HEAD(work);
390 tsd_hash_bin_t *bin;
391 tsd_hash_entry_t *entry;
392 int size, i;
393 SENTRY;
394
395 ASSERT3P(table, !=, NULL);
396 spin_lock(&table->ht_lock);
397 for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
398 bin = &table->ht_bins[i];
399 spin_lock(&bin->hb_lock);
400 while (!hlist_empty(&bin->hb_head)) {
401 entry = hlist_entry(bin->hb_head.first,
402 tsd_hash_entry_t, he_list);
403 tsd_hash_del(table, entry);
404 hlist_add_head(&entry->he_list, &work);
405 }
406 spin_unlock(&bin->hb_lock);
407 }
408 spin_unlock(&table->ht_lock);
409
410 tsd_hash_dtor(&work);
411 kmem_free(table->ht_bins, sizeof(tsd_hash_bin_t)*(1<<table->ht_bits));
412 kmem_free(table, sizeof(tsd_hash_table_t));
413
414 SEXIT;
415 }
416
417 /*
418 * tsd_set - set thread specific data
419 * @key: lookup key
420 * @value: value to set
421 *
422 * Caller must prevent racing tsd_create() or tsd_destroy(), protected
423 * from racing tsd_get() or tsd_set() because it is thread specific.
424 * This function has been optimized to be fast for the update case.
425 * When setting the tsd initially it will be slower due to additional
426 * required locking and potential memory allocations.
427 */
428 int
429 tsd_set(uint_t key, void *value)
430 {
431 tsd_hash_table_t *table;
432 tsd_hash_entry_t *entry;
433 pid_t pid;
434 int rc;
435 SENTRY;
436
437 table = tsd_hash_table;
438 pid = curthread->pid;
439 ASSERT3P(table, !=, NULL);
440
441 if ((key == 0) || (key > TSD_KEYS_MAX))
442 SRETURN(EINVAL);
443
444 /* Entry already exists in hash table update value */
445 entry = tsd_hash_search(table, key, pid);
446 if (entry) {
447 entry->he_value = value;
448 SRETURN(0);
449 }
450
451 /* Add a process entry to the hash if not yet exists */
452 entry = tsd_hash_search(table, PID_KEY, pid);
453 if (entry == NULL) {
454 rc = tsd_hash_add_pid(table, pid);
455 if (rc)
456 SRETURN(rc);
457 }
458
459 rc = tsd_hash_add(table, key, pid, value);
460 SRETURN(rc);
461 }
462 EXPORT_SYMBOL(tsd_set);
463
464 /*
465 * tsd_get - get thread specific data
466 * @key: lookup key
467 *
468 * Caller must prevent racing tsd_create() or tsd_destroy(). This
469 * implementation is designed to be fast and scalable, it does not
470 * lock the entire table only a single hash bin.
471 */
472 void *
473 tsd_get(uint_t key)
474 {
475 tsd_hash_entry_t *entry;
476 SENTRY;
477
478 ASSERT3P(tsd_hash_table, !=, NULL);
479
480 if ((key == 0) || (key > TSD_KEYS_MAX))
481 SRETURN(NULL);
482
483 entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
484 if (entry == NULL)
485 SRETURN(NULL);
486
487 SRETURN(entry->he_value);
488 }
489 EXPORT_SYMBOL(tsd_get);
490
491 /*
492 * tsd_create - create thread specific data key
493 * @keyp: lookup key address
494 * @dtor: destructor called during tsd_destroy() or tsd_exit()
495 *
496 * Provided key must be set to 0 or it assumed to be already in use.
497 * The dtor is allowed to be NULL in which case no additional cleanup
498 * for the data is performed during tsd_destroy() or tsd_exit().
499 *
500 * Caller must prevent racing tsd_set() or tsd_get(), this function is
501 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
502 */
503 void
504 tsd_create(uint_t *keyp, dtor_func_t dtor)
505 {
506 SENTRY;
507
508 ASSERT3P(keyp, !=, NULL);
509 if (*keyp) {
510 SEXIT;
511 return;
512 }
513
514 (void)tsd_hash_add_key(tsd_hash_table, keyp, dtor);
515
516 SEXIT;
517 }
518 EXPORT_SYMBOL(tsd_create);
519
520 /*
521 * tsd_destroy - destroy thread specific data
522 * @keyp: lookup key address
523 *
524 * Destroys the thread specific data on all threads which use this key.
525 *
526 * Caller must prevent racing tsd_set() or tsd_get(), this function is
527 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
528 */
529 void
530 tsd_destroy(uint_t *keyp)
531 {
532 HLIST_HEAD(work);
533 tsd_hash_table_t *table;
534 tsd_hash_entry_t *dtor_entry, *entry;
535 tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
536 ulong_t hash;
537 SENTRY;
538
539 table = tsd_hash_table;
540 ASSERT3P(table, !=, NULL);
541
542 spin_lock(&table->ht_lock);
543 dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
544 if (dtor_entry == NULL) {
545 spin_unlock(&table->ht_lock);
546 SEXIT;
547 return;
548 }
549
550 /*
551 * All threads which use this key must be linked off of the
552 * DTOR_PID entry. They are removed from the hash table and
553 * linked in to a private working list to be destroyed.
554 */
555 while (!list_empty(&dtor_entry->he_key_list)) {
556 entry = list_entry(dtor_entry->he_key_list.next,
557 tsd_hash_entry_t, he_key_list);
558 ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
559 ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
560
561 hash = hash_long((ulong_t)entry->he_key *
562 (ulong_t)entry->he_pid, table->ht_bits);
563 entry_bin = &table->ht_bins[hash];
564
565 spin_lock(&entry_bin->hb_lock);
566 tsd_hash_del(table, entry);
567 hlist_add_head(&entry->he_list, &work);
568 spin_unlock(&entry_bin->hb_lock);
569 }
570
571 hash = hash_long((ulong_t)dtor_entry->he_key *
572 (ulong_t)dtor_entry->he_pid, table->ht_bits);
573 dtor_entry_bin = &table->ht_bins[hash];
574
575 spin_lock(&dtor_entry_bin->hb_lock);
576 tsd_hash_del(table, dtor_entry);
577 hlist_add_head(&dtor_entry->he_list, &work);
578 spin_unlock(&dtor_entry_bin->hb_lock);
579 spin_unlock(&table->ht_lock);
580
581 tsd_hash_dtor(&work);
582 *keyp = 0;
583
584 SEXIT;
585 }
586 EXPORT_SYMBOL(tsd_destroy);
587
588 /*
589 * tsd_exit - destroys all thread specific data for this thread
590 *
591 * Destroys all the thread specific data for this thread.
592 *
593 * Caller must prevent racing tsd_set() or tsd_get(), this function is
594 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
595 */
596 void
597 tsd_exit(void)
598 {
599 HLIST_HEAD(work);
600 tsd_hash_table_t *table;
601 tsd_hash_entry_t *pid_entry, *entry;
602 tsd_hash_bin_t *pid_entry_bin, *entry_bin;
603 ulong_t hash;
604 SENTRY;
605
606 table = tsd_hash_table;
607 ASSERT3P(table, !=, NULL);
608
609 spin_lock(&table->ht_lock);
610 pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
611 if (pid_entry == NULL) {
612 spin_unlock(&table->ht_lock);
613 SEXIT;
614 return;
615 }
616
617 /*
618 * All keys associated with this pid must be linked off of the
619 * PID_KEY entry. They are removed from the hash table and
620 * linked in to a private working list to be destroyed.
621 */
622
623 while (!list_empty(&pid_entry->he_pid_list)) {
624 entry = list_entry(pid_entry->he_pid_list.next,
625 tsd_hash_entry_t, he_pid_list);
626 ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
627
628 hash = hash_long((ulong_t)entry->he_key *
629 (ulong_t)entry->he_pid, table->ht_bits);
630 entry_bin = &table->ht_bins[hash];
631
632 spin_lock(&entry_bin->hb_lock);
633 tsd_hash_del(table, entry);
634 hlist_add_head(&entry->he_list, &work);
635 spin_unlock(&entry_bin->hb_lock);
636 }
637
638 hash = hash_long((ulong_t)pid_entry->he_key *
639 (ulong_t)pid_entry->he_pid, table->ht_bits);
640 pid_entry_bin = &table->ht_bins[hash];
641
642 spin_lock(&pid_entry_bin->hb_lock);
643 tsd_hash_del(table, pid_entry);
644 hlist_add_head(&pid_entry->he_list, &work);
645 spin_unlock(&pid_entry_bin->hb_lock);
646 spin_unlock(&table->ht_lock);
647
648 tsd_hash_dtor(&work);
649
650 SEXIT;
651 }
652 EXPORT_SYMBOL(tsd_exit);
653
654 int
655 spl_tsd_init(void)
656 {
657 SENTRY;
658
659 tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
660 if (tsd_hash_table == NULL)
661 SRETURN(1);
662
663 SRETURN(0);
664 }
665
666 void
667 spl_tsd_fini(void)
668 {
669 SENTRY;
670 tsd_hash_table_fini(tsd_hash_table);
671 tsd_hash_table = NULL;
672 SEXIT;
673 }