]> git.proxmox.com Git - mirror_spl.git/blob - module/spl/spl-tsd.c
9a0987527b2705d9bd5a1b0f9be2a8add67ffda6
[mirror_spl.git] / module / spl / spl-tsd.c
1 /*****************************************************************************\
2 * Copyright (C) 2010 Lawrence Livermore National Security, LLC.
3 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
4 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
5 * UCRL-CODE-235197
6 *
7 * This file is part of the SPL, Solaris Porting Layer.
8 * For details, see <http://zfsonlinux.org/>.
9 *
10 * The SPL is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version.
14 *
15 * The SPL is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
22 *****************************************************************************
23 * Solaris Porting Layer (SPL) Thread Specific Data Implementation.
24 *
25 * Thread specific data has been implemented using a hash table, this avoids
26 * the need to add a member to the task structure and allows maximum
27 * portability between kernels. This implementation has been optimized
28 * to keep the tsd_set() and tsd_get() times as small as possible.
29 *
30 * The majority of the entries in the hash table are for specific tsd
31 * entries. These entries are hashed by the product of their key and
32 * pid because by design the key and pid are guaranteed to be unique.
33 * Their product also has the desirable property that it will be uniformly
34 * distributed over the hash bins providing neither the pid nor key is zero.
35 * Under linux the zero pid is always the init process and thus won't be
36 * used, and this implementation is careful never to assign a zero key.
37 * By default the hash table is sized to 512 bins which is expected to
38 * be sufficient for light to moderate usage of thread specific data.
39 *
40 * The hash table contains two additional types of entries. The first
41 * type of entry is called a 'key' entry and it is added to the hash during
42 * tsd_create(). It is used to store the address of the destructor function
43 * and it is used as an anchor point. All tsd entries which use the same
44 * key will be linked to this entry. This is used during tsd_destroy() to
45 * quickly call the destructor function for all tsd associated with the key.
46 * The 'key' entry may be looked up with tsd_hash_search() by passing the
47 * key you wish to lookup and DTOR_PID constant as the pid.
48 *
49 * The second type of entry is called a 'pid' entry and it is added to the
50 * hash the first time a process sets a key. The 'pid' entry is also used
51 * as an anchor and all tsd for the process will be linked to it. This
52 * list is used during tsd_exit() to ensure all registered destructors
53 * are run for the process. The 'pid' entry may be looked up with
54 * tsd_hash_search() by passing the PID_KEY constant as the key, and
55 * the process pid. Note that tsd_exit() is called by thread_exit()
56 * so if you are using the Solaris thread API you should not need to call
57 * tsd_exit() directly.
58 *
59 \*****************************************************************************/
60
61 #include <sys/kmem.h>
62 #include <sys/thread.h>
63 #include <sys/tsd.h>
64 #include <linux/hash.h>
65
66 typedef struct tsd_hash_bin {
67 spinlock_t hb_lock;
68 struct hlist_head hb_head;
69 } tsd_hash_bin_t;
70
71 typedef struct tsd_hash_table {
72 spinlock_t ht_lock;
73 uint_t ht_bits;
74 uint_t ht_key;
75 tsd_hash_bin_t *ht_bins;
76 } tsd_hash_table_t;
77
78 typedef struct tsd_hash_entry {
79 uint_t he_key;
80 pid_t he_pid;
81 dtor_func_t he_dtor;
82 void *he_value;
83 struct hlist_node he_list;
84 struct list_head he_key_list;
85 struct list_head he_pid_list;
86 } tsd_hash_entry_t;
87
88 static tsd_hash_table_t *tsd_hash_table = NULL;
89
90
91 /*
92 * tsd_hash_search - searches hash table for tsd_hash_entry
93 * @table: hash table
94 * @key: search key
95 * @pid: search pid
96 */
97 static tsd_hash_entry_t *
98 tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
99 {
100 struct hlist_node *node;
101 tsd_hash_entry_t *entry;
102 tsd_hash_bin_t *bin;
103 ulong_t hash;
104
105 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
106 bin = &table->ht_bins[hash];
107 spin_lock(&bin->hb_lock);
108 hlist_for_each(node, &bin->hb_head) {
109 entry = list_entry(node, tsd_hash_entry_t, he_list);
110 if ((entry->he_key == key) && (entry->he_pid == pid)) {
111 spin_unlock(&bin->hb_lock);
112 return (entry);
113 }
114 }
115
116 spin_unlock(&bin->hb_lock);
117 return (NULL);
118 }
119
120 /*
121 * tsd_hash_dtor - call the destructor and free all entries on the list
122 * @work: list of hash entries
123 *
124 * For a list of entries which have all already been removed from the
125 * hash call their registered destructor then free the associated memory.
126 */
127 static void
128 tsd_hash_dtor(struct hlist_head *work)
129 {
130 tsd_hash_entry_t *entry;
131
132 while (!hlist_empty(work)) {
133 entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
134 hlist_del(&entry->he_list);
135
136 if (entry->he_dtor && entry->he_pid != DTOR_PID)
137 entry->he_dtor(entry->he_value);
138
139 kmem_free(entry, sizeof(tsd_hash_entry_t));
140 }
141 }
142
143 /*
144 * tsd_hash_add - adds an entry to hash table
145 * @table: hash table
146 * @key: search key
147 * @pid: search pid
148 *
149 * The caller is responsible for ensuring the unique key/pid do not
150 * already exist in the hash table. This possible because all entries
151 * are thread specific thus a concurrent thread will never attempt to
152 * add this key/pid. Because multiple bins must be checked to add
153 * links to the dtor and pid entries the entire table is locked.
154 */
155 static int
156 tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
157 {
158 tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
159 tsd_hash_bin_t *bin;
160 ulong_t hash;
161 int rc = 0;
162
163 ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
164
165 /* New entry allocate structure, set value, and add to hash */
166 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
167 if (entry == NULL)
168 return (ENOMEM);
169
170 entry->he_key = key;
171 entry->he_pid = pid;
172 entry->he_value = value;
173 INIT_HLIST_NODE(&entry->he_list);
174 INIT_LIST_HEAD(&entry->he_key_list);
175 INIT_LIST_HEAD(&entry->he_pid_list);
176
177 spin_lock(&table->ht_lock);
178
179 /* Destructor entry must exist for all valid keys */
180 dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
181 ASSERT3P(dtor_entry, !=, NULL);
182 entry->he_dtor = dtor_entry->he_dtor;
183
184 /* Process entry must exist for all valid processes */
185 pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
186 ASSERT3P(pid_entry, !=, NULL);
187
188 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
189 bin = &table->ht_bins[hash];
190 spin_lock(&bin->hb_lock);
191
192 /* Add to the hash, key, and pid lists */
193 hlist_add_head(&entry->he_list, &bin->hb_head);
194 list_add(&entry->he_key_list, &dtor_entry->he_key_list);
195 list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
196
197 spin_unlock(&bin->hb_lock);
198 spin_unlock(&table->ht_lock);
199
200 return (rc);
201 }
202
203 /*
204 * tsd_hash_add_key - adds a destructor entry to the hash table
205 * @table: hash table
206 * @keyp: search key
207 * @dtor: key destructor
208 *
209 * For every unique key there is a single entry in the hash which is used
210 * as anchor. All other thread specific entries for this key are linked
211 * to this anchor via the 'he_key_list' list head. On return they keyp
212 * will be set to the next available key for the hash table.
213 */
214 static int
215 tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
216 {
217 tsd_hash_entry_t *tmp_entry, *entry;
218 tsd_hash_bin_t *bin;
219 ulong_t hash;
220 int keys_checked = 0;
221
222 ASSERT3P(table, !=, NULL);
223
224 /* Allocate entry to be used as a destructor for this key */
225 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
226 if (entry == NULL)
227 return (ENOMEM);
228
229 /* Determine next available key value */
230 spin_lock(&table->ht_lock);
231 do {
232 /* Limited to TSD_KEYS_MAX concurrent unique keys */
233 if (table->ht_key++ > TSD_KEYS_MAX)
234 table->ht_key = 1;
235
236 /* Ensure failure when all TSD_KEYS_MAX keys are in use */
237 if (keys_checked++ >= TSD_KEYS_MAX) {
238 spin_unlock(&table->ht_lock);
239 return (ENOENT);
240 }
241
242 tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
243 } while (tmp_entry);
244
245 /* Add destructor entry in to hash table */
246 entry->he_key = *keyp = table->ht_key;
247 entry->he_pid = DTOR_PID;
248 entry->he_dtor = dtor;
249 entry->he_value = NULL;
250 INIT_HLIST_NODE(&entry->he_list);
251 INIT_LIST_HEAD(&entry->he_key_list);
252 INIT_LIST_HEAD(&entry->he_pid_list);
253
254 hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
255 bin = &table->ht_bins[hash];
256 spin_lock(&bin->hb_lock);
257
258 hlist_add_head(&entry->he_list, &bin->hb_head);
259
260 spin_unlock(&bin->hb_lock);
261 spin_unlock(&table->ht_lock);
262
263 return (0);
264 }
265
266 /*
267 * tsd_hash_add_pid - adds a process entry to the hash table
268 * @table: hash table
269 * @pid: search pid
270 *
271 * For every process these is a single entry in the hash which is used
272 * as anchor. All other thread specific entries for this process are
273 * linked to this anchor via the 'he_pid_list' list head.
274 */
275 static int
276 tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
277 {
278 tsd_hash_entry_t *entry;
279 tsd_hash_bin_t *bin;
280 ulong_t hash;
281
282 /* Allocate entry to be used as the process reference */
283 entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE);
284 if (entry == NULL)
285 return (ENOMEM);
286
287 spin_lock(&table->ht_lock);
288 entry->he_key = PID_KEY;
289 entry->he_pid = pid;
290 entry->he_dtor = NULL;
291 entry->he_value = NULL;
292 INIT_HLIST_NODE(&entry->he_list);
293 INIT_LIST_HEAD(&entry->he_key_list);
294 INIT_LIST_HEAD(&entry->he_pid_list);
295
296 hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
297 bin = &table->ht_bins[hash];
298 spin_lock(&bin->hb_lock);
299
300 hlist_add_head(&entry->he_list, &bin->hb_head);
301
302 spin_unlock(&bin->hb_lock);
303 spin_unlock(&table->ht_lock);
304
305 return (0);
306 }
307
308 /*
309 * tsd_hash_del - delete an entry from hash table, key, and pid lists
310 * @table: hash table
311 * @key: search key
312 * @pid: search pid
313 */
314 static void
315 tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
316 {
317 ASSERT(spin_is_locked(&table->ht_lock));
318 hlist_del(&entry->he_list);
319 list_del_init(&entry->he_key_list);
320 list_del_init(&entry->he_pid_list);
321 }
322
323 /*
324 * tsd_hash_table_init - allocate a hash table
325 * @bits: hash table size
326 *
327 * A hash table with 2^bits bins will be created, it may not be resized
328 * after the fact and must be free'd with tsd_hash_table_fini().
329 */
330 static tsd_hash_table_t *
331 tsd_hash_table_init(uint_t bits)
332 {
333 tsd_hash_table_t *table;
334 int hash, size = (1 << bits);
335
336 table = kmem_zalloc(sizeof(tsd_hash_table_t), KM_SLEEP);
337 if (table == NULL)
338 return (NULL);
339
340 table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size, KM_SLEEP);
341 if (table->ht_bins == NULL) {
342 kmem_free(table, sizeof(tsd_hash_table_t));
343 return (NULL);
344 }
345
346 for (hash = 0; hash < size; hash++) {
347 spin_lock_init(&table->ht_bins[hash].hb_lock);
348 INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
349 }
350
351 spin_lock_init(&table->ht_lock);
352 table->ht_bits = bits;
353 table->ht_key = 1;
354
355 return (table);
356 }
357
358 /*
359 * tsd_hash_table_fini - free a hash table
360 * @table: hash table
361 *
362 * Free a hash table allocated by tsd_hash_table_init(). If the hash
363 * table is not empty this function will call the proper destructor for
364 * all remaining entries before freeing the memory used by those entries.
365 */
366 static void
367 tsd_hash_table_fini(tsd_hash_table_t *table)
368 {
369 HLIST_HEAD(work);
370 tsd_hash_bin_t *bin;
371 tsd_hash_entry_t *entry;
372 int size, i;
373
374 ASSERT3P(table, !=, NULL);
375 spin_lock(&table->ht_lock);
376 for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
377 bin = &table->ht_bins[i];
378 spin_lock(&bin->hb_lock);
379 while (!hlist_empty(&bin->hb_head)) {
380 entry = hlist_entry(bin->hb_head.first,
381 tsd_hash_entry_t, he_list);
382 tsd_hash_del(table, entry);
383 hlist_add_head(&entry->he_list, &work);
384 }
385 spin_unlock(&bin->hb_lock);
386 }
387 spin_unlock(&table->ht_lock);
388
389 tsd_hash_dtor(&work);
390 kmem_free(table->ht_bins, sizeof(tsd_hash_bin_t)*(1<<table->ht_bits));
391 kmem_free(table, sizeof(tsd_hash_table_t));
392 }
393
394 /*
395 * tsd_set - set thread specific data
396 * @key: lookup key
397 * @value: value to set
398 *
399 * Caller must prevent racing tsd_create() or tsd_destroy(), protected
400 * from racing tsd_get() or tsd_set() because it is thread specific.
401 * This function has been optimized to be fast for the update case.
402 * When setting the tsd initially it will be slower due to additional
403 * required locking and potential memory allocations.
404 */
405 int
406 tsd_set(uint_t key, void *value)
407 {
408 tsd_hash_table_t *table;
409 tsd_hash_entry_t *entry;
410 pid_t pid;
411 int rc;
412
413 table = tsd_hash_table;
414 pid = curthread->pid;
415 ASSERT3P(table, !=, NULL);
416
417 if ((key == 0) || (key > TSD_KEYS_MAX))
418 return (EINVAL);
419
420 /* Entry already exists in hash table update value */
421 entry = tsd_hash_search(table, key, pid);
422 if (entry) {
423 entry->he_value = value;
424 return (0);
425 }
426
427 /* Add a process entry to the hash if not yet exists */
428 entry = tsd_hash_search(table, PID_KEY, pid);
429 if (entry == NULL) {
430 rc = tsd_hash_add_pid(table, pid);
431 if (rc)
432 return (rc);
433 }
434
435 rc = tsd_hash_add(table, key, pid, value);
436 return (rc);
437 }
438 EXPORT_SYMBOL(tsd_set);
439
440 /*
441 * tsd_get - get thread specific data
442 * @key: lookup key
443 *
444 * Caller must prevent racing tsd_create() or tsd_destroy(). This
445 * implementation is designed to be fast and scalable, it does not
446 * lock the entire table only a single hash bin.
447 */
448 void *
449 tsd_get(uint_t key)
450 {
451 tsd_hash_entry_t *entry;
452
453 ASSERT3P(tsd_hash_table, !=, NULL);
454
455 if ((key == 0) || (key > TSD_KEYS_MAX))
456 return (NULL);
457
458 entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
459 if (entry == NULL)
460 return (NULL);
461
462 return (entry->he_value);
463 }
464 EXPORT_SYMBOL(tsd_get);
465
466 /*
467 * tsd_create - create thread specific data key
468 * @keyp: lookup key address
469 * @dtor: destructor called during tsd_destroy() or tsd_exit()
470 *
471 * Provided key must be set to 0 or it assumed to be already in use.
472 * The dtor is allowed to be NULL in which case no additional cleanup
473 * for the data is performed during tsd_destroy() or tsd_exit().
474 *
475 * Caller must prevent racing tsd_set() or tsd_get(), this function is
476 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
477 */
478 void
479 tsd_create(uint_t *keyp, dtor_func_t dtor)
480 {
481 ASSERT3P(keyp, !=, NULL);
482 if (*keyp)
483 return;
484
485 (void)tsd_hash_add_key(tsd_hash_table, keyp, dtor);
486 }
487 EXPORT_SYMBOL(tsd_create);
488
489 /*
490 * tsd_destroy - destroy thread specific data
491 * @keyp: lookup key address
492 *
493 * Destroys the thread specific data on all threads which use this key.
494 *
495 * Caller must prevent racing tsd_set() or tsd_get(), this function is
496 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
497 */
498 void
499 tsd_destroy(uint_t *keyp)
500 {
501 HLIST_HEAD(work);
502 tsd_hash_table_t *table;
503 tsd_hash_entry_t *dtor_entry, *entry;
504 tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
505 ulong_t hash;
506
507 table = tsd_hash_table;
508 ASSERT3P(table, !=, NULL);
509
510 spin_lock(&table->ht_lock);
511 dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
512 if (dtor_entry == NULL) {
513 spin_unlock(&table->ht_lock);
514 return;
515 }
516
517 /*
518 * All threads which use this key must be linked off of the
519 * DTOR_PID entry. They are removed from the hash table and
520 * linked in to a private working list to be destroyed.
521 */
522 while (!list_empty(&dtor_entry->he_key_list)) {
523 entry = list_entry(dtor_entry->he_key_list.next,
524 tsd_hash_entry_t, he_key_list);
525 ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
526 ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
527
528 hash = hash_long((ulong_t)entry->he_key *
529 (ulong_t)entry->he_pid, table->ht_bits);
530 entry_bin = &table->ht_bins[hash];
531
532 spin_lock(&entry_bin->hb_lock);
533 tsd_hash_del(table, entry);
534 hlist_add_head(&entry->he_list, &work);
535 spin_unlock(&entry_bin->hb_lock);
536 }
537
538 hash = hash_long((ulong_t)dtor_entry->he_key *
539 (ulong_t)dtor_entry->he_pid, table->ht_bits);
540 dtor_entry_bin = &table->ht_bins[hash];
541
542 spin_lock(&dtor_entry_bin->hb_lock);
543 tsd_hash_del(table, dtor_entry);
544 hlist_add_head(&dtor_entry->he_list, &work);
545 spin_unlock(&dtor_entry_bin->hb_lock);
546 spin_unlock(&table->ht_lock);
547
548 tsd_hash_dtor(&work);
549 *keyp = 0;
550 }
551 EXPORT_SYMBOL(tsd_destroy);
552
553 /*
554 * tsd_exit - destroys all thread specific data for this thread
555 *
556 * Destroys all the thread specific data for this thread.
557 *
558 * Caller must prevent racing tsd_set() or tsd_get(), this function is
559 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
560 */
561 void
562 tsd_exit(void)
563 {
564 HLIST_HEAD(work);
565 tsd_hash_table_t *table;
566 tsd_hash_entry_t *pid_entry, *entry;
567 tsd_hash_bin_t *pid_entry_bin, *entry_bin;
568 ulong_t hash;
569
570 table = tsd_hash_table;
571 ASSERT3P(table, !=, NULL);
572
573 spin_lock(&table->ht_lock);
574 pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
575 if (pid_entry == NULL) {
576 spin_unlock(&table->ht_lock);
577 return;
578 }
579
580 /*
581 * All keys associated with this pid must be linked off of the
582 * PID_KEY entry. They are removed from the hash table and
583 * linked in to a private working list to be destroyed.
584 */
585
586 while (!list_empty(&pid_entry->he_pid_list)) {
587 entry = list_entry(pid_entry->he_pid_list.next,
588 tsd_hash_entry_t, he_pid_list);
589 ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
590
591 hash = hash_long((ulong_t)entry->he_key *
592 (ulong_t)entry->he_pid, table->ht_bits);
593 entry_bin = &table->ht_bins[hash];
594
595 spin_lock(&entry_bin->hb_lock);
596 tsd_hash_del(table, entry);
597 hlist_add_head(&entry->he_list, &work);
598 spin_unlock(&entry_bin->hb_lock);
599 }
600
601 hash = hash_long((ulong_t)pid_entry->he_key *
602 (ulong_t)pid_entry->he_pid, table->ht_bits);
603 pid_entry_bin = &table->ht_bins[hash];
604
605 spin_lock(&pid_entry_bin->hb_lock);
606 tsd_hash_del(table, pid_entry);
607 hlist_add_head(&pid_entry->he_list, &work);
608 spin_unlock(&pid_entry_bin->hb_lock);
609 spin_unlock(&table->ht_lock);
610
611 tsd_hash_dtor(&work);
612 }
613 EXPORT_SYMBOL(tsd_exit);
614
615 int
616 spl_tsd_init(void)
617 {
618 tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
619 if (tsd_hash_table == NULL)
620 return (1);
621
622 return (0);
623 }
624
625 void
626 spl_tsd_fini(void)
627 {
628 tsd_hash_table_fini(tsd_hash_table);
629 tsd_hash_table = NULL;
630 }