1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "util/thread_local.h"
11 #include "util/mutexlock.h"
12 #include "port/likely.h"
// NOTE(review): the enclosing "struct Entry {" header and its closing brace
// are not visible in this chunk; the fragment below is the struct interior.
// Entry holds one atomic void* slot: the value one ThreadLocalPtr instance
// has stored for one thread.
18 Entry() : ptr(nullptr) {}
// Copy-construction uses a relaxed load — presumably only exercised while the
// owning thread grows its entries vector (see Reset/Swap), so no cross-thread
// ordering is needed here. TODO(review): confirm no concurrent copier exists.
19 Entry(const Entry
& e
) : ptr(e
.ptr
.load(std::memory_order_relaxed
)) {}
20 std::atomic
<void*> ptr
;
25 // This is the structure that is declared as "thread_local" storage.
26 // The vector keep list of atomic pointer for all instances for "current"
27 // thread. The vector is indexed by an Id that is unique in process and
28 // associated with one ThreadLocalPtr instance. The Id is assigned by a
29 // global StaticMeta singleton. So if we instantiated 3 ThreadLocalPtr
30 // instances, each thread will have a ThreadData with a vector of size 3:
31 // ---------------------------------------------------
32 // | | instance 1 | instance 2 | instance 3 |
33 // ---------------------------------------------------
34 // | thread 1 | void* | void* | void* | <- ThreadData
35 // ---------------------------------------------------
36 // | thread 2 | void* | void* | void* | <- ThreadData
37 // ---------------------------------------------------
38 // | thread 3 | void* | void* | void* | <- ThreadData
39 // ---------------------------------------------------
// NOTE(review): the "struct ThreadData {" header and the next/prev chain
// pointers used by AddThreadData/RemoveThreadData are not visible in this
// chunk; the fragment below shows the constructor and two members.
41 explicit ThreadData(ThreadLocalPtr::StaticMeta
* _inst
) : entries(), inst(_inst
) {}
// One Entry per ThreadLocalPtr instance id, indexed by that id.
42 std::vector
<Entry
> entries
;
// Back-pointer to the owning StaticMeta. Used on thread exit (OnThreadExit),
// where the function-static inside Instance() may no longer be safe to touch.
45 ThreadLocalPtr::StaticMeta
* inst
;
// Process-wide singleton that owns all ThreadLocalPtr bookkeeping: the
// instance-id pool, the per-id UnrefHandler map, and the chain of every
// thread's ThreadData. NOTE(review): several members referenced elsewhere in
// this file (GetId(), head_, mutex_, the access specifiers, and the class's
// closing brace) are not visible in this chunk.
48 class ThreadLocalPtr::StaticMeta
{
52 // Return the next available Id
54 // Return the next available Id without claiming it
55 uint32_t PeekId() const;
56 // Return the given Id back to the free pool. This also triggers
57 // UnrefHandler for associated pointer value (if not NULL) for all threads.
58 void ReclaimId(uint32_t id
);
60 // Return the pointer value for the given id for the current thread.
61 void* Get(uint32_t id
) const;
62 // Reset the pointer value for the given id for the current thread.
63 void Reset(uint32_t id
, void* ptr
);
64 // Atomically swap the supplied ptr and return the previous value
65 void* Swap(uint32_t id
, void* ptr
);
66 // Atomically compare and swap the provided value only if it equals
// the expected value; on failure `expected` is updated to the actual value.
68 bool CompareAndSwap(uint32_t id
, void* ptr
, void*& expected
);
69 // Reset all thread local data to replacement, and return non-nullptr
70 // data for all existing threads
71 void Scrape(uint32_t id
, autovector
<void*>* ptrs
, void* const replacement
);
72 // Update res by applying func on each thread-local value. Holds a lock that
73 // prevents unref handler from running during this call, but clients must
74 // still provide external synchronization since the owning thread can
75 // access the values without internal locking, e.g., via Get() and Reset().
76 void Fold(uint32_t id
, FoldFunc func
, void* res
);
78 // Register the UnrefHandler for id
79 void SetHandler(uint32_t id
, UnrefHandler handler
);
81 // protect inst, next_instance_id_, free_instance_ids_, head_,
84 // Note that here we prefer function static variable instead of the usual
85 // global static variable. The reason is that the C++ destruction order of
86 // static variables is the reverse of their construction order.
87 // However, C++ does not guarantee any construction order when global
88 // static variables are defined in different files, while the function
89 // static variables are initialized when their functions are first called.
90 // As a result, the construction order of the function static variables
91 // can be controlled by properly invoking their first function calls in
94 // For instance, the following function contains a function static
95 // variable. We place a dummy function call of this inside
96 // Env::Default() to ensure the construction order of the construction
98 static port::Mutex
* Mutex();
100 // Returns the member mutex of the current StaticMeta. In general,
101 // Mutex() should be used instead of this one. However, in case where
102 // the static variable inside Instance() goes out of scope, MemberMutex()
103 // should be used. One example is OnThreadExit() function.
104 port::Mutex
* MemberMutex() { return &mutex_
; }
107 // Get UnrefHandler for id with acquiring mutex
108 // REQUIRES: mutex locked
109 UnrefHandler
GetHandler(uint32_t id
);
111 // Triggered before a thread terminates
112 static void OnThreadExit(void* ptr
);
114 // Add current thread's ThreadData to the global chain
115 // REQUIRES: mutex locked
116 void AddThreadData(ThreadData
* d
);
118 // Remove current thread's ThreadData from the global chain
119 // REQUIRES: mutex locked
120 void RemoveThreadData(ThreadData
* d
);
122 static ThreadData
* GetThreadLocal();
// Next id to hand out when the free pool below is empty (see GetId/PeekId).
124 uint32_t next_instance_id_
;
125 // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
126 // frequently. This also prevents it from blowing up the vector space.
127 autovector
<uint32_t> free_instance_ids_
;
128 // Chain all thread local structure together. This is necessary since
129 // when one ThreadLocalPtr gets destroyed, we need to loop over each
130 // thread's version of pointer corresponding to that instance and
131 // call UnrefHandler for it.
// NOTE(review): the head_ member declaration itself is not visible here.
134 std::unordered_map
<uint32_t, UnrefHandler
> handler_map_
;
136 // The private mutex. Developers should always use Mutex() instead of
137 // using this variable directly.
// NOTE(review): the mutex_ member declaration is not visible in this chunk.
139 #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
140 // Thread local storage
141 static __thread ThreadData
* tls_
;
// NOTE(review): the #else branch / #endif of this conditional are not
// visible in this chunk.
144 // Used to make thread exit trigger possible if !defined(OS_MACOSX).
145 // Otherwise, used to retrieve thread data.
146 pthread_key_t pthread_key_
;
150 #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
// Out-of-class definition of the per-thread cached ThreadData pointer.
// NOTE(review): the matching #endif is not visible in this chunk.
151 __thread ThreadData
* ThreadLocalPtr::StaticMeta::tls_
= nullptr;
154 // Windows doesn't support a per-thread destructor with its
155 // TLS primitives. So, we build it manually by inserting a
156 // function to be called on each thread's exit.
157 // See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
158 // and http://www.nynaeve.net/?p=183
160 // really we do this to have clear conscience since using TLS with thread-pools
162 // although OK within a request. But otherwise, threads have no identity in its
165 // This runs on windows only called from the System Loader
168 // Windows cleanup routine is invoked from a System Loader with a different
169 // signature so we can not directly hookup the original OnThreadExit which is
171 // so we make the StaticMeta class share the address of that function with us, so
173 namespace wintlscleanup
{
175 // This is set to OnThreadExit in StaticMeta singleton constructor
176 UnrefHandler thread_local_inclass_routine
= nullptr;
// Key shared by StaticMeta so the Windows TLS callback can fetch the dying
// thread's ThreadData; pthread_key_t(-1) means "not yet initialized".
177 pthread_key_t thread_local_key
= -1;
179 // Static callback function to call with each thread termination.
180 void NTAPI
WinOnThreadExit(PVOID module
, DWORD reason
, PVOID reserved
) {
181 // We decided to punt on PROCESS_EXIT
182 if (DLL_THREAD_DETACH
== reason
) {
183 if (thread_local_key
!= pthread_key_t(-1) && thread_local_inclass_routine
!= nullptr) {
184 void* tls
= pthread_getspecific(thread_local_key
);
185 if (tls
!= nullptr) {
186 thread_local_inclass_routine(tls
);
194 // extern "C" suppresses C++ name mangling so we know the symbol name for the
195 // linker /INCLUDE:symbol pragma above.
199 // The linker must not discard thread_callback_on_exit. (We force a reference
200 // to this variable with a linker /include:symbol pragma to ensure that.) If
201 // this variable is discarded, the OnThreadExit function will never be called.
// NOTE(review): what follows interleaves the x64 (const_seg) and x86
// (data_seg) registrations of the TLS callback; the #if/#else architecture
// guards and the section-reset pragmas are not visible in this chunk.
204 // .CRT section is merged with .rdata on x64 so it must be constant data.
205 #pragma const_seg(".CRT$XLB")
206 // When defining a const variable, it must have external linkage to be sure the
207 // linker doesn't discard it.
208 extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit
;
209 const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit
=
210 wintlscleanup::WinOnThreadExit
;
211 // Reset the default section.
214 #pragma comment(linker, "/include:_tls_used")
215 #pragma comment(linker, "/include:p_thread_callback_on_exit")
219 #pragma data_seg(".CRT$XLB")
220 PIMAGE_TLS_CALLBACK p_thread_callback_on_exit
= wintlscleanup::WinOnThreadExit
;
221 // Reset the default section.
224 #pragma comment(linker, "/INCLUDE:__tls_used")
225 #pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")
230 // https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
231 BOOL WINAPI
DllMain(HINSTANCE h
, DWORD dwReason
, PVOID pv
) {
232 if (dwReason
== DLL_THREAD_DETACH
)
233 wintlscleanup::WinOnThreadExit(h
, dwReason
, pv
);
241 void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
243 ThreadLocalPtr::StaticMeta
* ThreadLocalPtr::Instance() {
244 // Here we prefer function static variable instead of global
245 // static variable as function static variable is initialized
246 // when the function is first call. As a result, we can properly
247 // control their construction order by properly preparing their
248 // first function call.
250 // Note that here we decide to make "inst" a static pointer w/o deleting
251 // it at the end instead of a static variable. This is to avoid the following
252 // destruction order disaster happens when a child thread using ThreadLocalPtr
253 // dies AFTER the main thread dies: When a child thread happens to use
254 // ThreadLocalPtr, it will try to delete its thread-local data on its
255 // OnThreadExit when the child thread dies. However, OnThreadExit depends
256 // on the following variable. As a result, if the main thread dies before any
257 // child thread happen to use ThreadLocalPtr dies, then the destruction of
258 // the following variable will go first, then OnThreadExit, therefore causing
261 // The above problem can be solved by using thread_local to store tls_ instead
262 // of using __thread. The major difference between thread_local and __thread
263 // is that thread_local supports dynamic construction and destruction of
264 // non-primitive typed variables. As a result, we can guarantee the
265 // destruction order even when the main thread dies before any child threads.
266 // However, thread_local is not supported in all compilers that accept -std=c++11
267 // (e.g., eg Mac with XCode < 8. XCode 8+ supports thread_local).
268 static ThreadLocalPtr::StaticMeta
* inst
= new ThreadLocalPtr::StaticMeta();
272 port::Mutex
* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_
; }
274 void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr
) {
275 auto* tls
= static_cast<ThreadData
*>(ptr
);
276 assert(tls
!= nullptr);
278 // Use the cached StaticMeta::Instance() instead of directly calling
279 // the variable inside StaticMeta::Instance() might already go out of
280 // scope here in case this OnThreadExit is called after the main thread
282 auto* inst
= tls
->inst
;
283 pthread_setspecific(inst
->pthread_key_
, nullptr);
285 MutexLock
l(inst
->MemberMutex());
286 inst
->RemoveThreadData(tls
);
287 // Unref stored pointers of current thread from all instances
289 for (auto& e
: tls
->entries
) {
290 void* raw
= e
.ptr
.load();
291 if (raw
!= nullptr) {
292 auto unref
= inst
->GetHandler(id
);
293 if (unref
!= nullptr) {
299 // Delete thread local structure no matter if it is Mac platform
// Constructor of the singleton: registers OnThreadExit as the pthread key
// destructor and, on Windows, shares the cleanup routine and key with the
// loader-callback machinery above.
// NOTE(review): large parts of this constructor are not visible in this
// chunk — the pthread_key_create error branch, the static-destructor helper
// object described by the comments below, the head_ self-link init, and the
// OS_WIN/OS_MACOSX preprocessor guards. The visible fragment is kept as-is.
303 ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) {
304 if (pthread_key_create(&pthread_key_
, &OnThreadExit
) != 0) {
308 // OnThreadExit is not getting called on the main thread.
309 // Call through the static destructor mechanism to avoid memory leak.
311 // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
312 // singleton (destructors are invoked in reverse order of constructor
313 // _completion_); the latter must not mutate internal members. This
314 // cleanup mechanism inherently relies on use-after-release of the
315 // StaticMeta, and is brittle with respect to compiler-specific handling
316 // of memory backing destructed statically-scoped objects. Perhaps
317 // registering with atexit(3) would be more robust.
319 // This is not required on Windows.
323 #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
325 static_cast<ThreadData
*>(pthread_getspecific(Instance()->pthread_key_
));
332 #endif // !defined(OS_WIN)
338 // Share with Windows its cleanup routine and the key
339 wintlscleanup::thread_local_inclass_routine
= OnThreadExit
;
340 wintlscleanup::thread_local_key
= pthread_key_
;
344 void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData
* d
) {
345 Mutex()->AssertHeld();
347 d
->prev
= head_
.prev
;
348 head_
.prev
->next
= d
;
352 void ThreadLocalPtr::StaticMeta::RemoveThreadData(
354 Mutex()->AssertHeld();
355 d
->next
->prev
= d
->prev
;
356 d
->prev
->next
= d
->next
;
357 d
->next
= d
->prev
= d
;
// Return (lazily creating and chain-registering) the calling thread's
// ThreadData. NOTE(review): this function is badly truncated in this chunk —
// the assignment target of the pthread_getspecific cast, the closing #endif,
// the failure-path delete/abort, and the final return are all missing; the
// visible fragment is kept byte-identical below.
360 ThreadData
* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
361 #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
362 // Make this local variable name look like a member variable so that we
363 // can share all the code below
365 static_cast<ThreadData
*>(pthread_getspecific(Instance()->pthread_key_
));
// First use on this thread: allocate a ThreadData and publish it.
368 if (UNLIKELY(tls_
== nullptr)) {
369 auto* inst
= Instance();
370 tls_
= new ThreadData(inst
);
372 // Register it in the global chain, needs to be done before thread exit
373 // handler registration
374 MutexLock
l(Mutex());
375 inst
->AddThreadData(tls_
);
377 // Even it is not OS_MACOSX, need to register value for pthread_key_ so that
378 // its exit handler will be triggered.
379 if (pthread_setspecific(inst
->pthread_key_
, tls_
) != 0) {
// Failure path: undo the chain registration taken just above.
381 MutexLock
l(Mutex());
382 inst
->RemoveThreadData(tls_
);
391 void* ThreadLocalPtr::StaticMeta::Get(uint32_t id
) const {
392 auto* tls
= GetThreadLocal();
393 if (UNLIKELY(id
>= tls
->entries
.size())) {
396 return tls
->entries
[id
].ptr
.load(std::memory_order_acquire
);
399 void ThreadLocalPtr::StaticMeta::Reset(uint32_t id
, void* ptr
) {
400 auto* tls
= GetThreadLocal();
401 if (UNLIKELY(id
>= tls
->entries
.size())) {
402 // Need mutex to protect entries access within ReclaimId
403 MutexLock
l(Mutex());
404 tls
->entries
.resize(id
+ 1);
406 tls
->entries
[id
].ptr
.store(ptr
, std::memory_order_release
);
409 void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id
, void* ptr
) {
410 auto* tls
= GetThreadLocal();
411 if (UNLIKELY(id
>= tls
->entries
.size())) {
412 // Need mutex to protect entries access within ReclaimId
413 MutexLock
l(Mutex());
414 tls
->entries
.resize(id
+ 1);
416 return tls
->entries
[id
].ptr
.exchange(ptr
, std::memory_order_acquire
);
419 bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id
, void* ptr
,
421 auto* tls
= GetThreadLocal();
422 if (UNLIKELY(id
>= tls
->entries
.size())) {
423 // Need mutex to protect entries access within ReclaimId
424 MutexLock
l(Mutex());
425 tls
->entries
.resize(id
+ 1);
427 return tls
->entries
[id
].ptr
.compare_exchange_strong(
428 expected
, ptr
, std::memory_order_release
, std::memory_order_relaxed
);
431 void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id
, autovector
<void*>* ptrs
,
432 void* const replacement
) {
433 MutexLock
l(Mutex());
434 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
435 if (id
< t
->entries
.size()) {
437 t
->entries
[id
].ptr
.exchange(replacement
, std::memory_order_acquire
);
438 if (ptr
!= nullptr) {
439 ptrs
->push_back(ptr
);
445 void ThreadLocalPtr::StaticMeta::Fold(uint32_t id
, FoldFunc func
, void* res
) {
446 MutexLock
l(Mutex());
447 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
448 if (id
< t
->entries
.size()) {
449 void* ptr
= t
->entries
[id
].ptr
.load();
450 if (ptr
!= nullptr) {
457 uint32_t ThreadLocalPtr::TEST_PeekId() {
458 return Instance()->PeekId();
461 void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id
, UnrefHandler handler
) {
462 MutexLock
l(Mutex());
463 handler_map_
[id
] = handler
;
466 UnrefHandler
ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id
) {
467 Mutex()->AssertHeld();
468 auto iter
= handler_map_
.find(id
);
469 if (iter
== handler_map_
.end()) {
475 uint32_t ThreadLocalPtr::StaticMeta::GetId() {
476 MutexLock
l(Mutex());
477 if (free_instance_ids_
.empty()) {
478 return next_instance_id_
++;
481 uint32_t id
= free_instance_ids_
.back();
482 free_instance_ids_
.pop_back();
486 uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
487 MutexLock
l(Mutex());
488 if (!free_instance_ids_
.empty()) {
489 return free_instance_ids_
.back();
491 return next_instance_id_
;
494 void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id
) {
495 // This id is not used, go through all thread local data and release
496 // corresponding value
497 MutexLock
l(Mutex());
498 auto unref
= GetHandler(id
);
499 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
500 if (id
< t
->entries
.size()) {
501 void* ptr
= t
->entries
[id
].ptr
.exchange(nullptr);
502 if (ptr
!= nullptr && unref
!= nullptr) {
507 handler_map_
[id
] = nullptr;
508 free_instance_ids_
.push_back(id
);
511 ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler
)
512 : id_(Instance()->GetId()) {
513 if (handler
!= nullptr) {
514 Instance()->SetHandler(id_
, handler
);
518 ThreadLocalPtr::~ThreadLocalPtr() {
519 Instance()->ReclaimId(id_
);
522 void* ThreadLocalPtr::Get() const {
523 return Instance()->Get(id_
);
526 void ThreadLocalPtr::Reset(void* ptr
) {
527 Instance()->Reset(id_
, ptr
);
530 void* ThreadLocalPtr::Swap(void* ptr
) {
531 return Instance()->Swap(id_
, ptr
);
534 bool ThreadLocalPtr::CompareAndSwap(void* ptr
, void*& expected
) {
535 return Instance()->CompareAndSwap(id_
, ptr
, expected
);
538 void ThreadLocalPtr::Scrape(autovector
<void*>* ptrs
, void* const replacement
) {
539 Instance()->Scrape(id_
, ptrs
, replacement
);
542 void ThreadLocalPtr::Fold(FoldFunc func
, void* res
) {
543 Instance()->Fold(id_
, func
, res
);
546 } // namespace rocksdb