1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "util/thread_local.h"

#include <pthread.h>
#include <stdlib.h>

#include "port/likely.h"
#include "util/mutexlock.h"
// One slot of a thread's local storage: a single atomic pointer owned by
// one ThreadLocalPtr instance for the current thread.
struct Entry {
  Entry() : ptr(nullptr) {}
  // Copy reads the source relaxed: copies only happen while the owning
  // thread resizes its own vector under the global mutex, so no ordering
  // with other writers is required here.
  Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {}
  std::atomic<void*> ptr;
};
25 // This is the structure that is declared as "thread_local" storage.
26 // The vector keep list of atomic pointer for all instances for "current"
27 // thread. The vector is indexed by an Id that is unique in process and
28 // associated with one ThreadLocalPtr instance. The Id is assigned by a
29 // global StaticMeta singleton. So if we instantiated 3 ThreadLocalPtr
30 // instances, each thread will have a ThreadData with a vector of size 3:
31 // ---------------------------------------------------
32 // | | instance 1 | instance 2 | instnace 3 |
33 // ---------------------------------------------------
34 // | thread 1 | void* | void* | void* | <- ThreadData
35 // ---------------------------------------------------
36 // | thread 2 | void* | void* | void* | <- ThreadData
37 // ---------------------------------------------------
38 // | thread 3 | void* | void* | void* | <- ThreadData
39 // ---------------------------------------------------
41 explicit ThreadData(ThreadLocalPtr::StaticMeta
* _inst
)
46 std::vector
<Entry
> entries
;
49 ThreadLocalPtr::StaticMeta
* inst
;
52 class ThreadLocalPtr::StaticMeta
{
56 // Return the next available Id
58 // Return the next available Id without claiming it
59 uint32_t PeekId() const;
60 // Return the given Id back to the free pool. This also triggers
61 // UnrefHandler for associated pointer value (if not NULL) for all threads.
62 void ReclaimId(uint32_t id
);
64 // Return the pointer value for the given id for the current thread.
65 void* Get(uint32_t id
) const;
66 // Reset the pointer value for the given id for the current thread.
67 void Reset(uint32_t id
, void* ptr
);
68 // Atomically swap the supplied ptr and return the previous value
69 void* Swap(uint32_t id
, void* ptr
);
70 // Atomically compare and swap the provided value only if it equals
72 bool CompareAndSwap(uint32_t id
, void* ptr
, void*& expected
);
73 // Reset all thread local data to replacement, and return non-nullptr
74 // data for all existing threads
75 void Scrape(uint32_t id
, autovector
<void*>* ptrs
, void* const replacement
);
76 // Update res by applying func on each thread-local value. Holds a lock that
77 // prevents unref handler from running during this call, but clients must
78 // still provide external synchronization since the owning thread can
79 // access the values without internal locking, e.g., via Get() and Reset().
80 void Fold(uint32_t id
, FoldFunc func
, void* res
);
82 // Register the UnrefHandler for id
83 void SetHandler(uint32_t id
, UnrefHandler handler
);
85 // protect inst, next_instance_id_, free_instance_ids_, head_,
88 // Note that here we prefer function static variable instead of the usual
89 // global static variable. The reason is that c++ destruction order of
90 // static variables in the reverse order of their construction order.
91 // However, C++ does not guarantee any construction order when global
92 // static variables are defined in different files, while the function
93 // static variables are initialized when their function are first called.
94 // As a result, the construction order of the function static variables
95 // can be controlled by properly invoke their first function calls in
98 // For instance, the following function contains a function static
99 // variable. We place a dummy function call of this inside
100 // Env::Default() to ensure the construction order of the construction
102 static port::Mutex
* Mutex();
104 // Returns the member mutex of the current StaticMeta. In general,
105 // Mutex() should be used instead of this one. However, in case where
106 // the static variable inside Instance() goes out of scope, MemberMutex()
107 // should be used. One example is OnThreadExit() function.
108 port::Mutex
* MemberMutex() { return &mutex_
; }
111 // Get UnrefHandler for id with acquiring mutex
112 // REQUIRES: mutex locked
113 UnrefHandler
GetHandler(uint32_t id
);
115 // Triggered before a thread terminates
116 static void OnThreadExit(void* ptr
);
118 // Add current thread's ThreadData to the global chain
119 // REQUIRES: mutex locked
120 void AddThreadData(ThreadData
* d
);
122 // Remove current thread's ThreadData from the global chain
123 // REQUIRES: mutex locked
124 void RemoveThreadData(ThreadData
* d
);
126 static ThreadData
* GetThreadLocal();
128 uint32_t next_instance_id_
;
129 // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
130 // frequently. This also prevents it from blowing up the vector space.
131 autovector
<uint32_t> free_instance_ids_
;
132 // Chain all thread local structure together. This is necessary since
133 // when one ThreadLocalPtr gets destroyed, we need to loop over each
134 // thread's version of pointer corresponding to that instance and
135 // call UnrefHandler for it.
138 std::unordered_map
<uint32_t, UnrefHandler
> handler_map_
;
140 // The private mutex. Developers should always use Mutex() instead of
141 // using this variable directly.
143 #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
144 // Thread local storage
145 static __thread ThreadData
* tls_
;
148 // Used to make thread exit trigger possible if !defined(OS_MACOSX).
149 // Otherwise, used to retrieve thread data.
150 pthread_key_t pthread_key_
;
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
// Out-of-class definition of the per-thread ThreadData pointer.
__thread ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
#endif
// Windows doesn't support a per-thread destructor with its
// TLS primitives. So, we build it manually by inserting a
// function to be called on each thread's exit.
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// and http://www.nynaeve.net/?p=183
//
// really we do this to have clear conscience since using TLS with thread-pools
// is iffy, although OK within a request. But otherwise, threads have no
// identity in its modern use.
#ifdef OS_WIN

// This runs on windows only called from the System Loader
// when a thread is being terminated.
//
// Windows cleanup routine is invoked from a System Loader with a different
// signature so we can not directly hookup the original OnThreadExit which is
// private. So we make StaticMeta class share with the us the address of the
// function so we can invoke it from here.
namespace wintlscleanup {

// This is set to OnThreadExit in StaticMeta singleton constructor
UnrefHandler thread_local_inclass_routine = nullptr;
pthread_key_t thread_local_key = pthread_key_t(-1);

// Static callback function to call with each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  // We decided to punt on PROCESS_EXIT
  if (DLL_THREAD_DETACH == reason) {
    if (thread_local_key != pthread_key_t(-1) &&
        thread_local_inclass_routine != nullptr) {
      void* tls = TlsGetValue(thread_local_key);
      if (tls != nullptr) {
        thread_local_inclass_routine(tls);
      }
    }
  }
}

}  // namespace wintlscleanup

#ifdef _MSC_VER

// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragma above.
extern "C" {

// The linker must not discard thread_callback_on_exit. (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifdef _WIN64

// .CRT section is merged with .rdata on x64 so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure the
// linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
    wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()

#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")

#else  // _WIN64

#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()

#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")

#endif  // _WIN64

}  // extern "C"

#else  // _MSC_VER

// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
  if (dwReason == DLL_THREAD_DETACH)
    wintlscleanup::WinOnThreadExit(h, dwReason, pv);
  return TRUE;
}

#endif  // _MSC_VER

#endif  // OS_WIN
246 void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
248 ThreadLocalPtr::StaticMeta
* ThreadLocalPtr::Instance() {
249 // Here we prefer function static variable instead of global
250 // static variable as function static variable is initialized
251 // when the function is first call. As a result, we can properly
252 // control their construction order by properly preparing their
253 // first function call.
255 // Note that here we decide to make "inst" a static pointer w/o deleting
256 // it at the end instead of a static variable. This is to avoid the following
257 // destruction order disaster happens when a child thread using ThreadLocalPtr
258 // dies AFTER the main thread dies: When a child thread happens to use
259 // ThreadLocalPtr, it will try to delete its thread-local data on its
260 // OnThreadExit when the child thread dies. However, OnThreadExit depends
261 // on the following variable. As a result, if the main thread dies before any
262 // child thread happen to use ThreadLocalPtr dies, then the destruction of
263 // the following variable will go first, then OnThreadExit, therefore causing
266 // The above problem can be solved by using thread_local to store tls_ instead
267 // of using __thread. The major difference between thread_local and __thread
268 // is that thread_local supports dynamic construction and destruction of
269 // non-primitive typed variables. As a result, we can guarantee the
270 // destruction order even when the main thread dies before any child threads.
271 // However, thread_local is not supported in all compilers that accept -std=c++11
272 // (e.g., eg Mac with XCode < 8. XCode 8+ supports thread_local).
273 static ThreadLocalPtr::StaticMeta
* inst
= new ThreadLocalPtr::StaticMeta();
277 port::Mutex
* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_
; }
279 void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr
) {
280 auto* tls
= static_cast<ThreadData
*>(ptr
);
281 assert(tls
!= nullptr);
283 // Use the cached StaticMeta::Instance() instead of directly calling
284 // the variable inside StaticMeta::Instance() might already go out of
285 // scope here in case this OnThreadExit is called after the main thread
287 auto* inst
= tls
->inst
;
288 pthread_setspecific(inst
->pthread_key_
, nullptr);
290 MutexLock
l(inst
->MemberMutex());
291 inst
->RemoveThreadData(tls
);
292 // Unref stored pointers of current thread from all instances
294 for (auto& e
: tls
->entries
) {
295 void* raw
= e
.ptr
.load();
296 if (raw
!= nullptr) {
297 auto unref
= inst
->GetHandler(id
);
298 if (unref
!= nullptr) {
304 // Delete thread local structure no matter if it is Mac platform
308 ThreadLocalPtr::StaticMeta::StaticMeta()
309 : next_instance_id_(0),
312 if (pthread_key_create(&pthread_key_
, &OnThreadExit
) != 0) {
316 // OnThreadExit is not getting called on the main thread.
317 // Call through the static destructor mechanism to avoid memory leak.
319 // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
320 // singleton (destructors are invoked in reverse order of constructor
321 // _completion_); the latter must not mutate internal members. This
322 // cleanup mechanism inherently relies on use-after-release of the
323 // StaticMeta, and is brittle with respect to compiler-specific handling
324 // of memory backing destructed statically-scoped objects. Perhaps
325 // registering with atexit(3) would be more robust.
327 // This is not required on Windows.
331 #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
333 static_cast<ThreadData
*>(pthread_getspecific(Instance()->pthread_key_
));
340 #endif // !defined(OS_WIN)
346 // Share with Windows its cleanup routine and the key
347 wintlscleanup::thread_local_inclass_routine
= OnThreadExit
;
348 wintlscleanup::thread_local_key
= pthread_key_
;
352 void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData
* d
) {
353 Mutex()->AssertHeld();
355 d
->prev
= head_
.prev
;
356 head_
.prev
->next
= d
;
360 void ThreadLocalPtr::StaticMeta::RemoveThreadData(
362 Mutex()->AssertHeld();
363 d
->next
->prev
= d
->prev
;
364 d
->prev
->next
= d
->next
;
365 d
->next
= d
->prev
= d
;
368 ThreadData
* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
369 #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
370 // Make this local variable name look like a member variable so that we
371 // can share all the code below
373 static_cast<ThreadData
*>(pthread_getspecific(Instance()->pthread_key_
));
376 if (UNLIKELY(tls_
== nullptr)) {
377 auto* inst
= Instance();
378 tls_
= new ThreadData(inst
);
380 // Register it in the global chain, needs to be done before thread exit
381 // handler registration
382 MutexLock
l(Mutex());
383 inst
->AddThreadData(tls_
);
385 // Even it is not OS_MACOSX, need to register value for pthread_key_ so that
386 // its exit handler will be triggered.
387 if (pthread_setspecific(inst
->pthread_key_
, tls_
) != 0) {
389 MutexLock
l(Mutex());
390 inst
->RemoveThreadData(tls_
);
399 void* ThreadLocalPtr::StaticMeta::Get(uint32_t id
) const {
400 auto* tls
= GetThreadLocal();
401 if (UNLIKELY(id
>= tls
->entries
.size())) {
404 return tls
->entries
[id
].ptr
.load(std::memory_order_acquire
);
407 void ThreadLocalPtr::StaticMeta::Reset(uint32_t id
, void* ptr
) {
408 auto* tls
= GetThreadLocal();
409 if (UNLIKELY(id
>= tls
->entries
.size())) {
410 // Need mutex to protect entries access within ReclaimId
411 MutexLock
l(Mutex());
412 tls
->entries
.resize(id
+ 1);
414 tls
->entries
[id
].ptr
.store(ptr
, std::memory_order_release
);
417 void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id
, void* ptr
) {
418 auto* tls
= GetThreadLocal();
419 if (UNLIKELY(id
>= tls
->entries
.size())) {
420 // Need mutex to protect entries access within ReclaimId
421 MutexLock
l(Mutex());
422 tls
->entries
.resize(id
+ 1);
424 return tls
->entries
[id
].ptr
.exchange(ptr
, std::memory_order_acquire
);
427 bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id
, void* ptr
,
429 auto* tls
= GetThreadLocal();
430 if (UNLIKELY(id
>= tls
->entries
.size())) {
431 // Need mutex to protect entries access within ReclaimId
432 MutexLock
l(Mutex());
433 tls
->entries
.resize(id
+ 1);
435 return tls
->entries
[id
].ptr
.compare_exchange_strong(
436 expected
, ptr
, std::memory_order_release
, std::memory_order_relaxed
);
439 void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id
, autovector
<void*>* ptrs
,
440 void* const replacement
) {
441 MutexLock
l(Mutex());
442 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
443 if (id
< t
->entries
.size()) {
445 t
->entries
[id
].ptr
.exchange(replacement
, std::memory_order_acquire
);
446 if (ptr
!= nullptr) {
447 ptrs
->push_back(ptr
);
453 void ThreadLocalPtr::StaticMeta::Fold(uint32_t id
, FoldFunc func
, void* res
) {
454 MutexLock
l(Mutex());
455 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
456 if (id
< t
->entries
.size()) {
457 void* ptr
= t
->entries
[id
].ptr
.load();
458 if (ptr
!= nullptr) {
465 uint32_t ThreadLocalPtr::TEST_PeekId() {
466 return Instance()->PeekId();
469 void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id
, UnrefHandler handler
) {
470 MutexLock
l(Mutex());
471 handler_map_
[id
] = handler
;
474 UnrefHandler
ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id
) {
475 Mutex()->AssertHeld();
476 auto iter
= handler_map_
.find(id
);
477 if (iter
== handler_map_
.end()) {
483 uint32_t ThreadLocalPtr::StaticMeta::GetId() {
484 MutexLock
l(Mutex());
485 if (free_instance_ids_
.empty()) {
486 return next_instance_id_
++;
489 uint32_t id
= free_instance_ids_
.back();
490 free_instance_ids_
.pop_back();
494 uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
495 MutexLock
l(Mutex());
496 if (!free_instance_ids_
.empty()) {
497 return free_instance_ids_
.back();
499 return next_instance_id_
;
502 void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id
) {
503 // This id is not used, go through all thread local data and release
504 // corresponding value
505 MutexLock
l(Mutex());
506 auto unref
= GetHandler(id
);
507 for (ThreadData
* t
= head_
.next
; t
!= &head_
; t
= t
->next
) {
508 if (id
< t
->entries
.size()) {
509 void* ptr
= t
->entries
[id
].ptr
.exchange(nullptr);
510 if (ptr
!= nullptr && unref
!= nullptr) {
515 handler_map_
[id
] = nullptr;
516 free_instance_ids_
.push_back(id
);
519 ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler
)
520 : id_(Instance()->GetId()) {
521 if (handler
!= nullptr) {
522 Instance()->SetHandler(id_
, handler
);
526 ThreadLocalPtr::~ThreadLocalPtr() {
527 Instance()->ReclaimId(id_
);
530 void* ThreadLocalPtr::Get() const {
531 return Instance()->Get(id_
);
534 void ThreadLocalPtr::Reset(void* ptr
) {
535 Instance()->Reset(id_
, ptr
);
538 void* ThreadLocalPtr::Swap(void* ptr
) {
539 return Instance()->Swap(id_
, ptr
);
542 bool ThreadLocalPtr::CompareAndSwap(void* ptr
, void*& expected
) {
543 return Instance()->CompareAndSwap(id_
, ptr
, expected
);
546 void ThreadLocalPtr::Scrape(autovector
<void*>* ptrs
, void* const replacement
) {
547 Instance()->Scrape(id_
, ptrs
, replacement
);
550 void ThreadLocalPtr::Fold(FoldFunc func
, void* res
) {
551 Instance()->Fold(id_
, func
, res
);
554 } // namespace rocksdb