]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/util/thread_local.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / util / thread_local.cc
CommitLineData
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
10#include "util/thread_local.h"
11#include "util/mutexlock.h"
12#include "port/likely.h"
13#include <stdlib.h>
14
15namespace rocksdb {
16
// One slot of a thread's local storage vector. The pointer is atomic so
// that non-owning threads (see Scrape/Fold/ReclaimId below) can read or
// replace it while the owning thread accesses it without the global mutex.
struct Entry {
  Entry() { ptr.store(nullptr, std::memory_order_relaxed); }
  Entry(const Entry& other) {
    // Copying happens under the global mutex (vector resize), so a relaxed
    // load/store pair is sufficient here.
    ptr.store(other.ptr.load(std::memory_order_relaxed),
              std::memory_order_relaxed);
  }
  std::atomic<void*> ptr;
};
22
// Forward declaration.
class StaticMeta;

// This is the structure that is declared as "thread_local" storage.
// The vector keeps the list of atomic pointers for all instances for the
// "current" thread. The vector is indexed by an Id that is unique in the
// process and associated with one ThreadLocalPtr instance. The Id is
// assigned by a global StaticMeta singleton. So if we instantiated 3
// ThreadLocalPtr instances, each thread will have a ThreadData with a
// vector of size 3:
// ---------------------------------------------------
// |          | instance 1 | instance 2 | instance 3 |
// ---------------------------------------------------
// | thread 1 |    void*   |    void*   |    void*   | <- ThreadData
// ---------------------------------------------------
// | thread 2 |    void*   |    void*   |    void*   | <- ThreadData
// ---------------------------------------------------
// | thread 3 |    void*   |    void*   |    void*   | <- ThreadData
// ---------------------------------------------------
struct ThreadData {
  explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst)
      : entries(),
        next(nullptr),
        prev(nullptr),
        inst(_inst) {}
  // Per-instance slots for this thread, indexed by instance Id.
  std::vector<Entry> entries;
  // Intrusive links in StaticMeta's circular doubly-linked list of all
  // threads' ThreadData (head_ is the sentinel).
  ThreadData* next;
  ThreadData* prev;
  // Owning singleton, cached so that OnThreadExit does not need to call
  // Instance() (whose static may already be out of scope at thread exit).
  ThreadLocalPtr::StaticMeta* inst;
};
51
// Process-wide singleton bookkeeping for all ThreadLocalPtr instances:
// assigns and recycles instance Ids, tracks every thread's ThreadData,
// and stores the per-instance UnrefHandlers.
class ThreadLocalPtr::StaticMeta {
 public:
  StaticMeta();

  // Return the next available Id, claiming it.
  uint32_t GetId();
  // Return the next available Id without claiming it.
  uint32_t PeekId() const;
  // Return the given Id back to the free pool. This also triggers
  // UnrefHandler for associated pointer value (if not NULL) for all threads.
  void ReclaimId(uint32_t id);

  // Return the pointer value for the given id for the current thread.
  void* Get(uint32_t id) const;
  // Reset the pointer value for the given id for the current thread.
  void Reset(uint32_t id, void* ptr);
  // Atomically swap the supplied ptr and return the previous value.
  void* Swap(uint32_t id, void* ptr);
  // Atomically compare and swap the provided value only if it equals
  // the expected value.
  bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
  // Reset all thread local data to replacement, and return non-nullptr
  // data for all existing threads.
  void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
  // Update res by applying func on each thread-local value. Holds a lock that
  // prevents unref handler from running during this call, but clients must
  // still provide external synchronization since the owning thread can
  // access the values without internal locking, e.g., via Get() and Reset().
  void Fold(uint32_t id, FoldFunc func, void* res);

  // Register the UnrefHandler for id.
  void SetHandler(uint32_t id, UnrefHandler handler);

  // Protects inst, next_instance_id_, free_instance_ids_, head_,
  // ThreadData.entries.
  //
  // Note that here we prefer a function static variable instead of the usual
  // global static variable. The reason is that C++ destroys static variables
  // in the reverse order of their construction. However, C++ does not
  // guarantee any construction order when global static variables are
  // defined in different files, while function static variables are
  // initialized when their function is first called. As a result, the
  // construction order of the function static variables can be controlled
  // by properly invoking their first function calls in the right order.
  //
  // For instance, the following function contains a function static
  // variable. We place a dummy function call of this inside
  // Env::Default() to ensure the construction order.
  static port::Mutex* Mutex();

  // Returns the member mutex of the current StaticMeta. In general,
  // Mutex() should be used instead of this one. However, in case where
  // the static variable inside Instance() goes out of scope, MemberMutex()
  // should be used. One example is the OnThreadExit() function.
  port::Mutex* MemberMutex() { return &mutex_; }

 private:
  // Get the UnrefHandler registered for id, or nullptr if none.
  // REQUIRES: mutex locked
  UnrefHandler GetHandler(uint32_t id);

  // Triggered before a thread terminates; unrefs and frees its ThreadData.
  static void OnThreadExit(void* ptr);

  // Add current thread's ThreadData to the global chain.
  // REQUIRES: mutex locked
  void AddThreadData(ThreadData* d);

  // Remove current thread's ThreadData from the global chain.
  // REQUIRES: mutex locked
  void RemoveThreadData(ThreadData* d);

  // Return the calling thread's ThreadData, creating it on first use.
  static ThreadData* GetThreadLocal();

  // Next never-before-used instance Id to hand out.
  uint32_t next_instance_id_;
  // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
  // frequently. This also prevents it from blowing up the vector space.
  autovector<uint32_t> free_instance_ids_;
  // Chain all thread local structures together. This is necessary since
  // when one ThreadLocalPtr gets destroyed, we need to loop over each
  // thread's version of pointer corresponding to that instance and
  // call UnrefHandler for it.
  ThreadData head_;

  // Per-instance-Id destruction callbacks.
  std::unordered_map<uint32_t, UnrefHandler> handler_map_;

  // The private mutex. Developers should always use Mutex() instead of
  // using this variable directly.
  port::Mutex mutex_;
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  // Thread local storage (fast path; cached per-thread ThreadData).
  static __thread ThreadData* tls_;
#endif

  // Used to make thread exit trigger possible if !defined(OS_MACOSX).
  // Otherwise, used to retrieve thread data.
  pthread_key_t pthread_key_;
};
152
153
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
// Definition of the per-thread ThreadData cache declared in StaticMeta.
__thread ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
#endif
157
// Windows doesn't support a per-thread destructor with its
// TLS primitives. So, we build it manually by inserting a
// function to be called on each thread's exit.
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// and http://www.nynaeve.net/?p=183
//
// Really we do this to have a clear conscience since using TLS with
// thread-pools is iffy, although OK within a request. But otherwise,
// threads have no identity in their modern use.

// This runs on Windows only, called from the System Loader.
#ifdef OS_WIN

// The Windows cleanup routine is invoked from the System Loader with a
// different signature, so we can not directly hook up the original
// OnThreadExit (which is a private member). Instead, StaticMeta shares the
// address of the function with us so we can invoke it.
namespace wintlscleanup {

// This is set to OnThreadExit in the StaticMeta singleton constructor.
UnrefHandler thread_local_inclass_routine = nullptr;
// Sentinel -1 means "not yet initialized by StaticMeta".
pthread_key_t thread_local_key = pthread_key_t (-1);

// Static callback function to call with each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  // We decided to punt on PROCESS_EXIT
  if (DLL_THREAD_DETACH == reason) {
    if (thread_local_key != pthread_key_t(-1) &&
        thread_local_inclass_routine != nullptr) {
      void* tls = TlsGetValue(thread_local_key);
      if (tls != nullptr) {
        thread_local_inclass_routine(tls);
      }
    }
  }
}

}  // namespace wintlscleanup

// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragma below.
extern "C" {

#ifdef _MSC_VER
// The linker must not discard thread_callback_on_exit. (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifndef _X86_

// .CRT section is merged with .rdata on x64 so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure
// the linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
    wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()

#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")

#else  // _X86_

#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()

#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")

#endif  // _X86_

#else
// Non-MSVC (e.g. MinGW) build: rely on DllMain for thread-detach instead of
// a .CRT TLS callback.
// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
  if (dwReason == DLL_THREAD_DETACH)
    wintlscleanup::WinOnThreadExit(h, dwReason, pv);
  return TRUE;
}
#endif
}  // extern "C"

#endif  // OS_WIN
245
// Force-construct the StaticMeta singleton. Called early (see the comment on
// StaticMeta::Mutex()) to pin down static-initialization order.
void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
247
// Returns the process-wide StaticMeta singleton, constructing it on first
// call. The singleton is intentionally leaked; see below.
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
  // Here we prefer a function static variable instead of a global
  // static variable, as a function static variable is initialized
  // when the function is first called. As a result, we can properly
  // control the construction order by properly preparing the
  // first function call.
  //
  // Note that here we decide to make "inst" a static pointer w/o deleting
  // it at the end, instead of a static object. This is to avoid the following
  // destruction-order disaster that happens when a child thread using
  // ThreadLocalPtr dies AFTER the main thread dies: when a child thread
  // happens to use ThreadLocalPtr, it will try to delete its thread-local
  // data in OnThreadExit when the child thread dies. However, OnThreadExit
  // depends on the following variable. As a result, if the main thread dies
  // before any child thread that happens to use ThreadLocalPtr, then the
  // destruction of the following variable will go first, then OnThreadExit,
  // therefore causing invalid access.
  //
  // The above problem can be solved by using thread_local to store tls_
  // instead of using __thread. The major difference between thread_local and
  // __thread is that thread_local supports dynamic construction and
  // destruction of non-primitive typed variables. As a result, we can
  // guarantee the destruction order even when the main thread dies before
  // any child threads. However, thread_local is not supported in all
  // compilers that accept -std=c++11 (e.g., Mac with XCode < 8; XCode 8+
  // supports thread_local).
  static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
  return inst;
}
276
// Global mutex accessor; routes through Instance() so first use constructs
// the singleton (see the ordering comment on the declaration).
port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
278
279void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
280 auto* tls = static_cast<ThreadData*>(ptr);
281 assert(tls != nullptr);
282
283 // Use the cached StaticMeta::Instance() instead of directly calling
284 // the variable inside StaticMeta::Instance() might already go out of
285 // scope here in case this OnThreadExit is called after the main thread
286 // dies.
287 auto* inst = tls->inst;
288 pthread_setspecific(inst->pthread_key_, nullptr);
289
290 MutexLock l(inst->MemberMutex());
291 inst->RemoveThreadData(tls);
292 // Unref stored pointers of current thread from all instances
293 uint32_t id = 0;
294 for (auto& e : tls->entries) {
295 void* raw = e.ptr.load();
296 if (raw != nullptr) {
297 auto unref = inst->GetHandler(id);
298 if (unref != nullptr) {
299 unref(raw);
300 }
301 }
302 ++id;
303 }
304 // Delete thread local structure no matter if it is Mac platform
305 delete tls;
306}
307
ThreadLocalPtr::StaticMeta::StaticMeta()
    : next_instance_id_(0),
      head_(this),
      pthread_key_(0) {
  // The pthread key destructor is the portable per-thread exit hook; if the
  // key cannot be created the whole mechanism is broken, so give up.
  if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
    abort();
  }

  // OnThreadExit is not getting called on the main thread.
  // Call through the static destructor mechanism to avoid memory leak.
  //
  // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
  // singleton (destructors are invoked in reverse order of constructor
  // _completion_); the latter must not mutate internal members. This
  // cleanup mechanism inherently relies on use-after-release of the
  // StaticMeta, and is brittle with respect to compiler-specific handling
  // of memory backing destructed statically-scoped objects. Perhaps
  // registering with atexit(3) would be more robust.
  //
// This is not required on Windows.
#if !defined(OS_WIN)
  static struct A {
    ~A() {
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
      // Without __thread support there is no tls_ member to shadow; fetch
      // the main thread's ThreadData from the pthread slot instead.
      ThreadData* tls_ =
          static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif
      if (tls_) {
        OnThreadExit(tls_);
      }
    }
  } a;
#endif  // !defined(OS_WIN)

  // Make the sentinel circular: an empty list points at itself.
  head_.next = &head_;
  head_.prev = &head_;

#ifdef OS_WIN
  // Share with Windows its cleanup routine and the key
  wintlscleanup::thread_local_inclass_routine = OnThreadExit;
  wintlscleanup::thread_local_key = pthread_key_;
#endif
}
351
352void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
353 Mutex()->AssertHeld();
354 d->next = &head_;
355 d->prev = head_.prev;
356 head_.prev->next = d;
357 head_.prev = d;
358}
359
360void ThreadLocalPtr::StaticMeta::RemoveThreadData(
361 ThreadData* d) {
362 Mutex()->AssertHeld();
363 d->next->prev = d->prev;
364 d->prev->next = d->next;
365 d->next = d->prev = d;
366}
367
// Return the calling thread's ThreadData, creating it on first use and
// registering it both in the global chain and in the pthread slot (so the
// exit handler fires for this thread).
ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
  // Make this local variable name look like a member variable so that we
  // can share all the code below
  ThreadData* tls_ =
      static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif

  if (UNLIKELY(tls_ == nullptr)) {
    auto* inst = Instance();
    tls_ = new ThreadData(inst);
    {
      // Register it in the global chain; needs to be done before thread exit
      // handler registration
      MutexLock l(Mutex());
      inst->AddThreadData(tls_);
    }
    // Even if it is not OS_MACOSX, need to register value for pthread_key_ so
    // that its exit handler will be triggered.
    if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
      // Registration failed: roll back the chain insertion so the exit
      // handler never sees a stale entry, then abort — thread-local data
      // would otherwise silently leak.
      {
        MutexLock l(Mutex());
        inst->RemoveThreadData(tls_);
      }
      delete tls_;
      abort();
    }
  }
  return tls_;
}
398
399void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
400 auto* tls = GetThreadLocal();
401 if (UNLIKELY(id >= tls->entries.size())) {
402 return nullptr;
403 }
404 return tls->entries[id].ptr.load(std::memory_order_acquire);
405}
406
407void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
408 auto* tls = GetThreadLocal();
409 if (UNLIKELY(id >= tls->entries.size())) {
410 // Need mutex to protect entries access within ReclaimId
411 MutexLock l(Mutex());
412 tls->entries.resize(id + 1);
413 }
414 tls->entries[id].ptr.store(ptr, std::memory_order_release);
415}
416
417void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
418 auto* tls = GetThreadLocal();
419 if (UNLIKELY(id >= tls->entries.size())) {
420 // Need mutex to protect entries access within ReclaimId
421 MutexLock l(Mutex());
422 tls->entries.resize(id + 1);
423 }
424 return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
425}
426
427bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
428 void*& expected) {
429 auto* tls = GetThreadLocal();
430 if (UNLIKELY(id >= tls->entries.size())) {
431 // Need mutex to protect entries access within ReclaimId
432 MutexLock l(Mutex());
433 tls->entries.resize(id + 1);
434 }
435 return tls->entries[id].ptr.compare_exchange_strong(
436 expected, ptr, std::memory_order_release, std::memory_order_relaxed);
437}
438
439void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
440 void* const replacement) {
441 MutexLock l(Mutex());
442 for (ThreadData* t = head_.next; t != &head_; t = t->next) {
443 if (id < t->entries.size()) {
444 void* ptr =
445 t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
446 if (ptr != nullptr) {
447 ptrs->push_back(ptr);
448 }
449 }
450 }
451}
452
453void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
454 MutexLock l(Mutex());
455 for (ThreadData* t = head_.next; t != &head_; t = t->next) {
456 if (id < t->entries.size()) {
457 void* ptr = t->entries[id].ptr.load();
458 if (ptr != nullptr) {
459 func(ptr, res);
460 }
461 }
462 }
463}
464
// Test-only accessor: the id the next ThreadLocalPtr would be assigned.
uint32_t ThreadLocalPtr::TEST_PeekId() {
  return Instance()->PeekId();
}
468
469void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
470 MutexLock l(Mutex());
471 handler_map_[id] = handler;
472}
473
474UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
475 Mutex()->AssertHeld();
476 auto iter = handler_map_.find(id);
477 if (iter == handler_map_.end()) {
478 return nullptr;
479 }
480 return iter->second;
481}
482
483uint32_t ThreadLocalPtr::StaticMeta::GetId() {
484 MutexLock l(Mutex());
485 if (free_instance_ids_.empty()) {
486 return next_instance_id_++;
487 }
488
489 uint32_t id = free_instance_ids_.back();
490 free_instance_ids_.pop_back();
491 return id;
492}
493
494uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
495 MutexLock l(Mutex());
496 if (!free_instance_ids_.empty()) {
497 return free_instance_ids_.back();
498 }
499 return next_instance_id_;
500}
501
502void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
503 // This id is not used, go through all thread local data and release
504 // corresponding value
505 MutexLock l(Mutex());
506 auto unref = GetHandler(id);
507 for (ThreadData* t = head_.next; t != &head_; t = t->next) {
508 if (id < t->entries.size()) {
509 void* ptr = t->entries[id].ptr.exchange(nullptr);
510 if (ptr != nullptr && unref != nullptr) {
511 unref(ptr);
512 }
513 }
514 }
515 handler_map_[id] = nullptr;
516 free_instance_ids_.push_back(id);
517}
518
// Claims a process-unique instance id from the singleton and optionally
// registers |handler|, which will be invoked to release each thread's
// stored value when the thread or this instance dies.
ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
    : id_(Instance()->GetId()) {
  if (handler != nullptr) {
    Instance()->SetHandler(id_, handler);
  }
}
525
// Releases this instance's id back to the free pool; ReclaimId also runs
// the UnrefHandler on every thread's stored value for the id.
ThreadLocalPtr::~ThreadLocalPtr() {
  Instance()->ReclaimId(id_);
}
529
// Returns the calling thread's stored pointer for this instance, or nullptr
// if this thread never set one.
void* ThreadLocalPtr::Get() const {
  return Instance()->Get(id_);
}
533
// Sets the calling thread's stored pointer for this instance.
void ThreadLocalPtr::Reset(void* ptr) {
  Instance()->Reset(id_, ptr);
}
537
// Atomically replaces the calling thread's stored pointer and returns the
// previous value.
void* ThreadLocalPtr::Swap(void* ptr) {
  return Instance()->Swap(id_, ptr);
}
541
// Atomically stores ptr iff the calling thread's current value equals
// expected; on failure, expected is updated with the observed value.
bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
  return Instance()->CompareAndSwap(id_, ptr, expected);
}
545
// Collects all threads' non-null values for this instance into ptrs,
// replacing each stored slot with replacement.
void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
  Instance()->Scrape(id_, ptrs, replacement);
}
549
// Applies func to every thread's non-null value for this instance,
// accumulating into res.
void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
  Instance()->Fold(id_, func, res);
}
553
554} // namespace rocksdb