library/std/src/sys/windows/thread_local_key.rs

   1 use crate::mem;
   2 use crate::ptr;
   3 use crate::sync::atomic::AtomicPtr;
   4 use crate::sync::atomic::Ordering::SeqCst;
   5 use crate::sys::c;
   6
   7 pub type Key = c::DWORD;
   8 pub type Dtor = unsafe extern "C" fn(*mut u8);
   9
// Turns out, like pretty much everything, Windows is pretty close to the
// functionality that Unix provides, but slightly different! In the case of
// TLS, Windows does not offer an API to register a destructor for a TLS
// variable. This ends up being pretty crucial to this implementation, so we
// need a way around this.
  15 //
  16 // The solution here ended up being a little obscure, but fear not, the
  17 // internet has informed me [1][2] that this solution is not unique (no way
  18 // I could have thought of it as well!). The key idea is to insert some hook
  19 // somewhere to run arbitrary code on thread termination. With this in place
  20 // we'll be able to run anything we like, including all TLS destructors!
  21 //
// To accomplish this feat, we perform a number of tricks, all contained
// within this module:
  24 //
  25 // * All TLS destructors are tracked by *us*, not the windows runtime. This
  26 //   means that we have a global list of destructors for each TLS key that
  27 //   we know about.
  28 // * When a thread exits, we run over the entire list and run dtors for all
  29 //   non-null keys. This attempts to match Unix semantics in this regard.
  30 //
  31 // This ends up having the overhead of using a global list, having some
  32 // locks here and there, and in general just adding some more code bloat. We
  33 // attempt to optimize runtime by forgetting keys that don't have
  34 // destructors, but this only gets us so far.
  35 //
  36 // For more details and nitty-gritty, see the code sections below!
  37 //
  38 // [1]: http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
  39 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
  40 //                        /threading/thread_local_storage_win.cc#L42
  41
  42 // -------------------------------------------------------------------------
  43 // Native bindings
  44 //
// This section is just raw bindings to the native functions that Windows
// provides. There are a few extra calls to deal with destructors.
  47
  48 #[inline]
  49 pub unsafe fn create(dtor: Option<Dtor>) -> Key {
  50     let key = c::TlsAlloc();
  51     assert!(key != c::TLS_OUT_OF_INDEXES);
  52     if let Some(f) = dtor {
  53         register_dtor(key, f);
  54     }
  55     key
  56 }
  57
  58 #[inline]
  59 pub unsafe fn set(key: Key, value: *mut u8) {
  60     let r = c::TlsSetValue(key, value as c::LPVOID);
  61     debug_assert!(r != 0);
  62 }
  63
  64 #[inline]
  65 pub unsafe fn get(key: Key) -> *mut u8 {
  66     c::TlsGetValue(key) as *mut u8
  67 }
  68
  69 #[inline]
  70 pub unsafe fn destroy(_key: Key) {
  71     rtabort!("can't destroy tls keys on windows")
  72 }
  73
  74 #[inline]
  75 pub fn requires_synchronized_create() -> bool {
  76     true
  77 }
  78
  79 // -------------------------------------------------------------------------
  80 // Dtor registration
  81 //
  82 // Windows has no native support for running destructors so we manage our own
  83 // list of destructors to keep track of how to destroy keys. We then install a
  84 // callback later to get invoked whenever a thread exits, running all
  85 // appropriate destructors.
  86 //
// Currently unregistration from this list is not supported. A destructor can be
// registered but cannot be unregistered. There are various simplifying reasons
// for doing this, the big ones being:
  90 //
  91 // 1. Currently we don't even support deallocating TLS keys, so normal operation
  92 //    doesn't need to deallocate a destructor.
  93 // 2. There is no point in time where we know we can unregister a destructor
  94 //    because it could always be getting run by some remote thread.
  95 //
  96 // Typically processes have a statically known set of TLS keys which is pretty
  97 // small, and we'd want to keep this memory alive for the whole process anyway
  98 // really.
  99 //
 100 // Perhaps one day we can fold the `Box` here into a static allocation,
 101 // expanding the `StaticKey` structure to contain not only a slot for the TLS
 102 // key but also a slot for the destructor queue on windows. An optimization for
 103 // another day!
 104
 105 static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());
 106
 107 struct Node {
 108     dtor: Dtor,
 109     key: Key,
 110     next: *mut Node,
 111 }
 112
 113 #[cfg(miri)]
 114 extern "Rust" {
 115     /// Miri-provided extern function to mark the block `ptr` points to as a "root"
 116     /// for some static memory. This memory and everything reachable by it is not
 117     /// considered leaking even if it still exists when the program terminates.
 118     ///
 119     /// `ptr` has to point to the beginning of an allocated block.
 120     fn miri_static_root(ptr: *const u8);
 121 }
 122
 123 unsafe fn register_dtor(key: Key, dtor: Dtor) {
 124     let mut node = Box::new(Node { key, dtor, next: ptr::null_mut() });
 125
 126     let mut head = DTORS.load(SeqCst);
 127     loop {
 128         node.next = head;
 129         match DTORS.compare_exchange(head, &mut *node, SeqCst, SeqCst) {
 130             Ok(_) => {
 131                 #[cfg(miri)]
 132                 miri_static_root(&*node as *const _ as *const u8);
 133
 134                 mem::forget(node);
 135                 return;
 136             }
 137             Err(cur) => head = cur,
 138         }
 139     }
 140 }
 141
 142 // -------------------------------------------------------------------------
 143 // Where the Magic (TM) Happens
 144 //
 145 // If you're looking at this code, and wondering "what is this doing?",
 146 // you're not alone! I'll try to break this down step by step:
 147 //
 148 // # What's up with CRT$XLB?
 149 //
 150 // For anything about TLS destructors to work on Windows, we have to be able
 151 // to run *something* when a thread exits. To do so, we place a very special
 152 // static in a very special location. If this is encoded in just the right
 153 // way, the kernel's loader is apparently nice enough to run some function
 154 // of ours whenever a thread exits! How nice of the kernel!
 155 //
 156 // Lots of detailed information can be found in source [1] above, but the
 157 // gist of it is that this is leveraging a feature of Microsoft's PE format
 158 // (executable format) which is not actually used by any compilers today.
 159 // This apparently translates to any callbacks in the ".CRT$XLB" section
 160 // being run on certain events.
 161 //
 162 // So after all that, we use the compiler's #[link_section] feature to place
 163 // a callback pointer into the magic section so it ends up being called.
 164 //
 165 // # What's up with this callback?
 166 //
// The callback specified receives a number of parameters from... someone!
// (the kernel? the runtime? I'm not quite sure!) There are a few events that
// this gets invoked for, but we're currently only interested in when a
// thread or a process "detaches" (exits). The process part happens for the
// last thread and the thread part happens for any normal thread.
 172 //
 173 // # Ok, what's up with running all these destructors?
 174 //
// This will likely need to be improved over time, but this function
// attempts a "poor man's" destructor callback system. Once we've got a list
// of what to run, we iterate over all keys, check their values, and then run
// destructors if the values turn out to be non-null (setting them to null just
// beforehand). We do this a few times in a loop to basically match Unix
// semantics. If we don't reach a fixed point after a short while then we
// give up, most likely leaking something.
 182 //
 183 // # The article mentions weird stuff about "/INCLUDE"?
 184 //
 185 // It sure does! Specifically we're talking about this quote:
 186 //
 187 //      The Microsoft run-time library facilitates this process by defining a
 188 //      memory image of the TLS Directory and giving it the special name
 189 //      “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
 190 //      linker looks for this memory image and uses the data there to create the
 191 //      TLS Directory. Other compilers that support TLS and work with the
 192 //      Microsoft linker must use this same technique.
 193 //
 194 // Basically what this means is that if we want support for our TLS
 195 // destructors/our hook being called then we need to make sure the linker does
 196 // not omit this symbol. Otherwise it will omit it and our callback won't be
 197 // wired up.
 198 //
 199 // We don't actually use the `/INCLUDE` linker flag here like the article
 200 // mentions because the Rust compiler doesn't propagate linker flags, but
 201 // instead we use a shim function which performs a volatile 1-byte load from
 202 // the address of the symbol to ensure it sticks around.
 203
 204 #[link_section = ".CRT$XLB"]
 205 #[allow(dead_code, unused_variables)]
 206 #[used] // we don't want LLVM eliminating this symbol for any reason, and
 207 // when the symbol makes it to the linker the linker will take over
 208 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
 209     on_tls_callback;
 210
 211 #[allow(dead_code, unused_variables)]
 212 unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
 213     if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
 214         run_dtors();
 215     }
 216
 217     // See comments above for what this is doing. Note that we don't need this
 218     // trickery on GNU windows, just on MSVC.
 219     reference_tls_used();
 220     #[cfg(target_env = "msvc")]
 221     unsafe fn reference_tls_used() {
 222         extern "C" {
 223             static _tls_used: u8;
 224         }
 225         crate::intrinsics::volatile_load(&_tls_used);
 226     }
 227     #[cfg(not(target_env = "msvc"))]
 228     unsafe fn reference_tls_used() {}
 229 }
 230
 231 #[allow(dead_code)] // actually called above
 232 unsafe fn run_dtors() {
 233     let mut any_run = true;
 234     for _ in 0..5 {
 235         if !any_run {
 236             break;
 237         }
 238         any_run = false;
 239         let mut cur = DTORS.load(SeqCst);
 240         while !cur.is_null() {
 241             let ptr = c::TlsGetValue((*cur).key);
 242
 243             if !ptr.is_null() {
 244                 c::TlsSetValue((*cur).key, ptr::null_mut());
 245                 ((*cur).dtor)(ptr as *mut _);
 246                 any_run = true;
 247             }
 248
 249             cur = (*cur).next;
 250         }
 251     }
 252 }