library/std/src/sys/windows/thread_local_key.rs

   1 use crate::mem::ManuallyDrop;
   2 use crate::ptr;
   3 use crate::sync::atomic::AtomicPtr;
   4 use crate::sync::atomic::Ordering::SeqCst;
   5 use crate::sys::c;
   6
   7 pub type Key = c::DWORD;
   8 pub type Dtor = unsafe extern "C" fn(*mut u8);
   9
  10 // Turns out, like pretty much everything, Windows is pretty close to the
  11 // functionality that Unix provides, but slightly different! In the case of
  12 // TLS, Windows does not provide an API to provide a destructor for a TLS
  13 // variable. This ends up being pretty crucial to this implementation, so we
  14 // need a way around this.
  15 //
  16 // The solution here ended up being a little obscure, but fear not, the
  17 // internet has informed me [1][2] that this solution is not unique (no way
  18 // I could have thought of it as well!). The key idea is to insert some hook
  19 // somewhere to run arbitrary code on thread termination. With this in place
  20 // we'll be able to run anything we like, including all TLS destructors!
  21 //
  22 // To accomplish this feat, we perform a number of tasks, all contained
  23 // within this module:
  24 //
  25 // * All TLS destructors are tracked by *us*, not the windows runtime. This
  26 //   means that we have a global list of destructors for each TLS key that
  27 //   we know about.
  28 // * When a thread exits, we run over the entire list and run dtors for all
  29 //   non-null keys. This attempts to match Unix semantics in this regard.
  30 //
  31 // This ends up having the overhead of using a global list, having some
  32 // locks here and there, and in general just adding some more code bloat. We
  33 // attempt to optimize runtime by forgetting keys that don't have
  34 // destructors, but this only gets us so far.
  35 //
  36 // For more details and nitty-gritty, see the code sections below!
  37 //
  38 // [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
  39 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
  40 //                        /threading/thread_local_storage_win.cc#L42
  41
  42 // -------------------------------------------------------------------------
  43 // Native bindings
  44 //
  45 // This section is just raw bindings to the native functions that Windows
  46 // provides. There are a few extra calls to deal with destructors.
  47
  48 #[inline]
  49 pub unsafe fn create(dtor: Option<Dtor>) -> Key {
  50     let key = c::TlsAlloc();
  51     assert!(key != c::TLS_OUT_OF_INDEXES);
  52     if let Some(f) = dtor {
  53         register_dtor(key, f);
  54     }
  55     key
  56 }
  57
  58 #[inline]
  59 pub unsafe fn set(key: Key, value: *mut u8) {
  60     let r = c::TlsSetValue(key, value as c::LPVOID);
  61     debug_assert!(r != 0);
  62 }
  63
  64 #[inline]
  65 pub unsafe fn get(key: Key) -> *mut u8 {
  66     c::TlsGetValue(key) as *mut u8
  67 }
  68
  69 #[inline]
  70 pub unsafe fn destroy(_key: Key) {
  71     rtabort!("can't destroy tls keys on windows")
  72 }
  73
  74 #[inline]
  75 pub fn requires_synchronized_create() -> bool {
  76     true
  77 }
  78
  79 // -------------------------------------------------------------------------
  80 // Dtor registration
  81 //
  82 // Windows has no native support for running destructors so we manage our own
  83 // list of destructors to keep track of how to destroy keys. We then install a
  84 // callback later to get invoked whenever a thread exits, running all
  85 // appropriate destructors.
  86 //
  87 // Currently unregistration from this list is not supported. A destructor can be
  88 // registered but cannot be unregistered. There's various simplifying reasons
  89 // for doing this, the big ones being:
  90 //
  91 // 1. Currently we don't even support deallocating TLS keys, so normal operation
  92 //    doesn't need to deallocate a destructor.
  93 // 2. There is no point in time where we know we can unregister a destructor
  94 //    because it could always be getting run by some remote thread.
  95 //
  96 // Typically processes have a statically known set of TLS keys which is pretty
  97 // small, and we'd want to keep this memory alive for the whole process anyway
  98 // really.
  99 //
 100 // Perhaps one day we can fold the `Box` here into a static allocation,
 101 // expanding the `StaticKey` structure to contain not only a slot for the TLS
 102 // key but also a slot for the destructor queue on windows. An optimization for
 103 // another day!
 104
 105 static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());
 106
 107 struct Node {
 108     dtor: Dtor,
 109     key: Key,
 110     next: *mut Node,
 111 }
 112
 113 unsafe fn register_dtor(key: Key, dtor: Dtor) {
 114     let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
 115
 116     let mut head = DTORS.load(SeqCst);
 117     loop {
 118         node.next = head;
 119         match DTORS.compare_exchange(head, &mut **node, SeqCst, SeqCst) {
 120             Ok(_) => return, // nothing to drop, we successfully added the node to the list
 121             Err(cur) => head = cur,
 122         }
 123     }
 124 }
 125
 126 // -------------------------------------------------------------------------
 127 // Where the Magic (TM) Happens
 128 //
 129 // If you're looking at this code, and wondering "what is this doing?",
 130 // you're not alone! I'll try to break this down step by step:
 131 //
 132 // # What's up with CRT$XLB?
 133 //
 134 // For anything about TLS destructors to work on Windows, we have to be able
 135 // to run *something* when a thread exits. To do so, we place a very special
 136 // static in a very special location. If this is encoded in just the right
 137 // way, the kernel's loader is apparently nice enough to run some function
 138 // of ours whenever a thread exits! How nice of the kernel!
 139 //
 140 // Lots of detailed information can be found in source [1] above, but the
 141 // gist of it is that this is leveraging a feature of Microsoft's PE format
 142 // (executable format) which is not actually used by any compilers today.
 143 // This apparently translates to any callbacks in the ".CRT$XLB" section
 144 // being run on certain events.
 145 //
 146 // So after all that, we use the compiler's #[link_section] feature to place
 147 // a callback pointer into the magic section so it ends up being called.
 148 //
 149 // # What's up with this callback?
 150 //
 151 // The callback specified receives a number of parameters from... someone!
 152 // (the kernel? the runtime? I'm not quite sure!) There are a few events that
 153 // this gets invoked for, but we're currently only interested in when a
 154 // thread or a process "detaches" (exits). The process part happens for the
 155 // last thread and the thread part happens for any normal thread.
 156 //
 157 // # Ok, what's up with running all these destructors?
 158 //
 159 // This will likely need to be improved over time, but this function
 160 // attempts a "poor man's" destructor callback system. Once we've got a list
 161 // of what to run, we iterate over all keys, check their values, and then run
 162 // destructors if the values turn out to be non-null (setting them to null just
 163 // beforehand). We do this a few times in a loop to basically match Unix
 164 // semantics. If we don't reach a fixed point after a short while then we just
 165 // inevitably leak something most likely.
 166 //
 167 // # The article mentions weird stuff about "/INCLUDE"?
 168 //
 169 // It sure does! Specifically we're talking about this quote:
 170 //
 171 //      The Microsoft run-time library facilitates this process by defining a
 172 //      memory image of the TLS Directory and giving it the special name
 173 //      “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
 174 //      linker looks for this memory image and uses the data there to create the
 175 //      TLS Directory. Other compilers that support TLS and work with the
 176 //      Microsoft linker must use this same technique.
 177 //
 178 // Basically what this means is that if we want support for our TLS
 179 // destructors/our hook being called then we need to make sure the linker does
 180 // not omit this symbol. Otherwise it will omit it and our callback won't be
 181 // wired up.
 182 //
 183 // We don't actually use the `/INCLUDE` linker flag here like the article
 184 // mentions because the Rust compiler doesn't propagate linker flags, but
 185 // instead we use a shim function which performs a volatile 1-byte load from
 186 // the address of the symbol to ensure it sticks around.
 187
 188 #[link_section = ".CRT$XLB"]
 189 #[allow(dead_code, unused_variables)]
 190 #[used] // we don't want LLVM eliminating this symbol for any reason, and
 191 // when the symbol makes it to the linker the linker will take over
 192 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
 193     on_tls_callback;
 194
 195 #[allow(dead_code, unused_variables)]
 196 unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
 197     if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
 198         run_dtors();
 199     }
 200
 201     // See comments above for what this is doing. Note that we don't need this
 202     // trickery on GNU windows, just on MSVC.
 203     reference_tls_used();
 204     #[cfg(target_env = "msvc")]
 205     unsafe fn reference_tls_used() {
 206         extern "C" {
 207             static _tls_used: u8;
 208         }
 209         crate::intrinsics::volatile_load(&_tls_used);
 210     }
 211     #[cfg(not(target_env = "msvc"))]
 212     unsafe fn reference_tls_used() {}
 213 }
 214
 215 #[allow(dead_code)] // actually called above
 216 unsafe fn run_dtors() {
 217     let mut any_run = true;
 218     for _ in 0..5 {
 219         if !any_run {
 220             break;
 221         }
 222         any_run = false;
 223         let mut cur = DTORS.load(SeqCst);
 224         while !cur.is_null() {
 225             let ptr = c::TlsGetValue((*cur).key);
 226
 227             if !ptr.is_null() {
 228                 c::TlsSetValue((*cur).key, ptr::null_mut());
 229                 ((*cur).dtor)(ptr as *mut _);
 230                 any_run = true;
 231             }
 232
 233             cur = (*cur).next;
 234         }
 235     }
 236 }