]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
7cac9316 | 11 | use mem; |
85aaf69f | 12 | use ptr; |
7cac9316 XL |
13 | use sync::atomic::AtomicPtr; |
14 | use sync::atomic::Ordering::SeqCst; | |
92a42be0 | 15 | use sys::c; |
1a4d82fc | 16 | |
/// Identifier of a TLS slot, as handed out by `c::TlsAlloc` in `create`.
pub type Key = c::DWORD;
/// Destructor callback for a TLS value; `run_dtors` invokes it with the
/// non-null pointer that was stored in the slot.
pub type Dtor = unsafe extern fn(*mut u8);
19 | ||
20 | // Turns out, like pretty much everything, Windows is pretty close to the | |
21 | // functionality that Unix provides, but slightly different! In the case of | |
22 | // TLS, Windows does not provide an API to provide a destructor for a TLS | |
23 | // variable. This ends up being pretty crucial to this implementation, so we | |
24 | // need a way around this. | |
25 | // | |
26 | // The solution here ended up being a little obscure, but fear not, the | |
27 | // internet has informed me [1][2] that this solution is not unique (no way | |
28 | // I could have thought of it as well!). The key idea is to insert some hook | |
29 | // somewhere to run arbitrary code on thread termination. With this in place | |
30 | // we'll be able to run anything we like, including all TLS destructors! | |
31 | // | |
bd371182 | 32 | // To accomplish this feat, we perform a number of tasks, all contained |
1a4d82fc JJ |
33 | // within this module: |
34 | // | |
35 | // * All TLS destructors are tracked by *us*, not the windows runtime. This | |
36 | // means that we have a global list of destructors for each TLS key that | |
37 | // we know about. | |
1a4d82fc JJ |
38 | // * When a thread exits, we run over the entire list and run dtors for all |
39 | // non-null keys. This attempts to match Unix semantics in this regard. | |
40 | // | |
41 | // This ends up having the overhead of using a global list, having some | |
42 | // locks here and there, and in general just adding some more code bloat. We | |
43 | // attempt to optimize runtime by forgetting keys that don't have | |
44 | // destructors, but this only gets us so far. | |
45 | // | |
46 | // For more details and nitty-gritty, see the code sections below! | |
47 | // | |
48 | // [1]: http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way | |
49 | // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base | |
50 | // /threading/thread_local_storage_win.cc#L42 | |
51 | ||
1a4d82fc JJ |
52 | // ------------------------------------------------------------------------- |
53 | // Native bindings | |
54 | // | |
55 | // This section is just raw bindings to the native functions that Windows | |
56 | // provides. There are a few extra calls to deal with destructors. | |
57 | ||
58 | #[inline] | |
59 | pub unsafe fn create(dtor: Option<Dtor>) -> Key { | |
92a42be0 SL |
60 | let key = c::TlsAlloc(); |
61 | assert!(key != c::TLS_OUT_OF_INDEXES); | |
7453a54e SL |
62 | if let Some(f) = dtor { |
63 | register_dtor(key, f); | |
1a4d82fc JJ |
64 | } |
65 | return key; | |
66 | } | |
67 | ||
68 | #[inline] | |
69 | pub unsafe fn set(key: Key, value: *mut u8) { | |
92a42be0 | 70 | let r = c::TlsSetValue(key, value as c::LPVOID); |
1a4d82fc JJ |
71 | debug_assert!(r != 0); |
72 | } | |
73 | ||
74 | #[inline] | |
75 | pub unsafe fn get(key: Key) -> *mut u8 { | |
92a42be0 | 76 | c::TlsGetValue(key) as *mut u8 |
1a4d82fc JJ |
77 | } |
78 | ||
/// Destroying a TLS key is not supported on Windows: the destructor list in
/// this module has no way to unregister an entry, so this unconditionally
/// invokes `rtabort!` (presumably aborting the process; confirm against the
/// macro's definition).
#[inline]
pub unsafe fn destroy(_key: Key) {
    rtabort!("can't destroy tls keys on windows")
}
83 | ||
/// Always returns `true` on this platform.
///
/// NOTE(review): presumably tells the platform-independent TLS layer that
/// calls to `create` must be serialized; confirm against the caller.
#[inline]
pub fn requires_synchronized_create() -> bool {
    true
}
88 | ||
1a4d82fc JJ |
89 | // ------------------------------------------------------------------------- |
90 | // Dtor registration | |
91 | // | |
7cac9316 XL |
92 | // Windows has no native support for running destructors so we manage our own |
93 | // list of destructors to keep track of how to destroy keys. We then install a | |
94 | // callback later to get invoked whenever a thread exits, running all | |
95 | // appropriate destructors. | |
1a4d82fc | 96 | // |
7cac9316 XL |
97 | // Currently unregistration from this list is not supported. A destructor can be |
98 | // registered but cannot be unregistered. There's various simplifying reasons | |
99 | // for doing this, the big ones being: | |
100 | // | |
101 | // 1. Currently we don't even support deallocating TLS keys, so normal operation | |
102 | // doesn't need to deallocate a destructor. | |
103 | // 2. There is no point in time where we know we can unregister a destructor | |
104 | // because it could always be getting run by some remote thread. | |
105 | // | |
106 | // Typically processes have a statically known set of TLS keys which is pretty | |
107 | // small, and we'd want to keep this memory alive for the whole process anyway | |
108 | // really. | |
109 | // | |
110 | // Perhaps one day we can fold the `Box` here into a static allocation, | |
111 | // expanding the `StaticKey` structure to contain not only a slot for the TLS | |
112 | // key but also a slot for the destructor queue on windows. An optimization for | |
113 | // another day! | |
1a4d82fc | 114 | |
// Head of the global, singly linked list of registered destructors. Starts
// out null (empty list); `register_dtor` pushes new nodes at the front with a
// compare-and-swap and `run_dtors` walks it from the head.
static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());

// One registered destructor. Nodes are heap allocated by `register_dtor` and
// intentionally never freed, since unregistration is not supported.
struct Node {
    // Callback to run on the value stored under `key`.
    dtor: Dtor,
    // TLS slot this destructor belongs to.
    key: Key,
    // Next node in the list, or null at the end of the list.
    next: *mut Node,
}
122 | ||
123 | unsafe fn register_dtor(key: Key, dtor: Dtor) { | |
7cac9316 XL |
124 | let mut node = Box::new(Node { |
125 | key: key, | |
126 | dtor: dtor, | |
127 | next: ptr::null_mut(), | |
128 | }); | |
1a4d82fc | 129 | |
7cac9316 XL |
130 | let mut head = DTORS.load(SeqCst); |
131 | loop { | |
132 | node.next = head; | |
133 | match DTORS.compare_exchange(head, &mut *node, SeqCst, SeqCst) { | |
134 | Ok(_) => return mem::forget(node), | |
135 | Err(cur) => head = cur, | |
136 | } | |
137 | } | |
1a4d82fc JJ |
138 | } |
139 | ||
140 | // ------------------------------------------------------------------------- | |
141 | // Where the Magic (TM) Happens | |
142 | // | |
143 | // If you're looking at this code, and wondering "what is this doing?", | |
144 | // you're not alone! I'll try to break this down step by step: | |
145 | // | |
146 | // # What's up with CRT$XLB? | |
147 | // | |
148 | // For anything about TLS destructors to work on Windows, we have to be able | |
149 | // to run *something* when a thread exits. To do so, we place a very special | |
150 | // static in a very special location. If this is encoded in just the right | |
151 | // way, the kernel's loader is apparently nice enough to run some function | |
152 | // of ours whenever a thread exits! How nice of the kernel! | |
153 | // | |
154 | // Lots of detailed information can be found in source [1] above, but the | |
155 | // gist of it is that this is leveraging a feature of Microsoft's PE format | |
156 | // (executable format) which is not actually used by any compilers today. | |
157 | // This apparently translates to any callbacks in the ".CRT$XLB" section | |
158 | // being run on certain events. | |
159 | // | |
160 | // So after all that, we use the compiler's #[link_section] feature to place | |
161 | // a callback pointer into the magic section so it ends up being called. | |
162 | // | |
163 | // # What's up with this callback? | |
164 | // | |
165 | // The callback specified receives a number of parameters from... someone! | |
85aaf69f | 166 | // (the kernel? the runtime? I'm not quite sure!) There are a few events that |
1a4d82fc JJ |
167 | // this gets invoked for, but we're currently only interested in when a |
168 | // thread or a process "detaches" (exits). The process part happens for the | |
169 | // last thread and the thread part happens for any normal thread. | |
170 | // | |
171 | // # Ok, what's up with running all these destructors? | |
172 | // | |
173 | // This will likely need to be improved over time, but this function | |
7cac9316 XL |
174 | // attempts a "poor man's" destructor callback system. Once we've got a list |
175 | // of what to run, we iterate over all keys, check their values, and then run | |
176 | // destructors if the values turn out to be non null (setting them to null just | |
177 | // beforehand). We do this a few times in a loop to basically match Unix | |
178 | // semantics. If we don't reach a fixed point after a short while then we just | |
179 | // inevitably leak something most likely. | |
1a4d82fc | 180 | // |
32a655c1 | 181 | // # The article mentions weird stuff about "/INCLUDE"? |
1a4d82fc | 182 | // |
e9174d1e SL |
183 | // It sure does! Specifically we're talking about this quote: |
184 | // | |
185 | // The Microsoft run-time library facilitates this process by defining a | |
186 | // memory image of the TLS Directory and giving it the special name | |
187 | // “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The | |
188 | // linker looks for this memory image and uses the data there to create the | |
189 | // TLS Directory. Other compilers that support TLS and work with the | |
190 | // Microsoft linker must use this same technique. | |
191 | // | |
192 | // Basically what this means is that if we want support for our TLS | |
193 | // destructors/our hook being called then we need to make sure the linker does | |
194 | // not omit this symbol. Otherwise it will omit it and our callback won't be | |
195 | // wired up. | |
196 | // | |
197 | // We don't actually use the `/INCLUDE` linker flag here like the article | |
198 | // mentions because the Rust compiler doesn't propagate linker flags, but | |
199 | // instead we use a shim function which performs a volatile 1-byte load from | |
200 | // the address of the symbol to ensure it sticks around. | |
1a4d82fc JJ |
201 | |
// Pointer to `on_tls_callback`, emitted into the `.CRT$XLB` section so that
// it is picked up as a TLS callback and invoked on thread/process events.
// `#[linkage = "external"]` and the `pub` visibility are there so the static
// is not discarded even though nothing in Rust code refers to it.
#[link_section = ".CRT$XLB"]
#[linkage = "external"]
#[allow(dead_code, unused_variables)]
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD,
                                                        c::LPVOID) =
        on_tls_callback;
208 | ||
7453a54e | 209 | #[allow(dead_code, unused_variables)] |
92a42be0 SL |
210 | unsafe extern "system" fn on_tls_callback(h: c::LPVOID, |
211 | dwReason: c::DWORD, | |
212 | pv: c::LPVOID) { | |
213 | if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH { | |
1a4d82fc JJ |
214 | run_dtors(); |
215 | } | |
e9174d1e SL |
216 | |
217 | // See comments above for what this is doing. Note that we don't need this | |
218 | // trickery on GNU windows, just on MSVC. | |
219 | reference_tls_used(); | |
220 | #[cfg(target_env = "msvc")] | |
221 | unsafe fn reference_tls_used() { | |
222 | extern { static _tls_used: u8; } | |
223 | ::intrinsics::volatile_load(&_tls_used); | |
224 | } | |
225 | #[cfg(not(target_env = "msvc"))] | |
226 | unsafe fn reference_tls_used() {} | |
1a4d82fc JJ |
227 | } |
228 | ||
85aaf69f | 229 | #[allow(dead_code)] // actually called above |
1a4d82fc JJ |
230 | unsafe fn run_dtors() { |
231 | let mut any_run = true; | |
85aaf69f | 232 | for _ in 0..5 { |
7cac9316 XL |
233 | if !any_run { |
234 | break | |
235 | } | |
1a4d82fc | 236 | any_run = false; |
7cac9316 XL |
237 | let mut cur = DTORS.load(SeqCst); |
238 | while !cur.is_null() { | |
239 | let ptr = c::TlsGetValue((*cur).key); | |
240 | ||
1a4d82fc | 241 | if !ptr.is_null() { |
7cac9316 XL |
242 | c::TlsSetValue((*cur).key, ptr::null_mut()); |
243 | ((*cur).dtor)(ptr as *mut _); | |
1a4d82fc JJ |
244 | any_run = true; |
245 | } | |
7cac9316 XL |
246 | |
247 | cur = (*cur).next; | |
1a4d82fc JJ |
248 | } |
249 | } | |
250 | } |