]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
6 | #pragma once | |
7 | ||
8 | #include <assert.h> | |
9 | #include <errno.h> | |
10 | #include <stdint.h> | |
11 | #include <atomic> | |
12 | #include <thread> | |
13 | ||
14 | #include <folly/detail/Futex.h> | |
15 | #include <folly/portability/Asm.h> | |
16 | #include <folly/synchronization/WaitOptions.h> | |
17 | #include <folly/synchronization/detail/Spin.h> | |
18 | ||
19 | namespace folly { | |
20 | ||
21 | /// A Baton allows a thread to block once and be awoken. Captures a | |
22 | /// single handoff, and during its lifecycle (from construction/reset | |
23 | /// to destruction/reset) a baton must either be post()ed and wait()ed | |
24 | /// exactly once each, or not at all. | |
25 | /// | |
26 | /// Baton includes no internal padding, and is only 4 bytes in size. | |
27 | /// Any alignment or padding to avoid false sharing is up to the user. | |
28 | /// | |
29 | /// This is basically a stripped-down semaphore that supports only a | |
30 | /// single call to sem_post and a single call to sem_wait. | |
31 | /// | |
32 | /// The non-blocking version (MayBlock == false) provides more speed | |
33 | /// by using only load acquire and store release operations in the | |
34 | /// critical path, at the cost of disallowing blocking. | |
35 | /// | |
36 | /// The current posix semaphore sem_t isn't too bad, but this provides | |
37 | /// more a bit more speed, inlining, smaller size, a guarantee that | |
38 | /// the implementation won't change, and compatibility with | |
39 | /// DeterministicSchedule. By having a much more restrictive | |
40 | /// lifecycle we can also add a bunch of assertions that can help to | |
41 | /// catch race conditions ahead of time. | |
42 | template <bool MayBlock = true, template <typename> class Atom = std::atomic> | |
43 | class Baton { | |
44 | public: | |
45 | static constexpr WaitOptions wait_options() { | |
46 | return {}; | |
47 | } | |
48 | ||
49 | constexpr Baton() noexcept : state_(INIT) {} | |
50 | ||
51 | Baton(Baton const&) = delete; | |
52 | Baton& operator=(Baton const&) = delete; | |
53 | ||
54 | /// It is an error to destroy a Baton on which a thread is currently | |
55 | /// wait()ing. In practice this means that the waiter usually takes | |
56 | /// responsibility for destroying the Baton. | |
57 | ~Baton() noexcept { | |
58 | // The docblock for this function says that it can't be called when | |
59 | // there is a concurrent waiter. We assume a strong version of this | |
60 | // requirement in which the caller must _know_ that this is true, they | |
61 | // are not allowed to be merely lucky. If two threads are involved, | |
62 | // the destroying thread must actually have synchronized with the | |
63 | // waiting thread after wait() returned. To convey causality the the | |
64 | // waiting thread must have used release semantics and the destroying | |
65 | // thread must have used acquire semantics for that communication, | |
66 | // so we are guaranteed to see the post-wait() value of state_, | |
67 | // which cannot be WAITING. | |
68 | // | |
69 | // Note that since we only care about a single memory location, | |
70 | // the only two plausible memory orders here are relaxed and seq_cst. | |
71 | assert(state_.load(std::memory_order_relaxed) != WAITING); | |
72 | } | |
73 | ||
74 | bool ready() const noexcept { | |
75 | auto s = state_.load(std::memory_order_acquire); | |
76 | assert(s == INIT || s == EARLY_DELIVERY); | |
77 | return (s == EARLY_DELIVERY); | |
78 | } | |
79 | ||
80 | /// Equivalent to destroying the Baton and creating a new one. It is | |
81 | /// a bug to call this while there is a waiting thread, so in practice | |
82 | /// the waiter will be the one that resets the baton. | |
83 | void reset() noexcept { | |
84 | // See ~Baton for a discussion about why relaxed is okay here | |
85 | assert(state_.load(std::memory_order_relaxed) != WAITING); | |
86 | ||
87 | // We use a similar argument to justify the use of a relaxed store | |
88 | // here. Since both wait() and post() are required to be called | |
89 | // only once per lifetime, no thread can actually call those methods | |
90 | // correctly after a reset() unless it synchronizes with the thread | |
91 | // that performed the reset(). If a post() or wait() on another thread | |
92 | // didn't synchronize, then regardless of what operation we performed | |
93 | // here there would be a race on proper use of the Baton's spec | |
94 | // (although not on any particular load and store). Put another way, | |
95 | // we don't need to synchronize here because anybody that might rely | |
96 | // on such synchronization is required by the baton rules to perform | |
97 | // an additional synchronization that has the desired effect anyway. | |
98 | // | |
99 | // There is actually a similar argument to be made about the | |
100 | // constructor, in which the fenceless constructor initialization | |
101 | // of state_ is piggybacked on whatever synchronization mechanism | |
102 | // distributes knowledge of the Baton's existence | |
103 | state_.store(INIT, std::memory_order_relaxed); | |
104 | } | |
105 | ||
106 | /// Causes wait() to wake up. For each lifetime of a Baton (where a | |
107 | /// lifetime starts at construction or reset() and ends at | |
108 | /// destruction or reset()) there can be at most one call to post(), | |
109 | /// in the single poster version. Any thread may call post(). | |
110 | void post() noexcept { | |
111 | if (!MayBlock) { | |
112 | /// Spin-only version | |
113 | /// | |
114 | assert( | |
115 | ((1 << state_.load(std::memory_order_relaxed)) & | |
116 | ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0); | |
117 | state_.store(EARLY_DELIVERY, std::memory_order_release); | |
118 | return; | |
119 | } | |
120 | ||
121 | /// May-block versions | |
122 | /// | |
123 | uint32_t before = state_.load(std::memory_order_acquire); | |
124 | ||
125 | assert(before == INIT || before == WAITING || before == TIMED_OUT); | |
126 | ||
127 | if (before == INIT && | |
128 | state_.compare_exchange_strong( | |
129 | before, | |
130 | EARLY_DELIVERY, | |
131 | std::memory_order_release, | |
132 | std::memory_order_relaxed)) { | |
133 | return; | |
134 | } | |
135 | ||
136 | assert(before == WAITING || before == TIMED_OUT); | |
137 | ||
138 | if (before == TIMED_OUT) { | |
139 | return; | |
140 | } | |
141 | ||
142 | assert(before == WAITING); | |
143 | state_.store(LATE_DELIVERY, std::memory_order_release); | |
144 | detail::futexWake(&state_, 1); | |
145 | } | |
146 | ||
147 | /// Waits until post() has been called in the current Baton lifetime. | |
148 | /// May be called at most once during a Baton lifetime (construction | |
149 | /// |reset until destruction|reset). If post is called before wait in | |
150 | /// the current lifetime then this method returns immediately. | |
151 | /// | |
152 | /// The restriction that there can be at most one wait() per lifetime | |
153 | /// could be relaxed somewhat without any perf or size regressions, | |
154 | /// but by making this condition very restrictive we can provide better | |
155 | /// checking in debug builds. | |
156 | void wait(const WaitOptions& opt = wait_options()) noexcept { | |
157 | if (try_wait()) { | |
158 | return; | |
159 | } | |
160 | ||
161 | auto const deadline = std::chrono::steady_clock::time_point::max(); | |
162 | tryWaitSlow(deadline, opt); | |
163 | } | |
164 | ||
165 | /// Similar to wait, but doesn't block the thread if it hasn't been posted. | |
166 | /// | |
167 | /// try_wait has the following semantics: | |
168 | /// - It is ok to call try_wait any number times on the same baton until | |
169 | /// try_wait reports that the baton has been posted. | |
170 | /// - It is ok to call timed_wait or wait on the same baton if try_wait | |
171 | /// reports that baton hasn't been posted. | |
172 | /// - If try_wait indicates that the baton has been posted, it is invalid to | |
173 | /// call wait, try_wait or timed_wait on the same baton without resetting | |
174 | /// | |
175 | /// @return true if baton has been posted, false othewise | |
176 | bool try_wait() const noexcept { | |
177 | return ready(); | |
178 | } | |
179 | ||
180 | /// Similar to wait, but with a timeout. The thread is unblocked if the | |
181 | /// timeout expires. | |
182 | /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed | |
183 | /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other | |
184 | /// words, after try_wait_for the caller can't invoke | |
185 | /// wait/try_wait/try_wait_for/try_wait_until | |
186 | /// again on the same baton without resetting it. | |
187 | /// | |
188 | /// @param timeout Time until which the thread can block | |
189 | /// @return true if the baton was posted to before timeout, | |
190 | /// false otherwise | |
191 | template <typename Rep, typename Period> | |
192 | bool try_wait_for( | |
193 | const std::chrono::duration<Rep, Period>& timeout, | |
194 | const WaitOptions& opt = wait_options()) noexcept { | |
195 | if (try_wait()) { | |
196 | return true; | |
197 | } | |
198 | ||
199 | auto const deadline = std::chrono::steady_clock::now() + timeout; | |
200 | return tryWaitSlow(deadline, opt); | |
201 | } | |
202 | ||
203 | /// Similar to wait, but with a deadline. The thread is unblocked if the | |
204 | /// deadline expires. | |
205 | /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed | |
206 | /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other | |
207 | /// words, after try_wait_until the caller can't invoke | |
208 | /// wait/try_wait/try_wait_for/try_wait_until | |
209 | /// again on the same baton without resetting it. | |
210 | /// | |
211 | /// @param deadline Time until which the thread can block | |
212 | /// @return true if the baton was posted to before deadline, | |
213 | /// false otherwise | |
214 | template <typename Clock, typename Duration> | |
215 | bool try_wait_until( | |
216 | const std::chrono::time_point<Clock, Duration>& deadline, | |
217 | const WaitOptions& opt = wait_options()) noexcept { | |
218 | if (try_wait()) { | |
219 | return true; | |
220 | } | |
221 | ||
222 | return tryWaitSlow(deadline, opt); | |
223 | } | |
224 | ||
225 | /// Alias to try_wait_for. Deprecated. | |
226 | template <typename Rep, typename Period> | |
227 | bool timed_wait( | |
228 | const std::chrono::duration<Rep, Period>& timeout) noexcept { | |
229 | return try_wait_for(timeout); | |
230 | } | |
231 | ||
232 | /// Alias to try_wait_until. Deprecated. | |
233 | template <typename Clock, typename Duration> | |
234 | bool timed_wait( | |
235 | const std::chrono::time_point<Clock, Duration>& deadline) noexcept { | |
236 | return try_wait_until(deadline); | |
237 | } | |
238 | ||
239 | private: | |
240 | enum State : uint32_t { | |
241 | INIT = 0, | |
242 | EARLY_DELIVERY = 1, | |
243 | WAITING = 2, | |
244 | LATE_DELIVERY = 3, | |
245 | TIMED_OUT = 4, | |
246 | }; | |
247 | ||
248 | template <typename Clock, typename Duration> | |
249 | bool tryWaitSlow( | |
250 | const std::chrono::time_point<Clock, Duration>& deadline, | |
251 | const WaitOptions& opt) noexcept { | |
252 | switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { | |
253 | case detail::spin_result::success: | |
254 | return true; | |
255 | case detail::spin_result::timeout: | |
256 | return false; | |
257 | case detail::spin_result::advance: | |
258 | break; | |
259 | } | |
260 | ||
261 | if (!MayBlock) { | |
262 | switch (detail::spin_yield_until(deadline, [=] { return ready(); })) { | |
263 | case detail::spin_result::success: | |
264 | return true; | |
265 | case detail::spin_result::timeout: | |
266 | return false; | |
267 | case detail::spin_result::advance: | |
268 | break; | |
269 | } | |
270 | } | |
271 | ||
272 | // guess we have to block :( | |
273 | uint32_t expected = INIT; | |
274 | if (!state_.compare_exchange_strong( | |
275 | expected, | |
276 | WAITING, | |
277 | std::memory_order_relaxed, | |
278 | std::memory_order_relaxed)) { | |
279 | // CAS failed, last minute reprieve | |
280 | assert(expected == EARLY_DELIVERY); | |
281 | // TODO: move the acquire to the compare_exchange failure load after C++17 | |
282 | std::atomic_thread_fence(std::memory_order_acquire); | |
283 | return true; | |
284 | } | |
285 | ||
286 | while (true) { | |
287 | auto rv = detail::futexWaitUntil(&state_, WAITING, deadline); | |
288 | ||
289 | // Awoken by the deadline passing. | |
290 | if (rv == detail::FutexResult::TIMEDOUT) { | |
291 | assert(deadline != (std::chrono::time_point<Clock, Duration>::max())); | |
292 | state_.store(TIMED_OUT, std::memory_order_release); | |
293 | return false; | |
294 | } | |
295 | ||
296 | // Probably awoken by a matching wake event, but could also by awoken | |
297 | // by an asynchronous signal or by a spurious wakeup. | |
298 | // | |
299 | // state_ is the truth even if FUTEX_WAIT reported a matching | |
300 | // FUTEX_WAKE, since we aren't using type-stable storage and we | |
301 | // don't guarantee reuse. The scenario goes like this: thread | |
302 | // A's last touch of a Baton is a call to wake(), which stores | |
303 | // LATE_DELIVERY and gets an unlucky context switch before delivering | |
304 | // the corresponding futexWake. Thread B sees LATE_DELIVERY | |
305 | // without consuming a futex event, because it calls futexWait | |
306 | // with an expected value of WAITING and hence doesn't go to sleep. | |
307 | // B returns, so the Baton's memory is reused and becomes another | |
308 | // Baton (or a reuse of this one). B calls futexWait on the new | |
309 | // Baton lifetime, then A wakes up and delivers a spurious futexWake | |
310 | // to the same memory location. B's futexWait will then report a | |
311 | // consumed wake event even though state_ is still WAITING. | |
312 | // | |
313 | // It would be possible to add an extra state_ dance to communicate | |
314 | // that the futexWake has been sent so that we can be sure to consume | |
315 | // it before returning, but that would be a perf and complexity hit. | |
316 | uint32_t s = state_.load(std::memory_order_acquire); | |
317 | assert(s == WAITING || s == LATE_DELIVERY); | |
318 | if (s == LATE_DELIVERY) { | |
319 | return true; | |
320 | } | |
321 | } | |
322 | } | |
323 | ||
324 | detail::Futex<Atom> state_; | |
325 | }; | |
326 | ||
327 | } // namespace folly |