git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/third-party/folly/folly/detail/Futex.cpp
buildsys: change download over to reef release
[ceph.git] / ceph / src / rocksdb / third-party / folly / folly / detail / Futex.cpp
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #include <folly/detail/Futex.h>
7 #include <folly/portability/SysSyscall.h>
8 #include <stdint.h>
9 #include <string.h>
10 #include <array>
11 #include <cerrno>
12
13 #include <folly/synchronization/ParkingLot.h>
14
15 #ifdef __linux__
16 #include <linux/futex.h>
17 #endif
18
19 #ifndef _WIN32
20 #include <unistd.h>
21 #endif
22
23 using namespace std::chrono;
24
25 namespace folly {
26 namespace detail {
27
28 namespace {
29
30 ////////////////////////////////////////////////////
31 // native implementation using the futex() syscall
32
33 #ifdef __linux__
34
/// Fallback definitions for the futex operation codes and flags used below.
/// Some toolchains (Android's, for example) ship headers that omit part of
/// the futex API even though the kernel supports it, so each constant is
/// defined here only when the system headers did not already provide it.
#if !defined(FUTEX_WAIT_BITSET)
#define FUTEX_WAIT_BITSET 9
#endif

#if !defined(FUTEX_WAKE_BITSET)
#define FUTEX_WAKE_BITSET 10
#endif

#if !defined(FUTEX_PRIVATE_FLAG)
#define FUTEX_PRIVATE_FLAG 128
#endif

#if !defined(FUTEX_CLOCK_REALTIME)
#define FUTEX_CLOCK_REALTIME 256
#endif
50
/// Issues a FUTEX_WAKE_BITSET (process-private) futex syscall on `addr`.
///
/// @param addr      address of the futex word, passed as addr1
/// @param count     passed as the syscall's `val` argument
/// @param wakeMask  passed as the syscall's `val3` argument
/// @return the syscall's return value narrowed to int, or 0 if it was negative
int nativeFutexWake(const void* addr, int count, uint32_t wakeMask) {
  const int op = FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG;
  const long woken = syscall(
      __NR_futex,
      addr, /* addr1 */
      op, /* op */
      count, /* val */
      nullptr, /* timeout */
      nullptr, /* addr2 */
      wakeMask); /* val3 */

  // Errors on wake are deliberately swallowed: a futex may be guarding its
  // own destruction, much like the glibc sem_post/sem_wait bug described at
  // https://sourceware.org/bugzilla/show_bug.cgi?id=12674
  return woken < 0 ? 0 : static_cast<int>(woken);
}
70
/// Converts an absolute time point into a `struct timespec` holding the
/// whole seconds and leftover nanoseconds since the clock's epoch.
///
/// Time points before the epoch are clamped to the epoch itself, because the
/// kernel's timespec_valid check requires non-negative seconds and a
/// nanosecond field in [0, 1G).
///
/// @param absTime  absolute deadline expressed on `Clock`
/// @return the (clamped) deadline as seconds + nanoseconds
template <class Clock>
struct timespec timeSpecFromTimePoint(std::chrono::time_point<Clock> absTime) {
  namespace chr = std::chrono;

  // chrono's seconds/nanoseconds use `long long` representations, while
  // timespec's fields use narrower types; cast into durations whose
  // representation matches what is stored in the timespec.
  using SecondsAsTimeT = chr::duration<std::time_t, chr::seconds::period>;
  using NanosAsLong = chr::duration<long int, chr::nanoseconds::period>;

  const auto raw = absTime.time_since_epoch();
  const auto sinceEpoch = raw.count() < 0 ? Clock::duration::zero() : raw;

  const auto wholeSeconds = chr::duration_cast<SecondsAsTimeT>(sinceEpoch);
  const auto remainder =
      chr::duration_cast<NanosAsLong>(sinceEpoch - wholeSeconds);

  struct timespec out = {};
  out.tv_sec = wholeSeconds.count();
  out.tv_nsec = remainder.count();
  return out;
}
90
91 FutexResult nativeFutexWaitImpl(
92 const void* addr,
93 uint32_t expected,
94 system_clock::time_point const* absSystemTime,
95 steady_clock::time_point const* absSteadyTime,
96 uint32_t waitMask) {
97 assert(absSystemTime == nullptr || absSteadyTime == nullptr);
98
99 int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG;
100 struct timespec ts;
101 struct timespec* timeout = nullptr;
102
103 if (absSystemTime != nullptr) {
104 op |= FUTEX_CLOCK_REALTIME;
105 ts = timeSpecFromTimePoint(*absSystemTime);
106 timeout = &ts;
107 } else if (absSteadyTime != nullptr) {
108 ts = timeSpecFromTimePoint(*absSteadyTime);
109 timeout = &ts;
110 }
111
112 // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
113 // value - http://locklessinc.com/articles/futex_cheat_sheet/
114 long rv = syscall(
115 __NR_futex,
116 addr, /* addr1 */
117 op, /* op */
118 expected, /* val */
119 timeout, /* timeout */
120 nullptr, /* addr2 */
121 waitMask); /* val3 */
122
123 if (rv == 0) {
124 return FutexResult::AWOKEN;
125 } else {
126 switch (errno) {
127 case ETIMEDOUT:
128 assert(timeout != nullptr);
129 return FutexResult::TIMEDOUT;
130 case EINTR:
131 return FutexResult::INTERRUPTED;
132 case EWOULDBLOCK:
133 return FutexResult::VALUE_CHANGED;
134 default:
135 assert(false);
136 // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid
137 // op (should be impossible) or an invalid timeout (should have
138 // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT
139 // means *addr points to invalid memory, which is unlikely because
140 // the caller should have segfaulted already. We can either
141 // crash, or return a value that lets the process continue for
142 // a bit. We choose the latter. VALUE_CHANGED probably turns the
143 // caller into a spin lock.
144 return FutexResult::VALUE_CHANGED;
145 }
146 }
147 }
148
149 #endif // __linux__
150
151 ///////////////////////////////////////////////////////
152 // compatibility implementation using standard C++ API
153
154 using Lot = ParkingLot<uint32_t>;
155 Lot parkingLot;
156
157 int emulatedFutexWake(const void* addr, int count, uint32_t waitMask) {
158 int woken = 0;
159 parkingLot.unpark(addr, [&](const uint32_t& mask) {
160 if ((mask & waitMask) == 0) {
161 return UnparkControl::RetainContinue;
162 }
163 assert(count > 0);
164 count--;
165 woken++;
166 return count > 0 ? UnparkControl::RemoveContinue
167 : UnparkControl::RemoveBreak;
168 });
169 return woken;
170 }
171
172 template <typename F>
173 FutexResult emulatedFutexWaitImpl(
174 F* futex,
175 uint32_t expected,
176 system_clock::time_point const* absSystemTime,
177 steady_clock::time_point const* absSteadyTime,
178 uint32_t waitMask) {
179 static_assert(
180 std::is_same<F, const Futex<std::atomic>>::value ||
181 std::is_same<F, const Futex<EmulatedFutexAtomic>>::value,
182 "Type F must be either Futex<std::atomic> or Futex<EmulatedFutexAtomic>");
183 ParkResult res;
184 if (absSystemTime) {
185 res = parkingLot.park_until(
186 futex,
187 waitMask,
188 [&] { return *futex == expected; },
189 [] {},
190 *absSystemTime);
191 } else if (absSteadyTime) {
192 res = parkingLot.park_until(
193 futex,
194 waitMask,
195 [&] { return *futex == expected; },
196 [] {},
197 *absSteadyTime);
198 } else {
199 res = parkingLot.park(
200 futex, waitMask, [&] { return *futex == expected; }, [] {});
201 }
202 switch (res) {
203 case ParkResult::Skip:
204 return FutexResult::VALUE_CHANGED;
205 case ParkResult::Unpark:
206 return FutexResult::AWOKEN;
207 case ParkResult::Timeout:
208 return FutexResult::TIMEDOUT;
209 }
210
211 return FutexResult::INTERRUPTED;
212 }
213
214 } // namespace
215
216 /////////////////////////////////
217 // Futex<> overloads
218
219 int futexWakeImpl(
220 const Futex<std::atomic>* futex,
221 int count,
222 uint32_t wakeMask) {
223 #ifdef __linux__
224 return nativeFutexWake(futex, count, wakeMask);
225 #else
226 return emulatedFutexWake(futex, count, wakeMask);
227 #endif
228 }
229
230 int futexWakeImpl(
231 const Futex<EmulatedFutexAtomic>* futex,
232 int count,
233 uint32_t wakeMask) {
234 return emulatedFutexWake(futex, count, wakeMask);
235 }
236
237 FutexResult futexWaitImpl(
238 const Futex<std::atomic>* futex,
239 uint32_t expected,
240 system_clock::time_point const* absSystemTime,
241 steady_clock::time_point const* absSteadyTime,
242 uint32_t waitMask) {
243 #ifdef __linux__
244 return nativeFutexWaitImpl(
245 futex, expected, absSystemTime, absSteadyTime, waitMask);
246 #else
247 return emulatedFutexWaitImpl(
248 futex, expected, absSystemTime, absSteadyTime, waitMask);
249 #endif
250 }
251
252 FutexResult futexWaitImpl(
253 const Futex<EmulatedFutexAtomic>* futex,
254 uint32_t expected,
255 system_clock::time_point const* absSystemTime,
256 steady_clock::time_point const* absSteadyTime,
257 uint32_t waitMask) {
258 return emulatedFutexWaitImpl(
259 futex, expected, absSystemTime, absSteadyTime, waitMask);
260 }
261
262 } // namespace detail
263 } // namespace folly