]>
Commit | Line | Data |
---|---|---|
532ac7d7 XL |
1 | use crate::cmp; |
2 | use crate::ffi::CStr; | |
3 | use crate::io; | |
4 | use crate::mem; | |
136023e0 | 5 | use crate::num::NonZeroUsize; |
532ac7d7 | 6 | use crate::ptr; |
ba9703b0 | 7 | use crate::sys::{os, stack_overflow}; |
532ac7d7 XL |
8 | use crate::time::Duration; |
9 | ||
a2a8927a XL |
10 | #[cfg(all(target_os = "linux", target_env = "gnu"))] |
11 | use crate::sys::weak::dlsym; | |
12 | #[cfg(any(target_os = "solaris", target_os = "illumos"))] | |
136023e0 | 13 | use crate::sys::weak::weak; |
/// Default minimum stack size (2 MiB) for every target without a dedicated
/// value below.
#[cfg(not(any(target_os = "l4re", target_os = "vxworks", target_os = "espidf")))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 << 20;
/// Default minimum stack size on L4Re (1 MiB).
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20;
/// Default minimum stack size on VxWorks (256 KiB).
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 << 10;
/// On ESP-IDF, 0 indicates that the stack size configured in the ESP-IDF
/// menuconfig system should be used.
#[cfg(target_os = "espidf")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0;
22 | ||
/// Hand-written declarations of the Zircon items that the Fuchsia
/// `set_name` implementation in this file calls.
#[cfg(target_os = "fuchsia")]
mod zircon {
    // Handle and status types as declared for the two extern functions below.
    type zx_handle_t = u32;
    type zx_status_t = i32;
    // Property identifier passed to `zx_object_set_property` when naming a thread.
    pub const ZX_PROP_NAME: u32 = 3;

    extern "C" {
        // Sets `property` on `handle` from the `value_size` bytes at `value`.
        pub fn zx_object_set_property(
            handle: zx_handle_t,
            property: u32,
            value: *const libc::c_void,
            value_size: libc::size_t,
        ) -> zx_status_t;
        // Returns a handle for the calling thread.
        pub fn zx_thread_self() -> zx_handle_t;
    }
}
ea8adc8c | 39 | |
d9579d0f AL |
/// Handle to a native thread, identified by its `pthread_t`.
///
/// Dropping the handle detaches the thread (see the `Drop` impl); `join` and
/// `into_id` consume the handle without detaching.
pub struct Thread {
    // Identifier written by `pthread_create` in `Thread::new`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
48 | ||
49 | impl Thread { | |
0731742a | 50 | // unsafe: see thread::Builder::spawn_unchecked for safety requirements |
60c5eb7d | 51 | pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> { |
ba9703b0 | 52 | let p = Box::into_raw(box p); |
d9579d0f AL |
53 | let mut native: libc::pthread_t = mem::zeroed(); |
54 | let mut attr: libc::pthread_attr_t = mem::zeroed(); | |
92a42be0 | 55 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
d9579d0f | 56 | |
94222f64 XL |
57 | #[cfg(target_os = "espidf")] |
58 | if stack > 0 { | |
59 | // Only set the stack if a non-zero value is passed | |
60 | // 0 is used as an indication that the default stack size configured in the ESP-IDF menuconfig system should be used | |
61 | assert_eq!( | |
62 | libc::pthread_attr_setstacksize(&mut attr, cmp::max(stack, min_stack_size(&attr))), | |
63 | 0 | |
64 | ); | |
65 | } | |
66 | ||
67 | #[cfg(not(target_os = "espidf"))] | |
68 | { | |
69 | let stack_size = cmp::max(stack, min_stack_size(&attr)); | |
70 | ||
71 | match libc::pthread_attr_setstacksize(&mut attr, stack_size) { | |
72 | 0 => {} | |
73 | n => { | |
74 | assert_eq!(n, libc::EINVAL); | |
75 | // EINVAL means |stack_size| is either too small or not a | |
76 | // multiple of the system page size. Because it's definitely | |
77 | // >= PTHREAD_STACK_MIN, it must be an alignment issue. | |
78 | // Round up to the nearest page and try again. | |
79 | let page_size = os::page_size(); | |
80 | let stack_size = | |
81 | (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1); | |
82 | assert_eq!(libc::pthread_attr_setstacksize(&mut attr, stack_size), 0); | |
83 | } | |
84 | }; | |
85 | } | |
d9579d0f | 86 | |
ba9703b0 XL |
87 | let ret = libc::pthread_create(&mut native, &attr, thread_start, p as *mut _); |
88 | // Note: if the thread creation fails and this assert fails, then p will | |
89 | // be leaked. However, an alternative design could cause double-free | |
90 | // which is clearly worse. | |
92a42be0 | 91 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); |
d9579d0f AL |
92 | |
93 | return if ret != 0 { | |
ba9703b0 XL |
94 | // The thread failed to start and as a result p was not consumed. Therefore, it is |
95 | // safe to reconstruct the box so that it gets deallocated. | |
96 | drop(Box::from_raw(p)); | |
d9579d0f AL |
97 | Err(io::Error::from_raw_os_error(ret)) |
98 | } else { | |
d9579d0f AL |
99 | Ok(Thread { id: native }) |
100 | }; | |
101 | ||
60c5eb7d XL |
102 | extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { |
103 | unsafe { | |
ba9703b0 XL |
104 | // Next, set up our stack overflow handler which may get triggered if we run |
105 | // out of stack. | |
106 | let _handler = stack_overflow::Handler::new(); | |
107 | // Finally, let's run some code. | |
108 | Box::from_raw(main as *mut Box<dyn FnOnce()>)(); | |
60c5eb7d | 109 | } |
e9174d1e | 110 | ptr::null_mut() |
d9579d0f AL |
111 | } |
112 | } | |
113 | ||
114 | pub fn yield_now() { | |
92a42be0 | 115 | let ret = unsafe { libc::sched_yield() }; |
d9579d0f AL |
116 | debug_assert_eq!(ret, 0); |
117 | } | |
118 | ||
/// Names the calling thread through `prctl(PR_SET_NAME, ...)`.
///
/// The result of the call is ignored.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing `prctl` arguments are passed as zero.
    const ZERO: libc::c_ulong = 0;

    unsafe {
        libc::prctl(PR_SET_NAME, name.as_ptr(), ZERO, ZERO, ZERO);
    }
}
132 | ||
f2b60f7d FG |
133 | #[cfg(target_os = "linux")] |
134 | pub fn set_name(name: &CStr) { | |
135 | const TASK_COMM_LEN: usize = 16; | |
136 | ||
137 | unsafe { | |
138 | // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20. | |
139 | let name = truncate_cstr(name, TASK_COMM_LEN); | |
2b03887a FG |
140 | let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr()); |
141 | // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked. | |
142 | debug_assert_eq!(res, 0); | |
f2b60f7d FG |
143 | } |
144 | } | |
145 | ||
/// Names the calling thread with `pthread_set_name_np`; nothing is reported
/// back to the caller.
#[cfg(any(target_os = "freebsd", target_os = "dragonfly", target_os = "openbsd"))]
pub fn set_name(name: &CStr) {
    let this_thread = unsafe { libc::pthread_self() };
    unsafe { libc::pthread_set_name_np(this_thread, name.as_ptr()) };
}
152 | ||
/// Names the calling thread with the single-argument `pthread_setname_np`,
/// truncating `name` to `libc::MAXTHREADNAMESIZE` bytes (nul included) first.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "watchos"))]
pub fn set_name(name: &CStr) {
    let short = truncate_cstr(name, libc::MAXTHREADNAMESIZE);
    let status = unsafe { libc::pthread_setname_np(short.as_ptr()) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
162 | ||
/// Names the calling thread with `pthread_setname_np`, passing the literal
/// format string `"%s"` and `name` as its argument.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    // The literal ends in exactly one nul byte, as the unchecked constructor requires.
    let format = unsafe { CStr::from_bytes_with_nul_unchecked(b"%s\0".as_slice()) };
    let argument = name.as_ptr() as *mut libc::c_void;
    let status =
        unsafe { libc::pthread_setname_np(libc::pthread_self(), format.as_ptr(), argument) };
    debug_assert_eq!(status, 0);
}
416331ca | 175 | |
/// Names the calling thread with `pthread_setname_np` when that symbol is
/// available through the `weak!` lookup; otherwise does nothing.
#[cfg(any(target_os = "solaris", target_os = "illumos"))]
pub fn set_name(name: &CStr) {
    weak! {
        fn pthread_setname_np(
            libc::pthread_t, *const libc::c_char
        ) -> libc::c_int
    }

    let Some(setter) = pthread_setname_np.get() else { return };
    let status = unsafe { setter(libc::pthread_self(), name.as_ptr()) };
    debug_assert_eq!(status, 0);
}
189 | ||
94222f64 XL |
/// Names the calling thread by setting `ZX_PROP_NAME` on the handle returned
/// by `zx_thread_self`. The status returned by the call is ignored.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use self::zircon::{zx_object_set_property, zx_thread_self, ZX_PROP_NAME};

    // Length without the terminating nul.
    let len = name.to_bytes().len();
    unsafe {
        let me = zx_thread_self();
        zx_object_set_property(me, ZX_PROP_NAME, name.as_ptr() as *const libc::c_void, len);
    }
}
202 | ||
/// Names the calling thread: looks up its id with `find_thread(null)` and
/// passes it to `rename_thread`. The result is ignored.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    unsafe {
        libc::rename_thread(libc::find_thread(ptr::null_mut()), name.as_ptr());
    }
}
210 | ||
60c5eb7d XL |
/// No-op `set_name` for the targets selected by the `cfg` below; the
/// requested name is discarded.
#[cfg(any(
    target_env = "newlib",
    target_os = "l4re",
    target_os = "emscripten",
    target_os = "redox",
    target_os = "vxworks"
))]
pub fn set_name(_name: &CStr) {
    // Newlib, Emscripten, and VxWorks have no way to set a thread name.
    // L4Re and Redox are also routed here, so no thread-naming call is made
    // for them either.
}
b039eaaf | 221 | |
94222f64 | 222 | #[cfg(not(target_os = "espidf"))] |
d9579d0f | 223 | pub fn sleep(dur: Duration) { |
3157f602 | 224 | let mut secs = dur.as_secs(); |
abe05a73 | 225 | let mut nsecs = dur.subsec_nanos() as _; |
d9579d0f AL |
226 | |
227 | // If we're awoken with a signal then the return value will be -1 and | |
228 | // nanosleep will fill in `ts` with the remaining time. | |
229 | unsafe { | |
3157f602 XL |
230 | while secs > 0 || nsecs > 0 { |
231 | let mut ts = libc::timespec { | |
f035d41b | 232 | tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t, |
3157f602 XL |
233 | tv_nsec: nsecs, |
234 | }; | |
235 | secs -= ts.tv_sec as u64; | |
29967ef6 XL |
236 | let ts_ptr = &mut ts as *mut _; |
237 | if libc::nanosleep(ts_ptr, ts_ptr) == -1 { | |
3157f602 XL |
238 | assert_eq!(os::errno(), libc::EINTR); |
239 | secs += ts.tv_sec as u64; | |
240 | nsecs = ts.tv_nsec; | |
241 | } else { | |
242 | nsecs = 0; | |
243 | } | |
d9579d0f AL |
244 | } |
245 | } | |
246 | } | |
247 | ||
94222f64 XL |
/// Blocks the calling thread for `dur` using `usleep`, issuing the sleep in
/// pieces of at most `u32::MAX` microseconds.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    let mut remaining = dur.as_micros();
    while remaining > 0 {
        // `usleep` takes a 32-bit microsecond count, so clamp each piece.
        let piece = cmp::min(remaining, u32::MAX as u128) as u32;
        unsafe {
            libc::usleep(piece);
        }
        remaining -= piece as u128;
    }
}
260 | ||
d9579d0f AL |
261 | pub fn join(self) { |
262 | unsafe { | |
92a42be0 | 263 | let ret = libc::pthread_join(self.id, ptr::null_mut()); |
d9579d0f | 264 | mem::forget(self); |
60c5eb7d | 265 | assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret)); |
d9579d0f AL |
266 | } |
267 | } | |
92a42be0 | 268 | |
60c5eb7d XL |
269 | pub fn id(&self) -> libc::pthread_t { |
270 | self.id | |
271 | } | |
92a42be0 SL |
272 | |
273 | pub fn into_id(self) -> libc::pthread_t { | |
274 | let id = self.id; | |
275 | mem::forget(self); | |
276 | id | |
277 | } | |
d9579d0f AL |
278 | } |
279 | ||
280 | impl Drop for Thread { | |
281 | fn drop(&mut self) { | |
92a42be0 | 282 | let ret = unsafe { libc::pthread_detach(self.id) }; |
d9579d0f AL |
283 | debug_assert_eq!(ret, 0); |
284 | } | |
285 | } | |
1a4d82fc | 286 | |
f2b60f7d FG |
287 | #[cfg(any(target_os = "linux", target_os = "macos", target_os = "ios", target_os = "watchos"))] |
288 | fn truncate_cstr(cstr: &CStr, max_with_nul: usize) -> crate::borrow::Cow<'_, CStr> { | |
289 | use crate::{borrow::Cow, ffi::CString}; | |
290 | ||
291 | if cstr.to_bytes_with_nul().len() > max_with_nul { | |
292 | let bytes = cstr.to_bytes()[..max_with_nul - 1].to_vec(); | |
293 | // SAFETY: the non-nul bytes came straight from a CStr. | |
294 | // (CString will add the terminating nul.) | |
295 | Cow::Owned(unsafe { CString::from_vec_unchecked(bytes) }) | |
296 | } else { | |
297 | Cow::Borrowed(cstr) | |
298 | } | |
299 | } | |
300 | ||
c295e0f8 | 301 | pub fn available_parallelism() -> io::Result<NonZeroUsize> { |
136023e0 XL |
302 | cfg_if::cfg_if! { |
303 | if #[cfg(any( | |
304 | target_os = "android", | |
305 | target_os = "emscripten", | |
306 | target_os = "fuchsia", | |
307 | target_os = "ios", | |
308 | target_os = "linux", | |
309 | target_os = "macos", | |
310 | target_os = "solaris", | |
311 | target_os = "illumos", | |
312 | ))] { | |
3c0e092e XL |
313 | #[cfg(any(target_os = "android", target_os = "linux"))] |
314 | { | |
064997fb | 315 | let quota = cgroups::quota().max(1); |
3c0e092e | 316 | let mut set: libc::cpu_set_t = unsafe { mem::zeroed() }; |
5e7ed085 FG |
317 | unsafe { |
318 | if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 { | |
319 | let count = libc::CPU_COUNT(&set) as usize; | |
320 | let count = count.min(quota); | |
321 | // SAFETY: affinity mask can't be empty and the quota gets clamped to a minimum of 1 | |
322 | return Ok(NonZeroUsize::new_unchecked(count)); | |
323 | } | |
3c0e092e XL |
324 | } |
325 | } | |
136023e0 XL |
326 | match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } { |
327 | -1 => Err(io::Error::last_os_error()), | |
5099ac24 | 328 | 0 => Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform")), |
136023e0 XL |
329 | cpus => Ok(unsafe { NonZeroUsize::new_unchecked(cpus as usize) }), |
330 | } | |
331 | } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly", target_os = "netbsd"))] { | |
332 | use crate::ptr; | |
333 | ||
334 | let mut cpus: libc::c_uint = 0; | |
335 | let mut cpus_size = crate::mem::size_of_val(&cpus); | |
336 | ||
337 | unsafe { | |
338 | cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint; | |
339 | } | |
340 | ||
341 | // Fallback approach in case of errors or no hardware threads. | |
342 | if cpus < 1 { | |
343 | let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0]; | |
344 | let res = unsafe { | |
345 | libc::sysctl( | |
346 | mib.as_mut_ptr(), | |
347 | 2, | |
348 | &mut cpus as *mut _ as *mut _, | |
349 | &mut cpus_size as *mut _ as *mut _, | |
350 | ptr::null_mut(), | |
351 | 0, | |
352 | ) | |
353 | }; | |
354 | ||
355 | // Handle errors if any. | |
356 | if res == -1 { | |
357 | return Err(io::Error::last_os_error()); | |
358 | } else if cpus == 0 { | |
5099ac24 | 359 | return Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform")); |
136023e0 XL |
360 | } |
361 | } | |
362 | Ok(unsafe { NonZeroUsize::new_unchecked(cpus as usize) }) | |
363 | } else if #[cfg(target_os = "openbsd")] { | |
364 | use crate::ptr; | |
365 | ||
366 | let mut cpus: libc::c_uint = 0; | |
367 | let mut cpus_size = crate::mem::size_of_val(&cpus); | |
368 | let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0]; | |
369 | ||
370 | let res = unsafe { | |
371 | libc::sysctl( | |
372 | mib.as_mut_ptr(), | |
373 | 2, | |
374 | &mut cpus as *mut _ as *mut _, | |
375 | &mut cpus_size as *mut _ as *mut _, | |
376 | ptr::null_mut(), | |
377 | 0, | |
378 | ) | |
379 | }; | |
380 | ||
381 | // Handle errors if any. | |
382 | if res == -1 { | |
383 | return Err(io::Error::last_os_error()); | |
384 | } else if cpus == 0 { | |
5099ac24 | 385 | return Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform")); |
136023e0 XL |
386 | } |
387 | ||
388 | Ok(unsafe { NonZeroUsize::new_unchecked(cpus as usize) }) | |
c295e0f8 XL |
389 | } else if #[cfg(target_os = "haiku")] { |
390 | // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus` | |
391 | // `get_system_info` calls then `smp_get_num_cpus` | |
392 | unsafe { | |
393 | let mut sinfo: libc::system_info = crate::mem::zeroed(); | |
394 | let res = libc::get_system_info(&mut sinfo); | |
395 | ||
396 | if res != libc::B_OK { | |
5099ac24 | 397 | return Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform")); |
c295e0f8 XL |
398 | } |
399 | ||
400 | Ok(NonZeroUsize::new_unchecked(sinfo.cpu_count as usize)) | |
401 | } | |
136023e0 | 402 | } else { |
c295e0f8 | 403 | // FIXME: implement on vxWorks, Redox, l4re |
5099ac24 | 404 | Err(io::const_io_error!(io::ErrorKind::Unsupported, "Getting the number of hardware threads is not supported on the target platform")) |
136023e0 XL |
405 | } |
406 | } | |
407 | } | |
408 | ||
5e7ed085 | 409 | #[cfg(any(target_os = "android", target_os = "linux"))] |
064997fb FG |
410 | mod cgroups { |
411 | //! Currently not covered | |
412 | //! * cgroup v2 in non-standard mountpoints | |
413 | //! * paths containing control characters or spaces, since those would be escaped in procfs | |
414 | //! output and we don't unescape | |
415 | use crate::borrow::Cow; | |
5e7ed085 FG |
416 | use crate::ffi::OsString; |
417 | use crate::fs::{try_exists, File}; | |
418 | use crate::io::Read; | |
064997fb | 419 | use crate::io::{BufRead, BufReader}; |
5e7ed085 | 420 | use crate::os::unix::ffi::OsStringExt; |
064997fb | 421 | use crate::path::Path; |
5e7ed085 | 422 | use crate::path::PathBuf; |
064997fb | 423 | use crate::str::from_utf8; |
5e7ed085 | 424 | |
064997fb FG |
/// Which cgroup hierarchy version a `/proc/self/cgroup` line was classified as.
#[derive(PartialEq)]
enum Cgroup {
    // Line whose controller list contains `cpu`.
    V1,
    // Line whose controller list is empty.
    V2,
}
430 | ||
064997fb FG |
431 | /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot |
432 | /// be determined or is not set. | |
433 | pub(super) fn quota() -> usize { | |
434 | let mut quota = usize::MAX; | |
435 | if cfg!(miri) { | |
436 | // Attempting to open a file fails under default flags due to isolation. | |
437 | // And Miri does not have parallelism anyway. | |
438 | return quota; | |
439 | } | |
440 | ||
441 | let _: Option<()> = try { | |
442 | let mut buf = Vec::with_capacity(128); | |
443 | // find our place in the cgroup hierarchy | |
444 | File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?; | |
445 | let (cgroup_path, version) = | |
446 | buf.split(|&c| c == b'\n').fold(None, |previous, line| { | |
447 | let mut fields = line.splitn(3, |&c| c == b':'); | |
448 | // 2nd field is a list of controllers for v1 or empty for v2 | |
449 | let version = match fields.nth(1) { | |
450 | Some(b"") => Cgroup::V2, | |
451 | Some(controllers) | |
452 | if from_utf8(controllers) | |
f2b60f7d | 453 | .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) => |
064997fb FG |
454 | { |
455 | Cgroup::V1 | |
456 | } | |
457 | _ => return previous, | |
458 | }; | |
459 | ||
460 | // already-found v1 trumps v2 since it explicitly specifies its controllers | |
461 | if previous.is_some() && version == Cgroup::V2 { | |
462 | return previous; | |
463 | } | |
464 | ||
465 | let path = fields.last()?; | |
466 | // skip leading slash | |
467 | Some((path[1..].to_owned(), version)) | |
468 | })?; | |
469 | let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path)); | |
470 | ||
471 | quota = match version { | |
472 | Cgroup::V1 => quota_v1(cgroup_path), | |
473 | Cgroup::V2 => quota_v2(cgroup_path), | |
474 | }; | |
475 | }; | |
476 | ||
477 | quota | |
478 | } | |
479 | ||
480 | fn quota_v2(group_path: PathBuf) -> usize { | |
481 | let mut quota = usize::MAX; | |
5e7ed085 FG |
482 | |
483 | let mut path = PathBuf::with_capacity(128); | |
484 | let mut read_buf = String::with_capacity(20); | |
485 | ||
064997fb | 486 | // standard mount location defined in file-hierarchy(7) manpage |
5e7ed085 FG |
487 | let cgroup_mount = "/sys/fs/cgroup"; |
488 | ||
489 | path.push(cgroup_mount); | |
064997fb | 490 | path.push(&group_path); |
5e7ed085 FG |
491 | |
492 | path.push("cgroup.controllers"); | |
493 | ||
494 | // skip if we're not looking at cgroup2 | |
495 | if matches!(try_exists(&path), Err(_) | Ok(false)) { | |
496 | return usize::MAX; | |
497 | }; | |
498 | ||
499 | path.pop(); | |
500 | ||
064997fb FG |
501 | let _: Option<()> = try { |
502 | while path.starts_with(cgroup_mount) { | |
503 | path.push("cpu.max"); | |
504 | ||
505 | read_buf.clear(); | |
506 | ||
507 | if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() { | |
508 | let raw_quota = read_buf.lines().next()?; | |
509 | let mut raw_quota = raw_quota.split(' '); | |
510 | let limit = raw_quota.next()?; | |
511 | let period = raw_quota.next()?; | |
512 | match (limit.parse::<usize>(), period.parse::<usize>()) { | |
513 | (Ok(limit), Ok(period)) => { | |
514 | quota = quota.min(limit / period); | |
515 | } | |
516 | _ => {} | |
517 | } | |
518 | } | |
519 | ||
520 | path.pop(); // pop filename | |
521 | path.pop(); // pop dir | |
522 | } | |
523 | }; | |
5e7ed085 | 524 | |
064997fb FG |
525 | quota |
526 | } | |
5e7ed085 | 527 | |
064997fb FG |
528 | fn quota_v1(group_path: PathBuf) -> usize { |
529 | let mut quota = usize::MAX; | |
530 | let mut path = PathBuf::with_capacity(128); | |
531 | let mut read_buf = String::with_capacity(20); | |
532 | ||
533 | // Hardcode commonly used locations mentioned in the cgroups(7) manpage | |
534 | // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts | |
535 | let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[ | |
536 | |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)), | |
537 | |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)), | |
538 | // this can be expensive on systems with tons of mountpoints | |
539 | // but we only get to this point when /proc/self/cgroups explicitly indicated | |
540 | // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work | |
541 | find_mountpoint, | |
542 | ]; | |
543 | ||
544 | for mount in mounts { | |
545 | let Some((mount, group_path)) = mount(&group_path) else { continue }; | |
546 | ||
547 | path.clear(); | |
548 | path.push(mount.as_ref()); | |
549 | path.push(&group_path); | |
550 | ||
551 | // skip if we guessed the mount incorrectly | |
552 | if matches!(try_exists(&path), Err(_) | Ok(false)) { | |
553 | continue; | |
554 | } | |
555 | ||
556 | while path.starts_with(mount.as_ref()) { | |
557 | let mut parse_file = |name| { | |
558 | path.push(name); | |
559 | read_buf.clear(); | |
560 | ||
561 | let f = File::open(&path); | |
562 | path.pop(); // restore buffer before any early returns | |
563 | f.ok()?.read_to_string(&mut read_buf).ok()?; | |
564 | let parsed = read_buf.trim().parse::<usize>().ok()?; | |
565 | ||
566 | Some(parsed) | |
567 | }; | |
568 | ||
569 | let limit = parse_file("cpu.cfs_quota_us"); | |
570 | let period = parse_file("cpu.cfs_period_us"); | |
571 | ||
572 | match (limit, period) { | |
573 | (Some(limit), Some(period)) => quota = quota.min(limit / period), | |
5e7ed085 FG |
574 | _ => {} |
575 | } | |
064997fb FG |
576 | |
577 | path.pop(); | |
578 | } | |
579 | ||
580 | // we passed the try_exists above so we should have traversed the correct hierarchy | |
581 | // when reaching this line | |
582 | break; | |
583 | } | |
584 | ||
585 | quota | |
586 | } | |
587 | ||
/// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
///
/// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
/// over the already-included prefix
///
/// Returns `None` when `/proc/self/mountinfo` cannot be read, a line is too
/// short to parse, or no matching mount is found.
fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
    let mut reader = BufReader::new(File::open("/proc/self/mountinfo").ok()?);
    let mut buf = String::with_capacity(256);

    loop {
        buf.clear();
        if reader.read_line(&mut buf).ok()? == 0 {
            // end of file without a match
            return None;
        }

        // Fields are picked from both ends of the space-separated line.
        let mut fields = buf.trim().split(' ');
        let root = fields.nth(3)?;
        let mount_point = fields.next()?;
        let mount_opts = fields.next_back()?;
        let filesystem_type = fields.nth_back(1)?;

        let is_cpu_cgroup =
            filesystem_type == "cgroup" && mount_opts.split(',').any(|opt| opt == "cpu");
        if !is_cpu_cgroup {
            // not a cgroup / not a cpu-controller
            continue;
        }

        let root = Path::new(root).strip_prefix("/").ok()?;

        // When the prefix does not match, this is a bind-mount whose bound
        // subdirectory does not contain the cgroup this process belongs to.
        if let Ok(trimmed_group_path) = group_path.strip_prefix(root) {
            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
        }
    }
}
5e7ed085 FG |
629 | } |
630 | ||
/// Stub `guard` module for targets excluded by the `cfg` below: both entry
/// points report that no guard range is known.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "freebsd"),
    not(target_os = "macos"),
    not(target_os = "netbsd"),
    not(target_os = "openbsd"),
    not(target_os = "solaris")
))]
#[cfg_attr(test, allow(dead_code))]
pub mod guard {
    use crate::ops::Range;
    // Address range of a guard region.
    pub type Guard = Range<usize>;
    // Always `None` on these targets.
    pub unsafe fn current() -> Option<Guard> {
        None
    }
    // Always `None` on these targets; nothing is installed.
    pub unsafe fn init() -> Option<Guard> {
        None
    }
}
650 | ||
60c5eb7d | 651 | #[cfg(any( |
3dfed10e | 652 | target_os = "linux", |
60c5eb7d XL |
653 | target_os = "freebsd", |
654 | target_os = "macos", | |
6a06907d | 655 | target_os = "netbsd", |
60c5eb7d XL |
656 | target_os = "openbsd", |
657 | target_os = "solaris" | |
658 | ))] | |
7453a54e | 659 | #[cfg_attr(test, allow(dead_code))] |
1a4d82fc | 660 | pub mod guard { |
83c7162d | 661 | use libc::{mmap, mprotect}; |
60c5eb7d | 662 | use libc::{MAP_ANON, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, PROT_NONE, PROT_READ, PROT_WRITE}; |
532ac7d7 | 663 | |
6a06907d | 664 | use crate::io; |
532ac7d7 | 665 | use crate::ops::Range; |
f035d41b | 666 | use crate::sync::atomic::{AtomicUsize, Ordering}; |
532ac7d7 | 667 | use crate::sys::os; |
1a4d82fc | 668 | |
2c00a5a8 | 669 | // This is initialized in init() and only read from after |
f035d41b | 670 | static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0); |
2c00a5a8 XL |
671 | |
672 | pub type Guard = Range<usize>; | |
673 | ||
/// Returns the `ss_sp` field reported by `stack_getbounds` for the calling
/// thread.
///
/// Panics if `stack_getbounds` fails.
#[cfg(target_os = "solaris")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let mut bounds: libc::stack_t = crate::mem::zeroed();
    let status = libc::stack_getbounds(&mut bounds);
    assert_eq!(status, 0);
    Some(bounds.ss_sp)
}
680 | ||
/// Returns the calling thread's stack address as reported by
/// `pthread_get_stackaddr_np`, moved down by `pthread_get_stacksize_np`.
#[cfg(target_os = "macos")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let me = libc::pthread_self();
    let top = libc::pthread_get_stackaddr_np(me);
    let size = libc::pthread_get_stacksize_np(me);
    Some(top.map_addr(|addr| addr - size))
}
687 | ||
/// Returns `ss_sp - ss_size` from `pthread_stackseg_np` for the calling
/// thread; on the main thread one `PAGE_SIZE` is added to that address.
///
/// Panics if `pthread_stackseg_np` fails.
#[cfg(target_os = "openbsd")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let mut segment: libc::stack_t = crate::mem::zeroed();
    assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), &mut segment), 0);

    let top = segment.ss_sp;
    let mut start = top.addr() - segment.ss_size;
    if libc::pthread_main_np() == 1 {
        // main thread
        start += PAGE_SIZE.load(Ordering::Relaxed);
    }
    Some(top.with_addr(start))
}
703 | ||
60c5eb7d XL |
704 | #[cfg(any( |
705 | target_os = "android", | |
706 | target_os = "freebsd", | |
707 | target_os = "linux", | |
708 | target_os = "netbsd", | |
709 | target_os = "l4re" | |
710 | ))] | |
c1a9b12d | 711 | unsafe fn get_stack_start() -> Option<*mut libc::c_void> { |
c1a9b12d | 712 | let mut ret = None; |
532ac7d7 | 713 | let mut attr: libc::pthread_attr_t = crate::mem::zeroed(); |
1b1a35ee | 714 | #[cfg(target_os = "freebsd")] |
92a42be0 | 715 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
a7813a04 | 716 | #[cfg(target_os = "freebsd")] |
60c5eb7d | 717 | let e = libc::pthread_attr_get_np(libc::pthread_self(), &mut attr); |
a7813a04 | 718 | #[cfg(not(target_os = "freebsd"))] |
60c5eb7d | 719 | let e = libc::pthread_getattr_np(libc::pthread_self(), &mut attr); |
a7813a04 | 720 | if e == 0 { |
532ac7d7 | 721 | let mut stackaddr = crate::ptr::null_mut(); |
c1a9b12d | 722 | let mut stacksize = 0; |
60c5eb7d | 723 | assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut stacksize), 0); |
c1a9b12d SL |
724 | ret = Some(stackaddr); |
725 | } | |
1b1a35ee XL |
726 | if e == 0 || cfg!(target_os = "freebsd") { |
727 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); | |
728 | } | |
c1a9b12d | 729 | ret |
1a4d82fc JJ |
730 | } |
731 | ||
83c7162d XL |
732 | // Precondition: PAGE_SIZE is initialized. |
733 | unsafe fn get_stack_start_aligned() -> Option<*mut libc::c_void> { | |
f035d41b XL |
734 | let page_size = PAGE_SIZE.load(Ordering::Relaxed); |
735 | assert!(page_size != 0); | |
5e7ed085 FG |
736 | let stackptr = get_stack_start()?; |
737 | let stackaddr = stackptr.addr(); | |
85aaf69f SL |
738 | |
739 | // Ensure stackaddr is page aligned! A parent process might | |
740 | // have reset RLIMIT_STACK to be non-page aligned. The | |
741 | // pthread_attr_getstack() reports the usable stack area | |
742 | // stackaddr < stackaddr + stacksize, so if stackaddr is not | |
743 | // page-aligned, calculate the fix such that stackaddr < | |
744 | // new_page_aligned_stackaddr < stackaddr + stacksize | |
5e7ed085 | 745 | let remainder = stackaddr % page_size; |
83c7162d | 746 | Some(if remainder == 0 { |
5e7ed085 | 747 | stackptr |
83c7162d | 748 | } else { |
5e7ed085 | 749 | stackptr.with_addr(stackaddr + page_size - remainder) |
83c7162d XL |
750 | }) |
751 | } | |
752 | ||
753 | pub unsafe fn init() -> Option<Guard> { | |
f035d41b XL |
754 | let page_size = os::page_size(); |
755 | PAGE_SIZE.store(page_size, Ordering::Relaxed); | |
83c7162d | 756 | |
3dfed10e | 757 | if cfg!(all(target_os = "linux", not(target_env = "musl"))) { |
041b39d2 XL |
758 | // Linux doesn't allocate the whole stack right away, and |
759 | // the kernel has its own stack-guard mechanism to fault | |
760 | // when growing too close to an existing mapping. If we map | |
761 | // our own guard, then the kernel starts enforcing a rather | |
762 | // large gap above that, rendering much of the possible | |
763 | // stack space useless. See #43052. | |
764 | // | |
765 | // Instead, we'll just note where we expect rlimit to start | |
766 | // faulting, so our handler can report "stack overflow", and | |
767 | // trust that the kernel's own stack guard will work. | |
5e7ed085 FG |
768 | let stackptr = get_stack_start_aligned()?; |
769 | let stackaddr = stackptr.addr(); | |
f035d41b | 770 | Some(stackaddr - page_size..stackaddr) |
3dfed10e XL |
771 | } else if cfg!(all(target_os = "linux", target_env = "musl")) { |
772 | // For the main thread, the musl's pthread_attr_getstack | |
773 | // returns the current stack size, rather than maximum size | |
774 | // it can eventually grow to. It cannot be used to determine | |
775 | // the position of kernel's stack guard. | |
776 | None | |
cdc7bbd5 XL |
777 | } else if cfg!(target_os = "freebsd") { |
778 | // FreeBSD's stack autogrows, and optionally includes a guard page | |
779 | // at the bottom. If we try to remap the bottom of the stack | |
780 | // ourselves, FreeBSD's guard page moves upwards. So we'll just use | |
781 | // the builtin guard page. | |
5e7ed085 FG |
782 | let stackptr = get_stack_start_aligned()?; |
783 | let guardaddr = stackptr.addr(); | |
cdc7bbd5 XL |
784 | // Technically the number of guard pages is tunable and controlled |
785 | // by the security.bsd.stack_guard_page sysctl, but there are | |
786 | // few reasons to change it from the default. The default value has | |
787 | // been 1 ever since FreeBSD 11.1 and 10.4. | |
788 | const GUARD_PAGES: usize = 1; | |
789 | let guard = guardaddr..guardaddr + GUARD_PAGES * page_size; | |
790 | Some(guard) | |
2b03887a FG |
791 | } else if cfg!(target_os = "openbsd") { |
792 | // OpenBSD stack already includes a guard page, and stack is | |
793 | // immutable. | |
794 | // | |
795 | // We'll just note where we expect rlimit to start | |
796 | // faulting, so our handler can report "stack overflow", and | |
797 | // trust that the kernel's own stack guard will work. | |
798 | let stackptr = get_stack_start_aligned()?; | |
799 | let stackaddr = stackptr.addr(); | |
800 | Some(stackaddr - page_size..stackaddr) | |
a7813a04 | 801 | } else { |
041b39d2 XL |
802 | // Reallocate the last page of the stack. |
803 | // This ensures SIGBUS will be raised on | |
804 | // stack overflow. | |
94b46f34 XL |
805 | // Systems which enforce strict PAX MPROTECT do not allow |
806 | // to mprotect() a mapping with less restrictive permissions | |
807 | // than the initial mmap() used, so we mmap() here with | |
808 | // read/write permissions and only then mprotect() it to | |
809 | // no permissions at all. See issue #50313. | |
5e7ed085 | 810 | let stackptr = get_stack_start_aligned()?; |
60c5eb7d | 811 | let result = mmap( |
5e7ed085 | 812 | stackptr, |
f035d41b | 813 | page_size, |
60c5eb7d XL |
814 | PROT_READ | PROT_WRITE, |
815 | MAP_PRIVATE | MAP_ANON | MAP_FIXED, | |
816 | -1, | |
817 | 0, | |
818 | ); | |
5e7ed085 | 819 | if result != stackptr || result == MAP_FAILED { |
6a06907d | 820 | panic!("failed to allocate a guard page: {}", io::Error::last_os_error()); |
041b39d2 XL |
821 | } |
822 | ||
5e7ed085 | 823 | let result = mprotect(stackptr, page_size, PROT_NONE); |
94b46f34 | 824 | if result != 0 { |
6a06907d | 825 | panic!("failed to protect the guard page: {}", io::Error::last_os_error()); |
94b46f34 XL |
826 | } |
827 | ||
5e7ed085 | 828 | let guardaddr = stackptr.addr(); |
1a4d82fc | 829 | |
cdc7bbd5 | 830 | Some(guardaddr..guardaddr + page_size) |
041b39d2 | 831 | } |
1a4d82fc JJ |
832 | } |
833 | ||
/// Returns the address range treated as the current thread's guard.
///
/// The range spans the `PAGE_SIZE` bytes that end at the address reported by
/// `get_stack_start()`. Yields `None` whenever `get_stack_start()` does.
///
/// # Safety
///
/// NOTE(review): the exact contract is not visible in this block; presumably
/// it is whatever `get_stack_start()` requires of its caller — confirm.
#[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))]
pub unsafe fn current() -> Option<Guard> {
    let stack_start = get_stack_start()?.addr();
    let guard_len = PAGE_SIZE.load(Ordering::Relaxed);
    Some(stack_start - guard_len..stack_start)
}
840 | ||
60c5eb7d XL |
841 | #[cfg(any( |
842 | target_os = "android", | |
843 | target_os = "freebsd", | |
844 | target_os = "linux", | |
845 | target_os = "netbsd", | |
846 | target_os = "l4re" | |
847 | ))] | |
2c00a5a8 | 848 | pub unsafe fn current() -> Option<Guard> { |
c1a9b12d | 849 | let mut ret = None; |
532ac7d7 | 850 | let mut attr: libc::pthread_attr_t = crate::mem::zeroed(); |
1b1a35ee | 851 | #[cfg(target_os = "freebsd")] |
92a42be0 | 852 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
a7813a04 | 853 | #[cfg(target_os = "freebsd")] |
60c5eb7d | 854 | let e = libc::pthread_attr_get_np(libc::pthread_self(), &mut attr); |
a7813a04 | 855 | #[cfg(not(target_os = "freebsd"))] |
60c5eb7d | 856 | let e = libc::pthread_getattr_np(libc::pthread_self(), &mut attr); |
a7813a04 | 857 | if e == 0 { |
c1a9b12d | 858 | let mut guardsize = 0; |
92a42be0 | 859 | assert_eq!(libc::pthread_attr_getguardsize(&attr, &mut guardsize), 0); |
c1a9b12d | 860 | if guardsize == 0 { |
3dfed10e XL |
861 | if cfg!(all(target_os = "linux", target_env = "musl")) { |
862 | // musl versions before 1.1.19 always reported guard | |
863 | // size obtained from pthread_attr_get_np as zero. | |
864 | // Use page size as a fallback. | |
865 | guardsize = PAGE_SIZE.load(Ordering::Relaxed); | |
866 | } else { | |
867 | panic!("there is no guard page"); | |
868 | } | |
c1a9b12d | 869 | } |
5e7ed085 | 870 | let mut stackptr = crate::ptr::null_mut::<libc::c_void>(); |
c1a9b12d | 871 | let mut size = 0; |
5e7ed085 | 872 | assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackptr, &mut size), 0); |
c1a9b12d | 873 | |
5e7ed085 | 874 | let stackaddr = stackptr.addr(); |
cdc7bbd5 | 875 | ret = if cfg!(any(target_os = "freebsd", target_os = "netbsd")) { |
2c00a5a8 | 876 | Some(stackaddr - guardsize..stackaddr) |
3dfed10e XL |
877 | } else if cfg!(all(target_os = "linux", target_env = "musl")) { |
878 | Some(stackaddr - guardsize..stackaddr) | |
c295e0f8 XL |
879 | } else if cfg!(all(target_os = "linux", any(target_env = "gnu", target_env = "uclibc"))) |
880 | { | |
2c00a5a8 XL |
881 | // glibc used to include the guard area within the stack, as noted in the BUGS |
882 | // section of `man pthread_attr_getguardsize`. This has been corrected starting | |
883 | // with glibc 2.27, and in some distro backports, so the guard is now placed at the | |
884 | // end (below) the stack. There's no easy way for us to know which we have at | |
885 | // runtime, so we'll just match any fault in the range right above or below the | |
886 | // stack base to call that fault a stack overflow. | |
887 | Some(stackaddr - guardsize..stackaddr + guardsize) | |
b039eaaf | 888 | } else { |
2c00a5a8 | 889 | Some(stackaddr..stackaddr + guardsize) |
b039eaaf | 890 | }; |
1a4d82fc | 891 | } |
1b1a35ee XL |
892 | if e == 0 || cfg!(target_os = "freebsd") { |
893 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); | |
894 | } | |
e9174d1e | 895 | ret |
c34b1796 | 896 | } |
1a4d82fc JJ |
897 | } |
898 | ||
1a4d82fc | 899 | // glibc >= 2.15 has a __pthread_get_minstack() function that returns |
74b04a01 XL |
900 | // PTHREAD_STACK_MIN plus bytes needed for thread-local storage. |
901 | // We need that information to avoid blowing up when a small stack | |
1a4d82fc JJ |
902 | // is created in an application with big thread-local storage requirements. |
903 | // See #6233 for rationale and details. | |
a2a8927a | 904 | #[cfg(all(target_os = "linux", target_env = "gnu"))] |
d9579d0f | 905 | fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize { |
a2a8927a XL |
906 | // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628) |
907 | // We shouldn't really be using such an internal symbol, but there's currently | |
908 | // no other way to account for the TLS size. | |
909 | dlsym!(fn __pthread_get_minstack(*const libc::pthread_attr_t) -> libc::size_t); | |
c34b1796 | 910 | |
7453a54e | 911 | match __pthread_get_minstack.get() { |
c30ab7b3 SL |
912 | None => libc::PTHREAD_STACK_MIN, |
913 | Some(f) => unsafe { f(attr) }, | |
1a4d82fc JJ |
914 | } |
915 | } | |
916 | ||
a2a8927a XL |
// Platforms other than glibc have no reason to look up
// __pthread_get_minstack(); the libc constant is returned as-is and the
// attribute argument is ignored.
#[cfg(not(any(all(target_os = "linux", target_env = "gnu"), target_os = "netbsd")))]
fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
922 | ||
// NetBSD variant: returns a fixed value and ignores the attribute argument.
#[cfg(target_os = "netbsd")]
fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    // just a guess
    const MIN_STACK_GUESS: usize = 2048;
    MIN_STACK_GUESS
}