std/sys/thread/
unix.rs

1#[cfg(not(any(
2    target_env = "newlib",
3    target_os = "l4re",
4    target_os = "emscripten",
5    target_os = "redox",
6    target_os = "hurd",
7    target_os = "aix",
8)))]
9use crate::ffi::CStr;
10use crate::mem::{self, DropGuard, ManuallyDrop};
11use crate::num::NonZero;
12#[cfg(all(target_os = "linux", target_env = "gnu"))]
13use crate::sys::weak::dlsym;
14#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
15use crate::sys::weak::weak;
16use crate::sys::{os, stack_overflow};
17use crate::thread::ThreadInit;
18use crate::time::Duration;
19use crate::{cmp, io, ptr};
/// Default minimum stack size (2 MiB) for every target that has no
/// platform-specific override below.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
/// Default minimum stack size on L4Re (1 MiB).
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
/// Default minimum stack size on VxWorks (256 KiB).
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
/// Sentinel default on ESP-IDF and NuttX: `Thread::new` skips setting an explicit
/// stack size when it receives 0.
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
33
/// Handle to a native thread, identified by its `pthread_t`.
///
/// Dropping the handle detaches the thread (see the `Drop` impl); `join` and
/// `into_id` consume the handle without detaching.
pub struct Thread {
    /// The pthread identifier produced by `pthread_create` in `Thread::new`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
42
43impl Thread {
44    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
45    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
46    pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
47        let data = init;
48        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
49        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
50        let mut attr = DropGuard::new(&mut attr, |attr| {
51            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0)
52        });
53
54        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
55        if stack > 0 {
56            // Only set the stack if a non-zero value is passed
57            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
58            assert_eq!(
59                libc::pthread_attr_setstacksize(
60                    attr.as_mut_ptr(),
61                    cmp::max(stack, min_stack_size(attr.as_ptr()))
62                ),
63                0
64            );
65        }
66
67        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
68        {
69            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));
70
71            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
72                0 => {}
73                n => {
74                    assert_eq!(n, libc::EINVAL);
75                    // EINVAL means |stack_size| is either too small or not a
76                    // multiple of the system page size. Because it's definitely
77                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
78                    // Round up to the nearest page and try again.
79                    let page_size = os::page_size();
80                    let stack_size =
81                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
82
83                    // Some libc implementations, e.g. musl, place an upper bound
84                    // on the stack size, in which case we can only gracefully return
85                    // an error here.
86                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
87                        return Err(io::const_error!(
88                            io::ErrorKind::InvalidInput,
89                            "invalid stack size"
90                        ));
91                    }
92                }
93            };
94        }
95
96        let data = Box::into_raw(data);
97        let mut native: libc::pthread_t = mem::zeroed();
98        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
99        return if ret == 0 {
100            Ok(Thread { id: native })
101        } else {
102            // The thread failed to start and as a result `data` was not consumed.
103            // Therefore, it is safe to reconstruct the box so that it gets deallocated.
104            drop(Box::from_raw(data));
105            Err(io::Error::from_raw_os_error(ret))
106        };
107
108        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
109            unsafe {
110                // SAFETY: we are simply recreating the box that was leaked earlier.
111                let init = Box::from_raw(data as *mut ThreadInit);
112                let rust_start = init.init();
113
114                // Now that the thread information is set, set up our stack
115                // overflow handler.
116                let _handler = stack_overflow::Handler::new();
117
118                rust_start();
119            }
120            ptr::null_mut()
121        }
122    }
123
124    pub fn join(self) {
125        let id = self.into_id();
126        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
127        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
128    }
129
130    pub fn id(&self) -> libc::pthread_t {
131        self.id
132    }
133
134    pub fn into_id(self) -> libc::pthread_t {
135        ManuallyDrop::new(self).id
136    }
137}
138
139impl Drop for Thread {
140    fn drop(&mut self) {
141        let ret = unsafe { libc::pthread_detach(self.id) };
142        debug_assert_eq!(ret, 0);
143    }
144}
145
146pub fn available_parallelism() -> io::Result<NonZero<usize>> {
147    cfg_select! {
148        any(
149            target_os = "android",
150            target_os = "emscripten",
151            target_os = "fuchsia",
152            target_os = "hurd",
153            target_os = "linux",
154            target_os = "aix",
155            target_vendor = "apple",
156            target_os = "cygwin",
157        ) => {
158            #[allow(unused_assignments)]
159            #[allow(unused_mut)]
160            let mut quota = usize::MAX;
161
162            #[cfg(any(target_os = "android", target_os = "linux"))]
163            {
164                quota = cgroups::quota().max(1);
165                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
166                unsafe {
167                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
168                        let count = libc::CPU_COUNT(&set) as usize;
169                        let count = count.min(quota);
170
171                        // According to sched_getaffinity's API it should always be non-zero, but
172                        // some old MIPS kernels were buggy and zero-initialized the mask if
173                        // none was explicitly set.
174                        // In that case we use the sysconf fallback.
175                        if let Some(count) = NonZero::new(count) {
176                            return Ok(count)
177                        }
178                    }
179                }
180            }
181            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
182                -1 => Err(io::Error::last_os_error()),
183                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
184                cpus => {
185                    let count = cpus as usize;
186                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
187                    let count = count.min(quota);
188                    Ok(unsafe { NonZero::new_unchecked(count) })
189                }
190            }
191        }
192        any(
193           target_os = "freebsd",
194           target_os = "dragonfly",
195           target_os = "openbsd",
196           target_os = "netbsd",
197        ) => {
198            use crate::ptr;
199
200            #[cfg(target_os = "freebsd")]
201            {
202                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
203                unsafe {
204                    if libc::cpuset_getaffinity(
205                        libc::CPU_LEVEL_WHICH,
206                        libc::CPU_WHICH_PID,
207                        -1,
208                        size_of::<libc::cpuset_t>(),
209                        &mut set,
210                    ) == 0 {
211                        let count = libc::CPU_COUNT(&set) as usize;
212                        if count > 0 {
213                            return Ok(NonZero::new_unchecked(count));
214                        }
215                    }
216                }
217            }
218
219            #[cfg(target_os = "netbsd")]
220            {
221                unsafe {
222                    let set = libc::_cpuset_create();
223                    if !set.is_null() {
224                        let mut count: usize = 0;
225                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
226                            for i in 0..libc::cpuid_t::MAX {
227                                match libc::_cpuset_isset(i, set) {
228                                    -1 => break,
229                                    0 => continue,
230                                    _ => count = count + 1,
231                                }
232                            }
233                        }
234                        libc::_cpuset_destroy(set);
235                        if let Some(count) = NonZero::new(count) {
236                            return Ok(count);
237                        }
238                    }
239                }
240            }
241
242            let mut cpus: libc::c_uint = 0;
243            let mut cpus_size = size_of_val(&cpus);
244
245            unsafe {
246                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
247            }
248
249            // Fallback approach in case of errors or no hardware threads.
250            if cpus < 1 {
251                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
252                let res = unsafe {
253                    libc::sysctl(
254                        mib.as_mut_ptr(),
255                        2,
256                        (&raw mut cpus) as *mut _,
257                        (&raw mut cpus_size) as *mut _,
258                        ptr::null_mut(),
259                        0,
260                    )
261                };
262
263                // Handle errors if any.
264                if res == -1 {
265                    return Err(io::Error::last_os_error());
266                } else if cpus == 0 {
267                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
268                }
269            }
270
271            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
272        }
273        target_os = "nto" => {
274            unsafe {
275                use libc::_syspage_ptr;
276                if _syspage_ptr.is_null() {
277                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
278                } else {
279                    let cpus = (*_syspage_ptr).num_cpu;
280                    NonZero::new(cpus as usize)
281                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
282                }
283            }
284        }
285        any(target_os = "solaris", target_os = "illumos") => {
286            let mut cpus = 0u32;
287            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
288                return Err(io::Error::UNKNOWN_THREAD_COUNT);
289            }
290            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
291        }
292        target_os = "haiku" => {
293            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
294            // `get_system_info` calls then `smp_get_num_cpus`
295            unsafe {
296                let mut sinfo: libc::system_info = crate::mem::zeroed();
297                let res = libc::get_system_info(&mut sinfo);
298
299                if res != libc::B_OK {
300                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
301                }
302
303                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
304            }
305        }
306        target_os = "vxworks" => {
307            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
308            // expectations than the actual cores availability.
309
310            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
311            unsafe{
312                let set = libc::vxCpuEnabledGet();
313                Ok(NonZero::new_unchecked(set.count_ones() as usize))
314            }
315        }
316        _ => {
317            // FIXME: implement on Redox, l4re
318            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
319        }
320    }
321}
322
323pub fn current_os_id() -> Option<u64> {
324    // Most Unix platforms have a way to query an integer ID of the current thread, all with
325    // slightly different spellings.
326    //
327    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
328    // for process inspection (debuggers, trace, `top`, etc.).
329    cfg_select! {
330        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
331        any(target_os = "android", target_os = "linux") => {
332            use crate::sys::pal::weak::syscall;
333
334            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
335            // since Linux 2.4.11.
336            syscall!(fn gettid() -> libc::pid_t;);
337
338            // SAFETY: FFI call with no preconditions.
339            let id: libc::pid_t = unsafe { gettid() };
340            Some(id as u64)
341        }
342        target_os = "nto" => {
343            // SAFETY: FFI call with no preconditions.
344            let id: libc::pid_t = unsafe { libc::gettid() };
345            Some(id as u64)
346        }
347        target_os = "openbsd" => {
348            // SAFETY: FFI call with no preconditions.
349            let id: libc::pid_t = unsafe { libc::getthrid() };
350            Some(id as u64)
351        }
352        target_os = "freebsd" => {
353            // SAFETY: FFI call with no preconditions.
354            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
355            Some(id as u64)
356        }
357        target_os = "netbsd" => {
358            // SAFETY: FFI call with no preconditions.
359            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
360            Some(id as u64)
361        }
362        any(target_os = "illumos", target_os = "solaris") => {
363            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
364            // SAFETY: FFI call with no preconditions.
365            let id: libc::pthread_t = unsafe { libc::pthread_self() };
366            Some(id as u64)
367        }
368        target_vendor = "apple" => {
369            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
370            let mut id = 0u64;
371            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
372            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
373            if status == 0 {
374                Some(id)
375            } else {
376                None
377            }
378        }
379        // Other platforms don't have an OS thread ID or don't have a way to access it.
380        _ => None,
381    }
382}
383
/// Copies `cstr` into a zero-filled, fixed-size buffer of `MAX_WITH_NUL`
/// characters, keeping at most `MAX_WITH_NUL - 1` bytes so that the result is
/// always NUL-terminated.
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut buf = [0; MAX_WITH_NUL];
    // The final slot is excluded from the copy so it keeps its terminating zero.
    buf[..MAX_WITH_NUL - 1]
        .iter_mut()
        .zip(cstr.to_bytes())
        .for_each(|(slot, &byte)| *slot = byte as libc::c_char);
    buf
}
400
/// Sets the name of the calling thread on Android through `prctl(PR_SET_NAME)`.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing `prctl` arguments are passed as zero.
    const UNUSED: libc::c_ulong = 0;

    let status = unsafe { libc::prctl(PR_SET_NAME, name.as_ptr(), UNUSED, UNUSED, UNUSED) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
416
417#[cfg(any(
418    target_os = "linux",
419    target_os = "freebsd",
420    target_os = "dragonfly",
421    target_os = "nuttx",
422    target_os = "cygwin"
423))]
424pub fn set_name(name: &CStr) {
425    unsafe {
426        cfg_select! {
427            any(target_os = "linux", target_os = "cygwin") => {
428                // Linux and Cygwin limits the allowed length of the name.
429                const TASK_COMM_LEN: usize = 16;
430                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
431            }
432            _ => {
433                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
434            }
435        };
436        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
437        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
438        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
439        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
440        debug_assert_eq!(res, 0);
441    }
442}
443
/// Sets the name of the calling thread on OpenBSD via `pthread_set_name_np`.
#[cfg(target_os = "openbsd")]
pub fn set_name(name: &CStr) {
    let name_ptr = name.as_ptr();
    // No result is inspected for this call.
    unsafe { libc::pthread_set_name_np(libc::pthread_self(), name_ptr) };
}
450
/// Sets the name of the calling thread on Apple platforms, truncated to
/// `MAXTHREADNAMESIZE` characters including the terminating NUL.
#[cfg(target_vendor = "apple")]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    let status = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
460
/// Sets the name of the calling thread on NetBSD, whose `pthread_setname_np`
/// takes a format string plus one argument.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    // The name is passed as the argument of a fixed "%s" format rather than
    // as the format itself.
    let format = c"%s".as_ptr();
    let argument = name.as_ptr() as *mut libc::c_void;
    let status = unsafe { libc::pthread_setname_np(libc::pthread_self(), format, argument) };
    debug_assert_eq!(status, 0);
}
472
/// Sets the name of the calling thread on Solaris, illumos and QNX Neutrino.
///
/// `pthread_setname_np` is resolved as a weak symbol; when it is not available
/// the call is silently skipped.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    #[cfg(target_os = "nto")]
    const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
    const THREAD_NAME_MAX: usize = 32;

    weak!(
        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
    );

    let Some(setname) = pthread_setname_np.get() else {
        return;
    };

    let truncated = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
    let status = unsafe { setname(libc::pthread_self(), truncated.as_ptr()) };
    debug_assert_eq!(status, 0);
}
490
/// Sets the name of the calling thread on Fuchsia by writing the
/// `ZX_PROP_NAME` property of the current thread object.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use crate::sys::pal::fuchsia::*;

    let value = name.as_ptr() as *const libc::c_void;
    // Length of the name without its terminating NUL.
    let value_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, value, value_len);
    }
}
503
/// Sets the name of the calling thread on Haiku via `rename_thread`.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        // `find_thread` is called with a null name to obtain the ID passed to `rename_thread`.
        let current = libc::find_thread(ptr::null_mut());
        libc::rename_thread(current, name.as_ptr())
    };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, libc::B_OK);
}
513
/// Sets the name of the calling task on VxWorks via `taskNameSet`, using a
/// truncated, NUL-terminated copy of `name`.
#[cfg(target_os = "vxworks")]
pub fn set_name(name: &CStr) {
    // `taskNameSet` takes a mutable pointer, hence the mutable local buffer.
    let mut buffer = truncate_cstr::<{ (libc::VX_TASK_RENAME_LENGTH - 1) as usize }>(name);
    let status = unsafe { libc::taskNameSet(libc::taskIdSelf(), buffer.as_mut_ptr()) };
    debug_assert_eq!(status, libc::OK);
}
520
521#[cfg(not(target_os = "espidf"))]
522pub fn sleep(dur: Duration) {
523    let mut secs = dur.as_secs();
524    let mut nsecs = dur.subsec_nanos() as _;
525
526    // If we're awoken with a signal then the return value will be -1 and
527    // nanosleep will fill in `ts` with the remaining time.
528    unsafe {
529        while secs > 0 || nsecs > 0 {
530            let mut ts = libc::timespec {
531                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
532                tv_nsec: nsecs,
533            };
534            secs -= ts.tv_sec as u64;
535            let ts_ptr = &raw mut ts;
536            if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
537                assert_eq!(os::errno(), libc::EINTR);
538                secs += ts.tv_sec as u64;
539                nsecs = ts.tv_nsec;
540            } else {
541                nsecs = 0;
542            }
543        }
544    }
545}
546
/// Puts the calling thread to sleep for at least `dur` on ESP-IDF.
///
/// ESP-IDF does not have `nanosleep`, so `usleep` is called repeatedly with
/// chunks of at most `MAX_MICROS` microseconds.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    // As per the documentation of `usleep`, it is expected to support
    // sleep times as big as at least up to 1 second.
    //
    // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
    // `usleep` implementation
    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
    // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
    // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

    // A sub-microsecond remainder is rounded up to one extra microsecond so the
    // thread sleeps for _at least_ `dur`, as the `std::thread::sleep` contract
    // requires. The sum cannot overflow: it is a `u128`, while `Duration` is a
    // (`u64` secs, `u32` nanos) pair with nanos < 1_000_000_000.
    let has_partial_micro = dur.subsec_nanos() % 1_000 > 0;
    let mut remaining = dur.as_micros() + u128::from(has_partial_micro);

    while remaining > 0 {
        let chunk = cmp::min(remaining, u128::from(MAX_MICROS)) as u32;
        unsafe {
            libc::usleep(chunk);
        }

        remaining -= u128::from(chunk);
    }
}
577
578// Any unix that has clock_nanosleep
579// If this list changes update the MIRI chock_nanosleep shim
580#[cfg(any(
581    target_os = "freebsd",
582    target_os = "netbsd",
583    target_os = "linux",
584    target_os = "android",
585    target_os = "solaris",
586    target_os = "illumos",
587    target_os = "dragonfly",
588    target_os = "hurd",
589    target_os = "fuchsia",
590    target_os = "vxworks",
591))]
592pub fn sleep_until(deadline: crate::time::Instant) {
593    use crate::time::Instant;
594
595    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
596        // The deadline is further in the future then can be passed to
597        // clock_nanosleep. We have to use Self::sleep instead. This might
598        // happen on 32 bit platforms, especially closer to 2038.
599        let now = Instant::now();
600        if let Some(delay) = deadline.checked_duration_since(now) {
601            sleep(delay);
602        }
603        return;
604    };
605
606    unsafe {
607        // When we get interrupted (res = EINTR) call clock_nanosleep again
608        loop {
609            let res = libc::clock_nanosleep(
610                crate::sys::time::Instant::CLOCK_ID,
611                libc::TIMER_ABSTIME,
612                &ts,
613                core::ptr::null_mut(), // not required with TIMER_ABSTIME
614            );
615
616            if res == 0 {
617                break;
618            } else {
619                assert_eq!(
620                    res,
621                    libc::EINTR,
622                    "timespec is in range,
623                         clockid is valid and kernel should support it"
624                );
625            }
626        }
627    }
628}
629
630pub fn yield_now() {
631    let ret = unsafe { libc::sched_yield() };
632    debug_assert_eq!(ret, 0);
633}
634
635#[cfg(any(target_os = "android", target_os = "linux"))]
636mod cgroups {
637    //! Currently not covered
638    //! * cgroup v2 in non-standard mountpoints
639    //! * paths containing control characters or spaces, since those would be escaped in procfs
640    //!   output and we don't unescape
641
642    use crate::borrow::Cow;
643    use crate::ffi::OsString;
644    use crate::fs::{File, exists};
645    use crate::io::{BufRead, Read};
646    use crate::os::unix::ffi::OsStringExt;
647    use crate::path::{Path, PathBuf};
648    use crate::str::from_utf8;
649
650    #[derive(PartialEq)]
651    enum Cgroup {
652        V1,
653        V2,
654    }
655
656    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
657    /// be determined or is not set.
658    pub(super) fn quota() -> usize {
659        let mut quota = usize::MAX;
660        if cfg!(miri) {
661            // Attempting to open a file fails under default flags due to isolation.
662            // And Miri does not have parallelism anyway.
663            return quota;
664        }
665
666        let _: Option<()> = try {
667            let mut buf = Vec::with_capacity(128);
668            // find our place in the cgroup hierarchy
669            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
670            let (cgroup_path, version) =
671                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
672                    let mut fields = line.splitn(3, |&c| c == b':');
673                    // 2nd field is a list of controllers for v1 or empty for v2
674                    let version = match fields.nth(1) {
675                        Some(b"") => Cgroup::V2,
676                        Some(controllers)
677                            if from_utf8(controllers)
678                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
679                        {
680                            Cgroup::V1
681                        }
682                        _ => return previous,
683                    };
684
685                    // already-found v1 trumps v2 since it explicitly specifies its controllers
686                    if previous.is_some() && version == Cgroup::V2 {
687                        return previous;
688                    }
689
690                    let path = fields.last()?;
691                    // skip leading slash
692                    Some((path[1..].to_owned(), version))
693                })?;
694            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
695
696            quota = match version {
697                Cgroup::V1 => quota_v1(cgroup_path),
698                Cgroup::V2 => quota_v2(cgroup_path),
699            };
700        };
701
702        quota
703    }
704
705    fn quota_v2(group_path: PathBuf) -> usize {
706        let mut quota = usize::MAX;
707
708        let mut path = PathBuf::with_capacity(128);
709        let mut read_buf = String::with_capacity(20);
710
711        // standard mount location defined in file-hierarchy(7) manpage
712        let cgroup_mount = "/sys/fs/cgroup";
713
714        path.push(cgroup_mount);
715        path.push(&group_path);
716
717        path.push("cgroup.controllers");
718
719        // skip if we're not looking at cgroup2
720        if matches!(exists(&path), Err(_) | Ok(false)) {
721            return usize::MAX;
722        };
723
724        path.pop();
725
726        let _: Option<()> = try {
727            while path.starts_with(cgroup_mount) {
728                path.push("cpu.max");
729
730                read_buf.clear();
731
732                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
733                    let raw_quota = read_buf.lines().next()?;
734                    let mut raw_quota = raw_quota.split(' ');
735                    let limit = raw_quota.next()?;
736                    let period = raw_quota.next()?;
737                    match (limit.parse::<usize>(), period.parse::<usize>()) {
738                        (Ok(limit), Ok(period)) if period > 0 => {
739                            quota = quota.min(limit / period);
740                        }
741                        _ => {}
742                    }
743                }
744
745                path.pop(); // pop filename
746                path.pop(); // pop dir
747            }
748        };
749
750        quota
751    }
752
753    fn quota_v1(group_path: PathBuf) -> usize {
754        let mut quota = usize::MAX;
755        let mut path = PathBuf::with_capacity(128);
756        let mut read_buf = String::with_capacity(20);
757
758        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
759        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
760        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
761            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
762            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
763            // this can be expensive on systems with tons of mountpoints
764            // but we only get to this point when /proc/self/cgroups explicitly indicated
765            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
766            find_mountpoint,
767        ];
768
769        for mount in mounts {
770            let Some((mount, group_path)) = mount(&group_path) else { continue };
771
772            path.clear();
773            path.push(mount.as_ref());
774            path.push(&group_path);
775
776            // skip if we guessed the mount incorrectly
777            if matches!(exists(&path), Err(_) | Ok(false)) {
778                continue;
779            }
780
781            while path.starts_with(mount.as_ref()) {
782                let mut parse_file = |name| {
783                    path.push(name);
784                    read_buf.clear();
785
786                    let f = File::open(&path);
787                    path.pop(); // restore buffer before any early returns
788                    f.ok()?.read_to_string(&mut read_buf).ok()?;
789                    let parsed = read_buf.trim().parse::<usize>().ok()?;
790
791                    Some(parsed)
792                };
793
794                let limit = parse_file("cpu.cfs_quota_us");
795                let period = parse_file("cpu.cfs_period_us");
796
797                match (limit, period) {
798                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
799                    _ => {}
800                }
801
802                path.pop();
803            }
804
805            // we passed the try_exists above so we should have traversed the correct hierarchy
806            // when reaching this line
807            break;
808        }
809
810        quota
811    }
812
813    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
814    ///
815    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
816    /// over the already-included prefix
817    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
818        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
819        let mut line = String::with_capacity(256);
820        loop {
821            line.clear();
822            if reader.read_line(&mut line).ok()? == 0 {
823                break;
824            }
825
826            let line = line.trim();
827            let mut items = line.split(' ');
828
829            let sub_path = items.nth(3)?;
830            let mount_point = items.next()?;
831            let mount_opts = items.next_back()?;
832            let filesystem_type = items.nth_back(1)?;
833
834            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
835                // not a cgroup / not a cpu-controller
836                continue;
837            }
838
839            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
840
841            if !group_path.starts_with(sub_path) {
842                // this is a bind-mount and the bound subdirectory
843                // does not contain the cgroup this process belongs to
844                continue;
845            }
846
847            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
848
849            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
850        }
851
852        None
853    }
854}
855
856// glibc >= 2.15 has a __pthread_get_minstack() function that returns
857// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
858// We need that information to avoid blowing up when a small stack
859// is created in an application with big thread-local storage requirements.
860// See #6233 for rationale and details.
861#[cfg(all(target_os = "linux", target_env = "gnu"))]
862unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
863    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
864    // We shouldn't really be using such an internal symbol, but there's currently
865    // no other way to account for the TLS size.
866    dlsym!(
867        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
868    );
869
870    match __pthread_get_minstack.get() {
871        None => libc::PTHREAD_STACK_MIN,
872        Some(f) => unsafe { f(attr) },
873    }
874}
875
// No point in looking up __pthread_get_minstack() on non-glibc platforms.
/// Returns `PTHREAD_STACK_MIN`; the attribute argument is ignored.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    let minimum: usize = libc::PTHREAD_STACK_MIN;
    minimum
}
884
/// Returns the minimum thread stack size reported by
/// `sysconf(_SC_THREAD_STACK_MIN)`, queried once and cached; the attribute
/// argument is ignored.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    static CACHED_MIN: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *CACHED_MIN.get_or_init(|| {
        let reported = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        // A negative result is replaced by a guessed default.
        if reported < 0 { 2048 } else { reported as usize }
    })
}