]>
Commit | Line | Data |
---|---|---|
3bbd1db0 WB |
1 | //! User and capability management. |
2 | ||
3 | use std::ffi::{OsStr, OsString}; | |
4 | use std::io; | |
5 | use std::os::unix::ffi::OsStrExt; | |
6 | ||
8150a439 | 7 | use anyhow::Error; |
3bbd1db0 WB |
8 | |
9 | use super::PidFd; | |
10 | use crate::capability::Capabilities; | |
11 | ||
12 | /// Helper to enter a process' permission-check environment. | |
13 | /// | |
14 | /// When we execute a syscall on behalf of another process, we should try to trigger as many | |
15 | /// permission checks as we can. It is impractical to implement them all manually, so the best | |
16 | /// thing to do is cause as many of them to happen on the kernel-side as we can. | |
17 | /// | |
18 | /// We start by entering the process' devices and v2 cgroup. As calls like `mknod()` may be | |
19 | /// affected, and access to devices as well. | |
20 | /// | |
21 | /// Then we must enter the mount namespace, chroot and current working directory, in order to get | |
22 | /// the correct view of paths. | |
23 | /// | |
24 | /// Next we copy the caller's `umask`. | |
25 | /// | |
26 | /// Then switch over our effective and file system uid and gid. This has 2 reasons: First, it means | |
27 | /// we do not need to run `chown()` on files we create, secondly, the user may have dropped | |
28 | /// `CAP_DAC_OVERRIDE` / `CAP_DAC_READ_SEARCH` which may have prevented the creation of the file in | |
29 | /// the first place (for example, the container program may be a non-root executable with | |
30 | /// `cap_mknod=ep` as file-capabilities, in which case we do not want a user to be allowed to run | |
31 | /// `mknod()` on a path owned by different user (and checking file system permissions would | |
32 | /// require us to handle ACLs, quotas, which are all file system tyep dependent as well, so better | |
33 | /// leave all that up to the kernel, too!)). | |
34 | /// | |
35 | /// Next we clone the process' capability set. This is because the process may have dropped | |
36 | /// capabilties which under normal conditions would prevent them from executing the syscall. For | |
37 | /// example a process may be executing `mknod()` after having dropped `CAP_MKNOD`. | |
38 | #[derive(Clone)] | |
39 | #[must_use = "not using UserCaps may be a security issue"] | |
40 | pub struct UserCaps<'a> { | |
41 | pidfd: &'a PidFd, | |
42 | apply_uids: bool, | |
43 | euid: libc::uid_t, | |
44 | egid: libc::gid_t, | |
45 | fsuid: libc::uid_t, | |
46 | fsgid: libc::gid_t, | |
47 | capabilities: Capabilities, | |
48 | umask: libc::mode_t, | |
49 | cgroup_v1_devices: Option<OsString>, | |
fe73c2fb | 50 | cgroup_v2_base: &'static str, |
3bbd1db0 WB |
51 | cgroup_v2: Option<OsString>, |
52 | apparmor_profile: Option<OsString>, | |
53 | } | |
54 | ||
55 | impl UserCaps<'_> { | |
56 | pub fn new(pidfd: &PidFd) -> Result<UserCaps, Error> { | |
57 | let status = pidfd.get_status()?; | |
58 | let cgroups = pidfd.get_cgroups()?; | |
59 | let apparmor_profile = crate::apparmor::get_label(pidfd)?; | |
60 | ||
61 | Ok(UserCaps { | |
62 | pidfd, | |
63 | apply_uids: true, | |
64 | euid: status.uids.euid, | |
65 | egid: status.uids.egid, | |
66 | fsuid: status.uids.fsuid, | |
67 | fsgid: status.uids.fsgid, | |
68 | capabilities: status.capabilities, | |
69 | umask: status.umask, | |
70 | cgroup_v1_devices: cgroups.get("devices").map(|s| s.to_owned()), | |
fe73c2fb | 71 | cgroup_v2_base: if cgroups.has_v1() { "unified/" } else { "" }, |
3bbd1db0 WB |
72 | cgroup_v2: cgroups.v2().map(|s| s.to_owned()), |
73 | apparmor_profile, | |
74 | }) | |
75 | } | |
76 | ||
77 | fn apply_cgroups(&self) -> io::Result<()> { | |
fe73c2fb WB |
78 | // FIXME: Handle `kind` taking /proc/self/mountinfo into account instead of assuming |
79 | // "unified/" | |
3bbd1db0 WB |
80 | fn enter_cgroup(kind: &str, name: &OsStr) -> io::Result<()> { |
81 | let mut path = OsString::with_capacity(15 + kind.len() + name.len() + 13 + 1); | |
82 | path.push(OsStr::from_bytes(b"/sys/fs/cgroup/")); | |
83 | path.push(kind); | |
84 | path.push(name); | |
85 | path.push(OsStr::from_bytes(b"/cgroup.procs")); | |
86 | std::fs::write(path, b"0") | |
87 | } | |
88 | ||
89 | if let Some(ref cg) = self.cgroup_v1_devices { | |
90 | enter_cgroup("devices/", cg)?; | |
91 | } | |
92 | ||
93 | if let Some(ref cg) = self.cgroup_v2 { | |
fe73c2fb | 94 | enter_cgroup(self.cgroup_v2_base, cg)?; |
3bbd1db0 WB |
95 | } |
96 | ||
97 | Ok(()) | |
98 | } | |
99 | ||
100 | fn apply_user_caps(&self) -> io::Result<()> { | |
101 | use crate::capability::SecureBits; | |
102 | if self.apply_uids { | |
103 | unsafe { | |
104 | libc::umask(self.umask); | |
105 | } | |
106 | let mut secbits = SecureBits::get_current()?; | |
107 | secbits |= SecureBits::KEEP_CAPS | SecureBits::NO_SETUID_FIXUP; | |
108 | secbits.apply()?; | |
109 | c_try!(unsafe { libc::setegid(self.egid) }); | |
110 | c_try!(unsafe { libc::setfsgid(self.fsgid) }); | |
111 | c_try!(unsafe { libc::seteuid(self.euid) }); | |
112 | c_try!(unsafe { libc::setfsuid(self.fsuid) }); | |
113 | } | |
114 | self.capabilities.capset()?; | |
115 | Ok(()) | |
116 | } | |
117 | ||
118 | pub fn disable_uid_change(&mut self) { | |
119 | self.apply_uids = false; | |
120 | } | |
121 | ||
122 | pub fn disable_cgroup_change(&mut self) { | |
123 | self.cgroup_v1_devices = None; | |
124 | self.cgroup_v2 = None; | |
125 | } | |
126 | ||
127 | pub fn apply(self, own_pidfd: &PidFd) -> io::Result<()> { | |
128 | self.apply_cgroups()?; | |
129 | self.pidfd.mount_namespace()?.setns()?; | |
130 | self.pidfd.enter_chroot()?; | |
131 | self.pidfd.enter_cwd()?; | |
132 | if let Some(ref label) = self.apparmor_profile { | |
133 | crate::apparmor::set_label(own_pidfd, label)?; | |
134 | } | |
135 | self.apply_user_caps()?; | |
136 | Ok(()) | |
137 | } | |
138 | } |