]> git.proxmox.com Git - pve-lxc-syscalld.git/blob - src/process/user_caps.rs
a3d6bcb0a8ecf157e820b5b09a64108e8cad9438
[pve-lxc-syscalld.git] / src / process / user_caps.rs
1 //! User and capability management.
2
3 use std::ffi::{OsStr, OsString};
4 use std::io;
5 use std::os::unix::ffi::OsStrExt;
6
7 use anyhow::Error;
8
9 use super::PidFd;
10 use crate::capability::Capabilities;
11
12 /// Helper to enter a process' permission-check environment.
13 ///
14 /// When we execute a syscall on behalf of another process, we should try to trigger as many
15 /// permission checks as we can. It is impractical to implement them all manually, so the best
16 /// thing to do is cause as many of them to happen on the kernel-side as we can.
17 ///
18 /// We start by entering the process' devices and v2 cgroup. As calls like `mknod()` may be
19 /// affected, and access to devices as well.
20 ///
21 /// Then we must enter the mount namespace, chroot and current working directory, in order to get
22 /// the correct view of paths.
23 ///
24 /// Next we copy the caller's `umask`.
25 ///
26 /// Then switch over our effective and file system uid and gid. This has 2 reasons: First, it means
27 /// we do not need to run `chown()` on files we create, secondly, the user may have dropped
28 /// `CAP_DAC_OVERRIDE` / `CAP_DAC_READ_SEARCH` which may have prevented the creation of the file in
29 /// the first place (for example, the container program may be a non-root executable with
30 /// `cap_mknod=ep` as file-capabilities, in which case we do not want a user to be allowed to run
31 /// `mknod()` on a path owned by different user (and checking file system permissions would
32 /// require us to handle ACLs, quotas, which are all file system tyep dependent as well, so better
33 /// leave all that up to the kernel, too!)).
34 ///
35 /// Next we clone the process' capability set. This is because the process may have dropped
36 /// capabilties which under normal conditions would prevent them from executing the syscall. For
37 /// example a process may be executing `mknod()` after having dropped `CAP_MKNOD`.
38 #[derive(Clone)]
39 #[must_use = "not using UserCaps may be a security issue"]
40 pub struct UserCaps<'a> {
41 pidfd: &'a PidFd,
42 apply_uids: bool,
43 euid: libc::uid_t,
44 egid: libc::gid_t,
45 fsuid: libc::uid_t,
46 fsgid: libc::gid_t,
47 capabilities: Capabilities,
48 umask: libc::mode_t,
49 cgroup_v1_devices: Option<OsString>,
50 cgroup_v2: Option<OsString>,
51 apparmor_profile: Option<OsString>,
52 }
53
54 impl UserCaps<'_> {
55 pub fn new(pidfd: &PidFd) -> Result<UserCaps, Error> {
56 let status = pidfd.get_status()?;
57 let cgroups = pidfd.get_cgroups()?;
58 let apparmor_profile = crate::apparmor::get_label(pidfd)?;
59
60 Ok(UserCaps {
61 pidfd,
62 apply_uids: true,
63 euid: status.uids.euid,
64 egid: status.uids.egid,
65 fsuid: status.uids.fsuid,
66 fsgid: status.uids.fsgid,
67 capabilities: status.capabilities,
68 umask: status.umask,
69 cgroup_v1_devices: cgroups.get("devices").map(|s| s.to_owned()),
70 cgroup_v2: cgroups.v2().map(|s| s.to_owned()),
71 apparmor_profile,
72 })
73 }
74
75 fn apply_cgroups(&self) -> io::Result<()> {
76 fn enter_cgroup(kind: &str, name: &OsStr) -> io::Result<()> {
77 let mut path = OsString::with_capacity(15 + kind.len() + name.len() + 13 + 1);
78 path.push(OsStr::from_bytes(b"/sys/fs/cgroup/"));
79 path.push(kind);
80 path.push(name);
81 path.push(OsStr::from_bytes(b"/cgroup.procs"));
82 std::fs::write(path, b"0")
83 }
84
85 if let Some(ref cg) = self.cgroup_v1_devices {
86 enter_cgroup("devices/", cg)?;
87 }
88
89 if let Some(ref cg) = self.cgroup_v2 {
90 enter_cgroup("unified/", cg)?;
91 }
92
93 Ok(())
94 }
95
96 fn apply_user_caps(&self) -> io::Result<()> {
97 use crate::capability::SecureBits;
98 if self.apply_uids {
99 unsafe {
100 libc::umask(self.umask);
101 }
102 let mut secbits = SecureBits::get_current()?;
103 secbits |= SecureBits::KEEP_CAPS | SecureBits::NO_SETUID_FIXUP;
104 secbits.apply()?;
105 c_try!(unsafe { libc::setegid(self.egid) });
106 c_try!(unsafe { libc::setfsgid(self.fsgid) });
107 c_try!(unsafe { libc::seteuid(self.euid) });
108 c_try!(unsafe { libc::setfsuid(self.fsuid) });
109 }
110 self.capabilities.capset()?;
111 Ok(())
112 }
113
114 pub fn disable_uid_change(&mut self) {
115 self.apply_uids = false;
116 }
117
118 pub fn disable_cgroup_change(&mut self) {
119 self.cgroup_v1_devices = None;
120 self.cgroup_v2 = None;
121 }
122
123 pub fn apply(self, own_pidfd: &PidFd) -> io::Result<()> {
124 self.apply_cgroups()?;
125 self.pidfd.mount_namespace()?.setns()?;
126 self.pidfd.enter_chroot()?;
127 self.pidfd.enter_cwd()?;
128 if let Some(ref label) = self.apparmor_profile {
129 crate::apparmor::set_label(own_pidfd, label)?;
130 }
131 self.apply_user_caps()?;
132 Ok(())
133 }
134 }