]> git.proxmox.com Git - pve-lxc-syscalld.git/blame - src/lxcseccomp.rs
macros are namespaced differently now
[pve-lxc-syscalld.git] / src / lxcseccomp.rs
CommitLineData
9cffeac4
WB
1//! Module for LXC specific related seccomp handling.
2
3use std::convert::TryFrom;
c95be5f6 4use std::ffi::CString;
7470f14b 5use std::os::raw::{c_int, c_uint};
c95be5f6 6use std::os::unix::fs::FileExt;
512f780a 7use std::os::unix::io::RawFd;
c95be5f6 8use std::{io, mem};
9cffeac4 9
512f780a 10use failure::{bail, format_err, Error};
9aa2a15a 11use io_uring::socket::SeqPacketSocket;
41ff6d28 12use lazy_static::lazy_static;
9cffeac4 13use libc::pid_t;
937921aa 14use nix::errno::Errno;
9cffeac4 15
c95be5f6 16use crate::pidfd::PidFd;
e420f6f9 17use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes};
512f780a 18use crate::tools::{Fd, FromFd, IoVec, IoVecMut};
9cffeac4
WB
19
20/// Seccomp notification proxy message sent by the lxc monitor.
21///
22/// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp
23/// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`,
24/// `SeccompNotifResp` and a cookie.
25///
26/// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper
27/// for communcation.
28#[repr(C)]
29pub struct SeccompNotifyProxyMsg {
30 /// Reserved data must be zero.
31 reserved0: u64,
32
33 /// The lxc monitor pid.
34 ///
35 /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer
36 /// we receive this message from.
37 monitor_pid: pid_t,
38
39 /// The container's init pid.
40 ///
41 /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so
42 /// this pid should be valid as long as `monitor_pid` is valid.
43 init_pid: pid_t,
44
45 /// Information about the seccomp structure sizes.
46 ///
47 /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have
48 /// inconsistent views of the kernel's seccomp API.
49 sizes: SeccompNotifSizes,
50
51 /// The length of the container's configured `lxc.seccomp.notify.cookie` value.
52 cookie_len: u64,
53}
54
55/// Helper to receive and verify proxy notification messages.
9cffeac4 56pub struct ProxyMessageBuffer {
571dbe03
WB
57 proxy_msg: SeccompNotifyProxyMsg,
58 seccomp_notif: SeccompNotif,
59 seccomp_resp: SeccompNotifResp,
60 cookie_buf: Vec<u8>,
61
9cffeac4
WB
62 sizes: SeccompNotifSizes,
63 seccomp_packet_size: usize,
41214ae2 64
c95be5f6
WB
65 pid_fd: Option<PidFd>,
66 mem_fd: Option<std::fs::File>,
9cffeac4
WB
67}
68
571dbe03
WB
69unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut {
70 IoVecMut::new(std::slice::from_raw_parts_mut(
71 value as *mut T as *mut u8,
72 mem::size_of::<T>(),
73 ))
74}
75
76unsafe fn io_vec<T>(value: &T) -> IoVec {
77 IoVec::new(std::slice::from_raw_parts(
78 value as *const T as *const u8,
79 mem::size_of::<T>(),
80 ))
81}
82
41ff6d28 83lazy_static! {
e420f6f9
WB
84 static ref SECCOMP_SIZES: SeccompNotifSizes = SeccompNotifSizes::get_checked()
85 .map_err(|e| panic!("{}\nrefusing to run", e))
86 .unwrap();
41ff6d28
WB
87}
88
9cffeac4
WB
89impl ProxyMessageBuffer {
90 /// Allocate a new proxy message buffer with a specific maximum cookie size.
e420f6f9 91 pub fn new(max_cookie: usize) -> Self {
41ff6d28 92 let sizes = SECCOMP_SIZES.clone();
571dbe03 93
9cffeac4
WB
94 let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>()
95 + sizes.notif as usize
96 + sizes.notif_resp as usize;
571dbe03 97
e420f6f9 98 Self {
571dbe03
WB
99 proxy_msg: unsafe { mem::zeroed() },
100 seccomp_notif: unsafe { mem::zeroed() },
101 seccomp_resp: unsafe { mem::zeroed() },
102 cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) },
9cffeac4
WB
103 sizes,
104 seccomp_packet_size,
41214ae2
WB
105 pid_fd: None,
106 mem_fd: None,
e420f6f9 107 }
9cffeac4
WB
108 }
109
1349eed4
WB
110 fn reset(&mut self) {
111 self.proxy_msg.cookie_len = 0;
112 self.mem_fd = None;
113 self.pid_fd = None;
114 }
115
0e2d0fa2 116 /// Returns None on EOF.
9aa2a15a 117 pub async fn recv(&mut self, socket: &SeqPacketSocket) -> Result<bool, Error> {
1349eed4 118 self.reset();
571dbe03
WB
119
120 unsafe {
121 self.cookie_buf.set_len(self.cookie_buf.capacity());
122 }
123
0e2d0fa2 124 let mut iovec = [
571dbe03
WB
125 unsafe { io_vec_mut(&mut self.proxy_msg) },
126 unsafe { io_vec_mut(&mut self.seccomp_notif) },
127 unsafe { io_vec_mut(&mut self.seccomp_resp) },
128 IoVecMut::new(self.cookie_buf.as_mut_slice()),
129 ];
9cffeac4 130
9cffeac4 131 unsafe {
571dbe03 132 self.cookie_buf.set_len(0);
9cffeac4 133 }
571dbe03 134
41214ae2
WB
135 let (size, fds) = socket.recv_fds_vectored(&mut iovec, 2).await?;
136 if size == 0 {
137 return Ok(false);
138 }
139
0e2d0fa2
WB
140 self.set_len(size)?;
141
41214ae2 142 let mut fds = fds.into_iter();
512f780a
WB
143 let pid_fd = unsafe {
144 PidFd::try_from_fd(
145 fds.next()
146 .ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?,
147 )?
148 };
149 let mem_fd = fds
c95be5f6 150 .next()
512f780a
WB
151 .ok_or_else(|| format_err!("lxc seccomp message without memfd"))?;
152
153 self.pid_fd = Some(pid_fd);
154 self.mem_fd = Some(std::fs::File::from_fd(mem_fd));
41214ae2
WB
155
156 Ok(true)
157 }
158
c95be5f6
WB
159 /// Get the process' pidfd.
160 ///
161 /// Note that the message must be valid, otherwise this panics!
162 pub fn pid_fd(&self) -> &PidFd {
163 self.pid_fd.as_ref().unwrap()
164 }
165
166 /// Get the process' mem fd.
167 ///
168 /// Note that this returns a non-mut trait object. This is because positional I/O does not need
169 /// mutable self and the standard library correctly represents this in its `FileExt` trait!
170 ///
171 /// Note that the message must be valid, otherwise this panics!
172 pub fn mem_fd(&self) -> &dyn FileExt {
173 self.mem_fd.as_ref().unwrap()
174 }
175
0e2d0fa2 176 /// Send the current data as response.
9aa2a15a 177 pub async fn respond(&mut self, socket: &SeqPacketSocket) -> io::Result<()> {
0e2d0fa2 178 let iov = [
571dbe03
WB
179 unsafe { io_vec(&self.proxy_msg) },
180 unsafe { io_vec(&self.seccomp_notif) },
181 unsafe { io_vec(&self.seccomp_resp) },
0e2d0fa2 182 ];
9aa2a15a
WB
183 let len = iov.iter().map(|e| e.len()).sum();
184 if socket.sendmsg_vectored(&iov, &[]).await? != len {
185 io_bail!("truncated message?");
186 }
187 Ok(())
9cffeac4
WB
188 }
189
190 #[inline]
191 fn prepare_response(&mut self) {
192 let id = self.request().id;
193 let resp = self.response_mut();
194 resp.id = id;
195 resp.val = -1;
196 resp.error = -libc::ENOSYS;
197 resp.flags = 0;
198 }
199
34f32e25
WB
200 /// Called by recv() after the callback returned the new size. This verifies that there's
201 /// enough data available.
202 fn set_len(&mut self, len: usize) -> Result<(), Error> {
571dbe03
WB
203 if len < self.seccomp_packet_size {
204 bail!("seccomp proxy message too short");
9cffeac4
WB
205 }
206
571dbe03
WB
207 if self.proxy_msg.reserved0 != 0 {
208 bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch");
209 }
210
211 if !self.check_sizes() {
9cffeac4
WB
212 bail!("seccomp proxy message content size validation failed");
213 }
214
571dbe03
WB
215 if len - self.seccomp_packet_size > self.cookie_buf.capacity() {
216 bail!("seccomp proxy message too long");
217 }
218
219 let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) {
220 Ok(cl) => cl,
221 Err(_) => {
222 self.proxy_msg.cookie_len = 0;
223 bail!("cookie length exceeds our size type!");
224 }
225 };
226
227 if len != self.seccomp_packet_size + cookie_len {
52f50bd4
WB
228 bail!(
229 "seccomp proxy packet contains unexpected cookie length {} + {} != {}",
230 self.seccomp_packet_size,
571dbe03 231 cookie_len,
52f50bd4
WB
232 len
233 );
9cffeac4
WB
234 }
235
236 unsafe {
571dbe03 237 self.cookie_buf.set_len(cookie_len);
9cffeac4
WB
238 }
239
240 self.prepare_response();
241
242 Ok(())
243 }
244
571dbe03
WB
245 fn check_sizes(&self) -> bool {
246 let got = self.proxy_msg.sizes.clone();
9cffeac4
WB
247 got.notif == self.sizes.notif
248 && got.notif_resp == self.sizes.notif_resp
249 && got.data == self.sizes.data
250 }
251
9cffeac4
WB
252 /// Get the monitor pid from the current message.
253 ///
254 /// There's no guarantee that the pid is valid.
a0d68fed 255 #[inline]
9cffeac4 256 pub fn monitor_pid(&self) -> pid_t {
571dbe03 257 self.proxy_msg.monitor_pid
9cffeac4
WB
258 }
259
260 /// Get the container's init pid from the current message.
261 ///
262 /// There's no guarantee that the pid is valid.
a0d68fed 263 #[inline]
9cffeac4 264 pub fn init_pid(&self) -> pid_t {
571dbe03 265 self.proxy_msg.init_pid
9cffeac4
WB
266 }
267
268 /// Get the syscall request structure of this message.
a0d68fed 269 #[inline]
9cffeac4 270 pub fn request(&self) -> &SeccompNotif {
571dbe03 271 &self.seccomp_notif
9cffeac4
WB
272 }
273
274 /// Access the response buffer of this message.
a0d68fed 275 #[inline]
9cffeac4 276 pub fn response_mut(&mut self) -> &mut SeccompNotifResp {
571dbe03 277 &mut self.seccomp_resp
9cffeac4
WB
278 }
279
280 /// Get the cookie's length.
a0d68fed 281 #[inline]
9cffeac4 282 pub fn cookie_len(&self) -> usize {
571dbe03 283 usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize")
9cffeac4
WB
284 }
285
286 /// Get the cookie sent along with this message.
a0d68fed 287 #[inline]
9cffeac4 288 pub fn cookie(&self) -> &[u8] {
571dbe03 289 &self.cookie_buf
9cffeac4 290 }
c95be5f6 291
937921aa 292 /// Shortcut to get a parameter value.
c95be5f6 293 #[inline]
937921aa 294 fn arg(&self, arg: u32) -> Result<u64, Error> {
c95be5f6
WB
295 self.request()
296 .data
297 .args
298 .get(arg as usize)
9486338a 299 .copied()
c95be5f6
WB
300 .ok_or_else(|| nix::errno::Errno::ERANGE.into())
301 }
302
7970b0ea
WB
303 /// Get a parameter as C String where the pointer may be `NULL`.
304 ///
305 /// Strings are limited to 4k bytes currently.
a0d68fed 306 #[inline]
7970b0ea
WB
307 pub fn arg_opt_c_string(&self, arg: u32) -> Result<Option<CString>, Error> {
308 let offset = self.arg(arg)?;
309 if offset == 0 {
310 Ok(None)
311 } else {
312 Ok(Some(crate::syscall::get_c_string(self, offset)?))
313 }
314 }
315
c95be5f6
WB
316 /// Get a parameter as C String.
317 ///
318 /// Strings are limited to 4k bytes currently.
a0d68fed 319 #[inline]
c95be5f6 320 pub fn arg_c_string(&self, arg: u32) -> Result<CString, Error> {
7970b0ea
WB
321 self.arg_opt_c_string(arg)?
322 .ok_or_else(|| Errno::EINVAL.into())
937921aa
WB
323 }
324
7470f14b
WB
325 /// Read a user space pointer parameter.
326 #[inline]
1349eed4 327 pub fn arg_struct_by_ptr<T>(&self, arg: u32) -> Result<T, Error> {
7470f14b
WB
328 let offset = self.arg(arg)?;
329 let mut data: T = unsafe { mem::zeroed() };
330 let slice = unsafe {
331 std::slice::from_raw_parts_mut(&mut data as *mut _ as *mut u8, mem::size_of::<T>())
332 };
333 let got = self.mem_fd().read_at(slice, offset)?;
334 if got != mem::size_of::<T>() {
335 Err(Errno::EINVAL.into())
336 } else {
337 Ok(data)
338 }
339 }
340
341 /// Read a user space pointer parameter.
342 #[inline]
343 pub fn mem_write_struct<T>(&self, offset: u64, data: &T) -> io::Result<()> {
344 let slice = unsafe {
345 std::slice::from_raw_parts(data as *const T as *const u8, mem::size_of::<T>())
346 };
347 let got = self.mem_fd().write_at(slice, offset)?;
348 if got != mem::size_of::<T>() {
349 Err(Errno::EINVAL.into())
350 } else {
351 Ok(())
352 }
353 }
354
937921aa 355 /// Checked way to get a `mode_t` argument.
a0d68fed 356 #[inline]
61bfa355
WB
357 pub fn arg_mode_t(&self, arg: u32) -> Result<nix::sys::stat::mode_t, Error> {
358 nix::sys::stat::mode_t::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))
937921aa
WB
359 }
360
361 /// Checked way to get a `dev_t` argument.
a0d68fed 362 #[inline]
937921aa
WB
363 pub fn arg_dev_t(&self, arg: u32) -> Result<nix::sys::stat::dev_t, Error> {
364 nix::sys::stat::dev_t::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into())
365 }
366
367 /// Checked way to get a file descriptor argument.
a0d68fed 368 #[inline]
937921aa
WB
369 pub fn arg_fd(&self, arg: u32, flags: c_int) -> Result<Fd, Error> {
370 let fd = RawFd::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))?;
371 if fd == libc::AT_FDCWD {
372 Ok(self.pid_fd().fd_cwd()?)
373 } else {
374 Ok(self.pid_fd().fd_num(fd, flags)?)
375 }
c95be5f6 376 }
7970b0ea 377
7470f14b
WB
378 /// Checked way to get a c_uint argument.
379 #[inline]
380 pub fn arg_uint(&self, arg: u32) -> Result<c_uint, Error> {
381 c_uint::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into())
382 }
383
7970b0ea 384 /// Checked way to get a c_int argument.
a0d68fed 385 #[inline]
7970b0ea 386 pub fn arg_int(&self, arg: u32) -> Result<c_int, Error> {
7470f14b 387 self.arg_uint(arg).map(|u| u as c_int)
7970b0ea
WB
388 }
389
390 /// Checked way to get a `caddr_t` argument.
a0d68fed
WB
391 #[inline]
392 pub fn arg_caddr_t(&self, arg: u32) -> Result<*mut i8, Error> {
393 Ok(self.arg(arg)? as *mut i8)
394 }
395
396 /// Checked way to get a raw pointer argument
397 #[inline]
398 pub fn arg_pointer(&self, arg: u32) -> Result<*const u8, Error> {
399 Ok(self.arg(arg)? as usize as *const u8)
400 }
401
402 /// Checked way to get a raw char pointer.
403 #[inline]
404 pub fn arg_char_ptr(&self, arg: u32) -> Result<*const libc::c_char, Error> {
405 Ok(self.arg(arg)? as usize as *const libc::c_char)
7970b0ea 406 }
9cffeac4 407}