// Extracted from the git.proxmox.com blame view of pve-lxc-syscalld.git, src/lxcseccomp.rs
// (commit subject: "replace failure with anyhow").

//! Module for LXC specific seccomp handling.

use std::convert::TryFrom;
use std::ffi::CString;
use std::os::raw::{c_int, c_uint};
use std::os::unix::fs::FileExt;
use std::os::unix::io::{FromRawFd, RawFd};
use std::{io, mem};

use anyhow::{bail, format_err, Error};
use lazy_static::lazy_static;
use libc::pid_t;
use nix::errno::Errno;

use crate::io::cmsg;
use crate::io::iovec::{IoVec, IoVecMut};
use crate::io::seq_packet::SeqPacketSocket;
use crate::process::PidFd;
use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes};
use crate::tools::{Fd, FromFd};

22/// Seccomp notification proxy message sent by the lxc monitor.
23///
24/// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp
25/// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`,
26/// `SeccompNotifResp` and a cookie.
27///
28/// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper
29/// for communcation.
30#[repr(C)]
31pub struct SeccompNotifyProxyMsg {
32 /// Reserved data must be zero.
33 reserved0: u64,
34
35 /// The lxc monitor pid.
36 ///
37 /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer
38 /// we receive this message from.
39 monitor_pid: pid_t,
40
41 /// The container's init pid.
42 ///
43 /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so
44 /// this pid should be valid as long as `monitor_pid` is valid.
45 init_pid: pid_t,
46
47 /// Information about the seccomp structure sizes.
48 ///
49 /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have
50 /// inconsistent views of the kernel's seccomp API.
51 sizes: SeccompNotifSizes,
52
53 /// The length of the container's configured `lxc.seccomp.notify.cookie` value.
54 cookie_len: u64,
55}
56
57/// Helper to receive and verify proxy notification messages.
9cffeac4 58pub struct ProxyMessageBuffer {
571dbe03
WB
59 proxy_msg: SeccompNotifyProxyMsg,
60 seccomp_notif: SeccompNotif,
61 seccomp_resp: SeccompNotifResp,
62 cookie_buf: Vec<u8>,
63
9cffeac4
WB
64 sizes: SeccompNotifSizes,
65 seccomp_packet_size: usize,
41214ae2 66
c95be5f6
WB
67 pid_fd: Option<PidFd>,
68 mem_fd: Option<std::fs::File>,
9cffeac4
WB
69}
70
571dbe03
WB
71unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut {
72 IoVecMut::new(std::slice::from_raw_parts_mut(
73 value as *mut T as *mut u8,
74 mem::size_of::<T>(),
75 ))
76}
77
78unsafe fn io_vec<T>(value: &T) -> IoVec {
79 IoVec::new(std::slice::from_raw_parts(
80 value as *const T as *const u8,
81 mem::size_of::<T>(),
82 ))
83}
84
41ff6d28 85lazy_static! {
e420f6f9
WB
86 static ref SECCOMP_SIZES: SeccompNotifSizes = SeccompNotifSizes::get_checked()
87 .map_err(|e| panic!("{}\nrefusing to run", e))
88 .unwrap();
41ff6d28
WB
89}
90
9cffeac4
WB
91impl ProxyMessageBuffer {
92 /// Allocate a new proxy message buffer with a specific maximum cookie size.
e420f6f9 93 pub fn new(max_cookie: usize) -> Self {
41ff6d28 94 let sizes = SECCOMP_SIZES.clone();
571dbe03 95
9cffeac4
WB
96 let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>()
97 + sizes.notif as usize
98 + sizes.notif_resp as usize;
571dbe03 99
e420f6f9 100 Self {
571dbe03
WB
101 proxy_msg: unsafe { mem::zeroed() },
102 seccomp_notif: unsafe { mem::zeroed() },
103 seccomp_resp: unsafe { mem::zeroed() },
104 cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) },
9cffeac4
WB
105 sizes,
106 seccomp_packet_size,
41214ae2
WB
107 pid_fd: None,
108 mem_fd: None,
e420f6f9 109 }
9cffeac4
WB
110 }
111
1349eed4
WB
112 fn reset(&mut self) {
113 self.proxy_msg.cookie_len = 0;
114 self.mem_fd = None;
115 self.pid_fd = None;
116 }
117
f42e0795 118 /// Returns false on EOF.
9aa2a15a 119 pub async fn recv(&mut self, socket: &SeqPacketSocket) -> Result<bool, Error> {
f42e0795 120 // prepare buffers:
1349eed4 121 self.reset();
571dbe03
WB
122
123 unsafe {
124 self.cookie_buf.set_len(self.cookie_buf.capacity());
125 }
126
0e2d0fa2 127 let mut iovec = [
571dbe03
WB
128 unsafe { io_vec_mut(&mut self.proxy_msg) },
129 unsafe { io_vec_mut(&mut self.seccomp_notif) },
130 unsafe { io_vec_mut(&mut self.seccomp_resp) },
131 IoVecMut::new(self.cookie_buf.as_mut_slice()),
132 ];
9cffeac4 133
9cffeac4 134 unsafe {
571dbe03 135 self.cookie_buf.set_len(0);
9cffeac4 136 }
571dbe03 137
f42e0795 138 // receive:
8dd26985
WB
139 let mut fd_cmsg_buf = cmsg::buffer::<[RawFd; 2]>();
140 let (datalen, cmsglen) = socket
141 .recvmsg_vectored(&mut iovec, &mut fd_cmsg_buf)
2477831e
WB
142 .await?;
143
8dd26985 144 if datalen == 0 {
41214ae2
WB
145 return Ok(false);
146 }
147
8dd26985 148 self.set_len(datalen)?;
2477831e 149
f42e0795
WB
150 // iterate through control messages:
151
8dd26985 152 let cmsg = cmsg::iter(&fd_cmsg_buf[..cmsglen])
2477831e
WB
153 .next()
154 .ok_or_else(|| format_err!("missing file descriptors in message"))?;
155
156 if cmsg.cmsg_level != libc::SOL_SOCKET && cmsg.cmsg_type != libc::SCM_RIGHTS {
157 bail!("expected SCM_RIGHTS control message");
158 }
159
160 let fds: Vec<Fd> = cmsg
161 .data
162 .chunks_exact(mem::size_of::<RawFd>())
92eface0
WB
163 .map(|chunk| unsafe {
164 // clippy bug
165 #[allow(clippy::cast_ptr_alignment)]
166 Fd::from_raw_fd(std::ptr::read_unaligned(chunk.as_ptr() as _))
167 })
2477831e
WB
168 .collect();
169
170 if fds.len() != 2 {
171 bail!("expected exactly 2 file descriptors in control message");
172 }
0e2d0fa2 173
41214ae2 174 let mut fds = fds.into_iter();
512f780a
WB
175 let pid_fd = unsafe {
176 PidFd::try_from_fd(
177 fds.next()
178 .ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?,
179 )?
180 };
181 let mem_fd = fds
c95be5f6 182 .next()
512f780a
WB
183 .ok_or_else(|| format_err!("lxc seccomp message without memfd"))?;
184
185 self.pid_fd = Some(pid_fd);
186 self.mem_fd = Some(std::fs::File::from_fd(mem_fd));
41214ae2
WB
187
188 Ok(true)
189 }
190
c95be5f6
WB
191 /// Get the process' pidfd.
192 ///
193 /// Note that the message must be valid, otherwise this panics!
194 pub fn pid_fd(&self) -> &PidFd {
195 self.pid_fd.as_ref().unwrap()
196 }
197
198 /// Get the process' mem fd.
199 ///
200 /// Note that this returns a non-mut trait object. This is because positional I/O does not need
201 /// mutable self and the standard library correctly represents this in its `FileExt` trait!
202 ///
203 /// Note that the message must be valid, otherwise this panics!
204 pub fn mem_fd(&self) -> &dyn FileExt {
205 self.mem_fd.as_ref().unwrap()
206 }
207
0e2d0fa2 208 /// Send the current data as response.
9aa2a15a 209 pub async fn respond(&mut self, socket: &SeqPacketSocket) -> io::Result<()> {
0e2d0fa2 210 let iov = [
571dbe03
WB
211 unsafe { io_vec(&self.proxy_msg) },
212 unsafe { io_vec(&self.seccomp_notif) },
213 unsafe { io_vec(&self.seccomp_resp) },
0e2d0fa2 214 ];
9aa2a15a 215 let len = iov.iter().map(|e| e.len()).sum();
8dd26985 216 if socket.sendmsg_vectored(&iov).await? != len {
9aa2a15a
WB
217 io_bail!("truncated message?");
218 }
219 Ok(())
9cffeac4
WB
220 }
221
222 #[inline]
223 fn prepare_response(&mut self) {
224 let id = self.request().id;
225 let resp = self.response_mut();
226 resp.id = id;
227 resp.val = -1;
228 resp.error = -libc::ENOSYS;
229 resp.flags = 0;
230 }
231
34f32e25
WB
232 /// Called by recv() after the callback returned the new size. This verifies that there's
233 /// enough data available.
234 fn set_len(&mut self, len: usize) -> Result<(), Error> {
571dbe03
WB
235 if len < self.seccomp_packet_size {
236 bail!("seccomp proxy message too short");
9cffeac4
WB
237 }
238
571dbe03
WB
239 if self.proxy_msg.reserved0 != 0 {
240 bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch");
241 }
242
243 if !self.check_sizes() {
9cffeac4
WB
244 bail!("seccomp proxy message content size validation failed");
245 }
246
571dbe03
WB
247 if len - self.seccomp_packet_size > self.cookie_buf.capacity() {
248 bail!("seccomp proxy message too long");
249 }
250
251 let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) {
252 Ok(cl) => cl,
253 Err(_) => {
254 self.proxy_msg.cookie_len = 0;
255 bail!("cookie length exceeds our size type!");
256 }
257 };
258
259 if len != self.seccomp_packet_size + cookie_len {
52f50bd4
WB
260 bail!(
261 "seccomp proxy packet contains unexpected cookie length {} + {} != {}",
262 self.seccomp_packet_size,
571dbe03 263 cookie_len,
52f50bd4
WB
264 len
265 );
9cffeac4
WB
266 }
267
268 unsafe {
571dbe03 269 self.cookie_buf.set_len(cookie_len);
9cffeac4
WB
270 }
271
272 self.prepare_response();
273
274 Ok(())
275 }
276
571dbe03
WB
277 fn check_sizes(&self) -> bool {
278 let got = self.proxy_msg.sizes.clone();
9cffeac4
WB
279 got.notif == self.sizes.notif
280 && got.notif_resp == self.sizes.notif_resp
281 && got.data == self.sizes.data
282 }
283
9cffeac4
WB
284 /// Get the monitor pid from the current message.
285 ///
286 /// There's no guarantee that the pid is valid.
a0d68fed 287 #[inline]
9cffeac4 288 pub fn monitor_pid(&self) -> pid_t {
571dbe03 289 self.proxy_msg.monitor_pid
9cffeac4
WB
290 }
291
292 /// Get the container's init pid from the current message.
293 ///
294 /// There's no guarantee that the pid is valid.
a0d68fed 295 #[inline]
9cffeac4 296 pub fn init_pid(&self) -> pid_t {
571dbe03 297 self.proxy_msg.init_pid
9cffeac4
WB
298 }
299
300 /// Get the syscall request structure of this message.
a0d68fed 301 #[inline]
9cffeac4 302 pub fn request(&self) -> &SeccompNotif {
571dbe03 303 &self.seccomp_notif
9cffeac4
WB
304 }
305
306 /// Access the response buffer of this message.
a0d68fed 307 #[inline]
9cffeac4 308 pub fn response_mut(&mut self) -> &mut SeccompNotifResp {
571dbe03 309 &mut self.seccomp_resp
9cffeac4
WB
310 }
311
312 /// Get the cookie's length.
a0d68fed 313 #[inline]
9cffeac4 314 pub fn cookie_len(&self) -> usize {
571dbe03 315 usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize")
9cffeac4
WB
316 }
317
318 /// Get the cookie sent along with this message.
a0d68fed 319 #[inline]
9cffeac4 320 pub fn cookie(&self) -> &[u8] {
571dbe03 321 &self.cookie_buf
9cffeac4 322 }
c95be5f6 323
937921aa 324 /// Shortcut to get a parameter value.
c95be5f6 325 #[inline]
937921aa 326 fn arg(&self, arg: u32) -> Result<u64, Error> {
c95be5f6
WB
327 self.request()
328 .data
329 .args
330 .get(arg as usize)
9486338a 331 .copied()
c95be5f6
WB
332 .ok_or_else(|| nix::errno::Errno::ERANGE.into())
333 }
334
7970b0ea
WB
335 /// Get a parameter as C String where the pointer may be `NULL`.
336 ///
337 /// Strings are limited to 4k bytes currently.
a0d68fed 338 #[inline]
7970b0ea
WB
339 pub fn arg_opt_c_string(&self, arg: u32) -> Result<Option<CString>, Error> {
340 let offset = self.arg(arg)?;
341 if offset == 0 {
342 Ok(None)
343 } else {
344 Ok(Some(crate::syscall::get_c_string(self, offset)?))
345 }
346 }
347
c95be5f6
WB
348 /// Get a parameter as C String.
349 ///
350 /// Strings are limited to 4k bytes currently.
a0d68fed 351 #[inline]
c95be5f6 352 pub fn arg_c_string(&self, arg: u32) -> Result<CString, Error> {
7970b0ea
WB
353 self.arg_opt_c_string(arg)?
354 .ok_or_else(|| Errno::EINVAL.into())
937921aa
WB
355 }
356
7470f14b
WB
357 /// Read a user space pointer parameter.
358 #[inline]
1349eed4 359 pub fn arg_struct_by_ptr<T>(&self, arg: u32) -> Result<T, Error> {
7470f14b
WB
360 let offset = self.arg(arg)?;
361 let mut data: T = unsafe { mem::zeroed() };
362 let slice = unsafe {
363 std::slice::from_raw_parts_mut(&mut data as *mut _ as *mut u8, mem::size_of::<T>())
364 };
365 let got = self.mem_fd().read_at(slice, offset)?;
366 if got != mem::size_of::<T>() {
367 Err(Errno::EINVAL.into())
368 } else {
369 Ok(data)
370 }
371 }
372
373 /// Read a user space pointer parameter.
374 #[inline]
375 pub fn mem_write_struct<T>(&self, offset: u64, data: &T) -> io::Result<()> {
376 let slice = unsafe {
377 std::slice::from_raw_parts(data as *const T as *const u8, mem::size_of::<T>())
378 };
379 let got = self.mem_fd().write_at(slice, offset)?;
380 if got != mem::size_of::<T>() {
381 Err(Errno::EINVAL.into())
382 } else {
383 Ok(())
384 }
385 }
386
937921aa 387 /// Checked way to get a `mode_t` argument.
a0d68fed 388 #[inline]
61bfa355
WB
389 pub fn arg_mode_t(&self, arg: u32) -> Result<nix::sys::stat::mode_t, Error> {
390 nix::sys::stat::mode_t::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))
937921aa
WB
391 }
392
393 /// Checked way to get a `dev_t` argument.
a0d68fed 394 #[inline]
937921aa
WB
395 pub fn arg_dev_t(&self, arg: u32) -> Result<nix::sys::stat::dev_t, Error> {
396 nix::sys::stat::dev_t::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into())
397 }
398
399 /// Checked way to get a file descriptor argument.
a0d68fed 400 #[inline]
937921aa
WB
401 pub fn arg_fd(&self, arg: u32, flags: c_int) -> Result<Fd, Error> {
402 let fd = RawFd::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))?;
403 if fd == libc::AT_FDCWD {
404 Ok(self.pid_fd().fd_cwd()?)
405 } else {
406 Ok(self.pid_fd().fd_num(fd, flags)?)
407 }
c95be5f6 408 }
7970b0ea 409
7470f14b
WB
410 /// Checked way to get a c_uint argument.
411 #[inline]
412 pub fn arg_uint(&self, arg: u32) -> Result<c_uint, Error> {
413 c_uint::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into())
414 }
415
7970b0ea 416 /// Checked way to get a c_int argument.
a0d68fed 417 #[inline]
7970b0ea 418 pub fn arg_int(&self, arg: u32) -> Result<c_int, Error> {
7470f14b 419 self.arg_uint(arg).map(|u| u as c_int)
7970b0ea
WB
420 }
421
422 /// Checked way to get a `caddr_t` argument.
a0d68fed
WB
423 #[inline]
424 pub fn arg_caddr_t(&self, arg: u32) -> Result<*mut i8, Error> {
425 Ok(self.arg(arg)? as *mut i8)
426 }
427
428 /// Checked way to get a raw pointer argument
429 #[inline]
430 pub fn arg_pointer(&self, arg: u32) -> Result<*const u8, Error> {
431 Ok(self.arg(arg)? as usize as *const u8)
432 }
433
434 /// Checked way to get a raw char pointer.
435 #[inline]
436 pub fn arg_char_ptr(&self, arg: u32) -> Result<*const libc::c_char, Error> {
437 Ok(self.arg(arg)? as usize as *const libc::c_char)
7970b0ea 438 }
9cffeac4 439}