]> git.proxmox.com Git - pve-lxc-syscalld.git/blob - src/lxcseccomp.rs
273f3c988b2eb4d7fedd10bbe84d52ba0420e286
[pve-lxc-syscalld.git] / src / lxcseccomp.rs
1 //! Module for LXC specific seccomp handling.
2
3 use std::convert::TryFrom;
4 use std::ffi::CString;
5 use std::os::raw::{c_int, c_uint};
6 use std::os::unix::fs::FileExt;
7 use std::os::unix::io::{FromRawFd, RawFd};
8 use std::{io, mem};
9
10 use anyhow::{bail, format_err, Error};
11 use lazy_static::lazy_static;
12 use libc::pid_t;
13 use nix::errno::Errno;
14
15 use crate::io::cmsg;
16 use crate::io::iovec::{IoVec, IoVecMut};
17 use crate::io::seq_packet::SeqPacketSocket;
18 use crate::process::PidFd;
19 use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes};
20 use crate::tools::{Fd, FromFd};
21
22 /// Seccomp notification proxy message sent by the lxc monitor.
23 ///
24 /// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp
25 /// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`,
26 /// `SeccompNotifResp` and a cookie.
27 ///
28 /// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper
29 /// for communcation.
30 #[repr(C)]
31 pub struct SeccompNotifyProxyMsg {
32 /// Reserved data must be zero.
33 reserved0: u64,
34
35 /// The lxc monitor pid.
36 ///
37 /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer
38 /// we receive this message from.
39 monitor_pid: pid_t,
40
41 /// The container's init pid.
42 ///
43 /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so
44 /// this pid should be valid as long as `monitor_pid` is valid.
45 init_pid: pid_t,
46
47 /// Information about the seccomp structure sizes.
48 ///
49 /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have
50 /// inconsistent views of the kernel's seccomp API.
51 sizes: SeccompNotifSizes,
52
53 /// The length of the container's configured `lxc.seccomp.notify.cookie` value.
54 cookie_len: u64,
55 }
56
57 /// Helper to receive and verify proxy notification messages.
58 pub struct ProxyMessageBuffer {
59 proxy_msg: SeccompNotifyProxyMsg,
60 seccomp_notif: SeccompNotif,
61 seccomp_resp: SeccompNotifResp,
62 cookie_buf: Vec<u8>,
63
64 sizes: SeccompNotifSizes,
65 seccomp_packet_size: usize,
66
67 pid_fd: Option<PidFd>,
68 mem_fd: Option<std::fs::File>,
69 }
70
71 unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut {
72 IoVecMut::new(unsafe {
73 std::slice::from_raw_parts_mut(value as *mut T as *mut u8, mem::size_of::<T>())
74 })
75 }
76
77 unsafe fn io_vec<T>(value: &T) -> IoVec {
78 IoVec::new(unsafe {
79 std::slice::from_raw_parts(value as *const T as *const u8, mem::size_of::<T>())
80 })
81 }
82
83 lazy_static! {
84 static ref SECCOMP_SIZES: SeccompNotifSizes = SeccompNotifSizes::get_checked()
85 .map_err(|e| panic!("{}\nrefusing to run", e))
86 .unwrap();
87 }
88
89 impl ProxyMessageBuffer {
90 /// Allocate a new proxy message buffer with a specific maximum cookie size.
91 pub fn new(max_cookie: usize) -> Self {
92 let sizes = SECCOMP_SIZES.clone();
93
94 let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>()
95 + sizes.notif as usize
96 + sizes.notif_resp as usize;
97
98 Self {
99 proxy_msg: unsafe { mem::zeroed() },
100 seccomp_notif: unsafe { mem::zeroed() },
101 seccomp_resp: unsafe { mem::zeroed() },
102 cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) },
103 sizes,
104 seccomp_packet_size,
105 pid_fd: None,
106 mem_fd: None,
107 }
108 }
109
110 fn reset(&mut self) {
111 self.proxy_msg.cookie_len = 0;
112 self.mem_fd = None;
113 self.pid_fd = None;
114 }
115
116 /// Returns false on EOF.
117 pub async fn recv(&mut self, socket: &SeqPacketSocket) -> Result<bool, Error> {
118 // prepare buffers:
119 self.reset();
120
121 unsafe {
122 self.cookie_buf.set_len(self.cookie_buf.capacity());
123 }
124
125 let mut iovec = [
126 unsafe { io_vec_mut(&mut self.proxy_msg) },
127 unsafe { io_vec_mut(&mut self.seccomp_notif) },
128 unsafe { io_vec_mut(&mut self.seccomp_resp) },
129 IoVecMut::new(self.cookie_buf.as_mut_slice()),
130 ];
131
132 unsafe {
133 self.cookie_buf.set_len(0);
134 }
135
136 // receive:
137 let mut fd_cmsg_buf = cmsg::buffer::<[RawFd; 2]>();
138 let (datalen, cmsglen) = socket
139 .recvmsg_vectored(&mut iovec, &mut fd_cmsg_buf)
140 .await?;
141
142 if datalen == 0 {
143 return Ok(false);
144 }
145
146 self.set_len(datalen)?;
147
148 // iterate through control messages:
149
150 let cmsg = cmsg::iter(&fd_cmsg_buf[..cmsglen])
151 .next()
152 .ok_or_else(|| format_err!("missing file descriptors in message"))?;
153
154 if cmsg.cmsg_level != libc::SOL_SOCKET && cmsg.cmsg_type != libc::SCM_RIGHTS {
155 bail!("expected SCM_RIGHTS control message");
156 }
157
158 let fds: Vec<Fd> = cmsg
159 .data
160 .chunks_exact(mem::size_of::<RawFd>())
161 .map(|chunk| unsafe {
162 // clippy bug
163 #[allow(clippy::cast_ptr_alignment)]
164 Fd::from_raw_fd(std::ptr::read_unaligned(chunk.as_ptr() as _))
165 })
166 .collect();
167
168 if fds.len() != 2 {
169 bail!("expected exactly 2 file descriptors in control message");
170 }
171
172 let mut fds = fds.into_iter();
173 let pid_fd = unsafe {
174 PidFd::try_from_fd(
175 fds.next()
176 .ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?,
177 )?
178 };
179 let mem_fd = fds
180 .next()
181 .ok_or_else(|| format_err!("lxc seccomp message without memfd"))?;
182
183 self.pid_fd = Some(pid_fd);
184 self.mem_fd = Some(std::fs::File::from_fd(mem_fd));
185
186 Ok(true)
187 }
188
189 /// Get the process' pidfd.
190 ///
191 /// Note that the message must be valid, otherwise this panics!
192 pub fn pid_fd(&self) -> &PidFd {
193 self.pid_fd.as_ref().unwrap()
194 }
195
196 /// Get the process' mem fd.
197 ///
198 /// Note that this returns a non-mut trait object. This is because positional I/O does not need
199 /// mutable self and the standard library correctly represents this in its `FileExt` trait!
200 ///
201 /// Note that the message must be valid, otherwise this panics!
202 pub fn mem_fd(&self) -> &dyn FileExt {
203 self.mem_fd.as_ref().unwrap()
204 }
205
206 /// Send the current data as response.
207 pub async fn respond(&mut self, socket: &SeqPacketSocket) -> io::Result<()> {
208 let iov = [
209 unsafe { io_vec(&self.proxy_msg) },
210 unsafe { io_vec(&self.seccomp_notif) },
211 unsafe { io_vec(&self.seccomp_resp) },
212 ];
213 let len = iov.iter().map(|e| e.len()).sum();
214 if socket.sendmsg_vectored(&iov).await? != len {
215 io_bail!("truncated message?");
216 }
217 Ok(())
218 }
219
220 #[inline]
221 fn prepare_response(&mut self) {
222 let id = self.request().id;
223 let resp = self.response_mut();
224 resp.id = id;
225 resp.val = -1;
226 resp.error = -libc::ENOSYS;
227 resp.flags = 0;
228 }
229
230 /// Called by recv() after the callback returned the new size. This verifies that there's
231 /// enough data available.
232 fn set_len(&mut self, len: usize) -> Result<(), Error> {
233 if len < self.seccomp_packet_size {
234 bail!("seccomp proxy message too short");
235 }
236
237 if self.proxy_msg.reserved0 != 0 {
238 bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch");
239 }
240
241 if !self.check_sizes() {
242 bail!("seccomp proxy message content size validation failed");
243 }
244
245 if len - self.seccomp_packet_size > self.cookie_buf.capacity() {
246 bail!("seccomp proxy message too long");
247 }
248
249 let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) {
250 Ok(cl) => cl,
251 Err(_) => {
252 self.proxy_msg.cookie_len = 0;
253 bail!("cookie length exceeds our size type!");
254 }
255 };
256
257 if len != self.seccomp_packet_size + cookie_len {
258 bail!(
259 "seccomp proxy packet contains unexpected cookie length {} + {} != {}",
260 self.seccomp_packet_size,
261 cookie_len,
262 len
263 );
264 }
265
266 unsafe {
267 self.cookie_buf.set_len(cookie_len);
268 }
269
270 self.prepare_response();
271
272 Ok(())
273 }
274
275 fn check_sizes(&self) -> bool {
276 let got = self.proxy_msg.sizes.clone();
277 got.notif == self.sizes.notif
278 && got.notif_resp == self.sizes.notif_resp
279 && got.data == self.sizes.data
280 }
281
282 /// Get the monitor pid from the current message.
283 ///
284 /// There's no guarantee that the pid is valid.
285 #[inline]
286 pub fn monitor_pid(&self) -> pid_t {
287 self.proxy_msg.monitor_pid
288 }
289
290 /// Get the container's init pid from the current message.
291 ///
292 /// There's no guarantee that the pid is valid.
293 #[inline]
294 pub fn init_pid(&self) -> pid_t {
295 self.proxy_msg.init_pid
296 }
297
298 /// Get the syscall request structure of this message.
299 #[inline]
300 pub fn request(&self) -> &SeccompNotif {
301 &self.seccomp_notif
302 }
303
304 /// Access the response buffer of this message.
305 #[inline]
306 pub fn response_mut(&mut self) -> &mut SeccompNotifResp {
307 &mut self.seccomp_resp
308 }
309
310 /// Get the cookie's length.
311 #[inline]
312 pub fn cookie_len(&self) -> usize {
313 usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize")
314 }
315
316 /// Get the cookie sent along with this message.
317 #[inline]
318 pub fn cookie(&self) -> &[u8] {
319 &self.cookie_buf
320 }
321
322 /// Shortcut to get a parameter value.
323 #[inline]
324 fn arg(&self, arg: u32) -> Result<u64, Error> {
325 self.request()
326 .data
327 .args
328 .get(arg as usize)
329 .copied()
330 .ok_or_else(|| nix::errno::Errno::ERANGE.into())
331 }
332
333 /// Get a parameter as C String where the pointer may be `NULL`.
334 ///
335 /// Strings are limited to 4k bytes currently.
336 #[inline]
337 pub fn arg_opt_c_string(&self, arg: u32) -> Result<Option<CString>, Error> {
338 let offset = self.arg(arg)?;
339 if offset == 0 {
340 Ok(None)
341 } else {
342 Ok(Some(crate::syscall::get_c_string(self, offset)?))
343 }
344 }
345
346 /// Get a parameter as C String.
347 ///
348 /// Strings are limited to 4k bytes currently.
349 #[inline]
350 pub fn arg_c_string(&self, arg: u32) -> Result<CString, Error> {
351 self.arg_opt_c_string(arg)?
352 .ok_or_else(|| Errno::EINVAL.into())
353 }
354
355 /// Read a user space pointer parameter.
356 #[inline]
357 pub fn arg_struct_by_ptr<T>(&self, arg: u32) -> Result<T, Error> {
358 let offset = self.arg(arg)?;
359 let mut data: T = unsafe { mem::zeroed() };
360 let slice = unsafe {
361 std::slice::from_raw_parts_mut(&mut data as *mut _ as *mut u8, mem::size_of::<T>())
362 };
363 let got = self.mem_fd().read_at(slice, offset)?;
364 if got != mem::size_of::<T>() {
365 Err(Errno::EINVAL.into())
366 } else {
367 Ok(data)
368 }
369 }
370
371 /// Read a user space pointer parameter.
372 #[inline]
373 pub fn mem_write_struct<T>(&self, offset: u64, data: &T) -> io::Result<()> {
374 let slice = unsafe {
375 std::slice::from_raw_parts(data as *const T as *const u8, mem::size_of::<T>())
376 };
377 let got = self.mem_fd().write_at(slice, offset)?;
378 if got != mem::size_of::<T>() {
379 Err(Errno::EINVAL.into())
380 } else {
381 Ok(())
382 }
383 }
384
385 /// Checked way to get a `mode_t` argument.
386 #[inline]
387 pub fn arg_mode_t(&self, arg: u32) -> Result<nix::sys::stat::mode_t, Error> {
388 nix::sys::stat::mode_t::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))
389 }
390
391 /// Checked way to get a `dev_t` argument.
392 #[inline]
393 pub fn arg_dev_t(&self, arg: u32) -> Result<nix::sys::stat::dev_t, Error> {
394 self.arg(arg)
395 }
396
397 /// Checked way to get a file descriptor argument.
398 #[inline]
399 pub fn arg_fd(&self, arg: u32, flags: c_int) -> Result<Fd, Error> {
400 let fd = self.arg(arg)? as RawFd;
401 if fd < 0 {
402 // we pass those "as-is' to syscalls.
403 return Ok(Fd(fd));
404 }
405 // otherwise we'll open them from the process:
406 if fd == libc::AT_FDCWD {
407 Ok(self.pid_fd().fd_cwd()?)
408 } else {
409 Ok(self.pid_fd().fd_num(fd, flags)?)
410 }
411 }
412
413 /// Checked way to get a c_uint argument.
414 #[inline]
415 pub fn arg_uint(&self, arg: u32) -> Result<c_uint, Error> {
416 c_uint::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into())
417 }
418
419 /// Checked way to get a c_int argument.
420 #[inline]
421 pub fn arg_int(&self, arg: u32) -> Result<c_int, Error> {
422 self.arg_uint(arg).map(|u| u as c_int)
423 }
424
425 /// Checked way to get a `caddr_t` argument.
426 #[inline]
427 pub fn arg_caddr_t(&self, arg: u32) -> Result<*mut i8, Error> {
428 Ok(self.arg(arg)? as *mut i8)
429 }
430
431 /// Checked way to get a raw pointer argument
432 #[inline]
433 pub fn arg_pointer(&self, arg: u32) -> Result<*const u8, Error> {
434 Ok(self.arg(arg)? as usize as *const u8)
435 }
436
437 /// Checked way to get a raw char pointer.
438 #[inline]
439 pub fn arg_char_ptr(&self, arg: u32) -> Result<*const libc::c_char, Error> {
440 Ok(self.arg(arg)? as usize as *const libc::c_char)
441 }
442 }