1 //! Module for LXC specific seccomp handling.
3 use std
::convert
::TryFrom
;
5 use std
::os
::raw
::{c_int, c_uint}
;
6 use std
::os
::unix
::fs
::FileExt
;
7 use std
::os
::unix
::io
::{FromRawFd, RawFd}
;
10 use anyhow
::{bail, format_err, Error}
;
11 use lazy_static
::lazy_static
;
13 use nix
::errno
::Errno
;
16 use crate::io
::iovec
::{IoVec, IoVecMut}
;
17 use crate::io
::seq_packet
::SeqPacketSocket
;
18 use crate::process
::PidFd
;
19 use crate::seccomp
::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes}
;
20 use crate::tools
::{Fd, FromFd}
;
22 /// Seccomp notification proxy message sent by the lxc monitor.
24 /// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp
25 /// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`,
26 /// `SeccompNotifResp` and a cookie.
28 /// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper
31 pub struct SeccompNotifyProxyMsg
{
32 /// Reserved data must be zero.
35 /// The lxc monitor pid.
37 /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer
38 /// we receive this message from.
41 /// The container's init pid.
43 /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so
44 /// this pid should be valid as long as `monitor_pid` is valid.
47 /// Information about the seccomp structure sizes.
49 /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have
50 /// inconsistent views of the kernel's seccomp API.
51 sizes
: SeccompNotifSizes
,
53 /// The length of the container's configured `lxc.seccomp.notify.cookie` value.
57 /// Helper to receive and verify proxy notification messages.
58 pub struct ProxyMessageBuffer
{
59 proxy_msg
: SeccompNotifyProxyMsg
,
60 seccomp_notif
: SeccompNotif
,
61 seccomp_resp
: SeccompNotifResp
,
64 sizes
: SeccompNotifSizes
,
65 seccomp_packet_size
: usize,
67 pid_fd
: Option
<PidFd
>,
68 mem_fd
: Option
<std
::fs
::File
>,
71 unsafe fn io_vec_mut
<T
>(value
: &mut T
) -> IoVecMut
{
72 IoVecMut
::new(unsafe {
73 std
::slice
::from_raw_parts_mut(value
as *mut T
as *mut u8, mem
::size_of
::<T
>())
77 unsafe fn io_vec
<T
>(value
: &T
) -> IoVec
{
79 std
::slice
::from_raw_parts(value
as *const T
as *const u8, mem
::size_of
::<T
>())
84 static ref SECCOMP_SIZES
: SeccompNotifSizes
= SeccompNotifSizes
::get_checked()
85 .map_err(|e
| panic
!("{}\nrefusing to run", e
))
89 impl ProxyMessageBuffer
{
90 /// Allocate a new proxy message buffer with a specific maximum cookie size.
91 pub fn new(max_cookie
: usize) -> Self {
92 let sizes
= SECCOMP_SIZES
.clone();
94 let seccomp_packet_size
= mem
::size_of
::<SeccompNotifyProxyMsg
>()
95 + sizes
.notif
as usize
96 + sizes
.notif_resp
as usize;
99 proxy_msg
: unsafe { mem::zeroed() }
,
100 seccomp_notif
: unsafe { mem::zeroed() }
,
101 seccomp_resp
: unsafe { mem::zeroed() }
,
102 cookie_buf
: unsafe { super::tools::vec::uninitialized(max_cookie) }
,
110 fn reset(&mut self) {
111 self.proxy_msg
.cookie_len
= 0;
116 /// Returns false on EOF.
117 pub async
fn recv(&mut self, socket
: &SeqPacketSocket
) -> Result
<bool
, Error
> {
122 self.cookie_buf
.set_len(self.cookie_buf
.capacity());
126 unsafe { io_vec_mut(&mut self.proxy_msg) }
,
127 unsafe { io_vec_mut(&mut self.seccomp_notif) }
,
128 unsafe { io_vec_mut(&mut self.seccomp_resp) }
,
129 IoVecMut
::new(self.cookie_buf
.as_mut_slice()),
133 self.cookie_buf
.set_len(0);
137 let mut fd_cmsg_buf
= cmsg
::buffer
::<[RawFd
; 2]>();
138 let (datalen
, cmsglen
) = socket
139 .recvmsg_vectored(&mut iovec
, &mut fd_cmsg_buf
)
146 self.set_len(datalen
)?
;
148 // iterate through control messages:
150 let cmsg
= cmsg
::iter(&fd_cmsg_buf
[..cmsglen
])
152 .ok_or_else(|| format_err
!("missing file descriptors in message"))?
;
154 if cmsg
.cmsg_level
!= libc
::SOL_SOCKET
&& cmsg
.cmsg_type
!= libc
::SCM_RIGHTS
{
155 bail
!("expected SCM_RIGHTS control message");
158 let fds
: Vec
<Fd
> = cmsg
160 .chunks_exact(mem
::size_of
::<RawFd
>())
161 .map(|chunk
| unsafe {
163 #[allow(clippy::cast_ptr_alignment)]
164 Fd
::from_raw_fd(std
::ptr
::read_unaligned(chunk
.as_ptr() as _
))
169 bail
!("expected exactly 2 file descriptors in control message");
172 let mut fds
= fds
.into_iter();
173 let pid_fd
= unsafe {
176 .ok_or_else(|| format_err
!("lxc seccomp message without pidfd"))?
,
181 .ok_or_else(|| format_err
!("lxc seccomp message without memfd"))?
;
183 self.pid_fd
= Some(pid_fd
);
184 self.mem_fd
= Some(std
::fs
::File
::from_fd(mem_fd
));
189 /// Get the process' pidfd.
191 /// Note that the message must be valid, otherwise this panics!
192 pub fn pid_fd(&self) -> &PidFd
{
193 self.pid_fd
.as_ref().unwrap()
196 /// Get the process' mem fd.
198 /// Note that this returns a non-mut trait object. This is because positional I/O does not need
199 /// mutable self and the standard library correctly represents this in its `FileExt` trait!
201 /// Note that the message must be valid, otherwise this panics!
202 pub fn mem_fd(&self) -> &dyn FileExt
{
203 self.mem_fd
.as_ref().unwrap()
206 /// Send the current data as response.
207 pub async
fn respond(&mut self, socket
: &SeqPacketSocket
) -> io
::Result
<()> {
209 unsafe { io_vec(&self.proxy_msg) }
,
210 unsafe { io_vec(&self.seccomp_notif) }
,
211 unsafe { io_vec(&self.seccomp_resp) }
,
213 let len
= iov
.iter().map(|e
| e
.len()).sum();
214 if socket
.sendmsg_vectored(&iov
).await?
!= len
{
215 io_bail
!("truncated message?");
221 fn prepare_response(&mut self) {
222 let id
= self.request().id
;
223 let resp
= self.response_mut();
226 resp
.error
= -libc
::ENOSYS
;
230 /// Called by recv() after the callback returned the new size. This verifies that there's
231 /// enough data available.
232 fn set_len(&mut self, len
: usize) -> Result
<(), Error
> {
233 if len
< self.seccomp_packet_size
{
234 bail
!("seccomp proxy message too short");
237 if self.proxy_msg
.reserved0
!= 0 {
238 bail
!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch");
241 if !self.check_sizes() {
242 bail
!("seccomp proxy message content size validation failed");
245 if len
- self.seccomp_packet_size
> self.cookie_buf
.capacity() {
246 bail
!("seccomp proxy message too long");
249 let cookie_len
= match usize::try_from(self.proxy_msg
.cookie_len
) {
252 self.proxy_msg
.cookie_len
= 0;
253 bail
!("cookie length exceeds our size type!");
257 if len
!= self.seccomp_packet_size
+ cookie_len
{
259 "seccomp proxy packet contains unexpected cookie length {} + {} != {}",
260 self.seccomp_packet_size
,
267 self.cookie_buf
.set_len(cookie_len
);
270 self.prepare_response();
275 fn check_sizes(&self) -> bool
{
276 let got
= self.proxy_msg
.sizes
.clone();
277 got
.notif
== self.sizes
.notif
278 && got
.notif_resp
== self.sizes
.notif_resp
279 && got
.data
== self.sizes
.data
282 /// Get the monitor pid from the current message.
284 /// There's no guarantee that the pid is valid.
286 pub fn monitor_pid(&self) -> pid_t
{
287 self.proxy_msg
.monitor_pid
290 /// Get the container's init pid from the current message.
292 /// There's no guarantee that the pid is valid.
294 pub fn init_pid(&self) -> pid_t
{
295 self.proxy_msg
.init_pid
298 /// Get the syscall request structure of this message.
300 pub fn request(&self) -> &SeccompNotif
{
304 /// Access the response buffer of this message.
306 pub fn response_mut(&mut self) -> &mut SeccompNotifResp
{
307 &mut self.seccomp_resp
310 /// Get the cookie's length.
312 pub fn cookie_len(&self) -> usize {
313 usize::try_from(self.proxy_msg
.cookie_len
).expect("cookie size should fit in an usize")
316 /// Get the cookie sent along with this message.
318 pub fn cookie(&self) -> &[u8] {
322 /// Shortcut to get a parameter value.
324 fn arg(&self, arg
: u32) -> Result
<u64, Error
> {
330 .ok_or_else(|| nix
::errno
::Errno
::ERANGE
.into())
333 /// Get a parameter as C String where the pointer may be `NULL`.
335 /// Strings are limited to 4k bytes currently.
337 pub fn arg_opt_c_string(&self, arg
: u32) -> Result
<Option
<CString
>, Error
> {
338 let offset
= self.arg(arg
)?
;
342 Ok(Some(crate::syscall
::get_c_string(self, offset
)?
))
346 /// Get a parameter as C String.
348 /// Strings are limited to 4k bytes currently.
350 pub fn arg_c_string(&self, arg
: u32) -> Result
<CString
, Error
> {
351 self.arg_opt_c_string(arg
)?
352 .ok_or_else(|| Errno
::EINVAL
.into())
355 /// Read a user space pointer parameter.
357 pub fn arg_struct_by_ptr
<T
>(&self, arg
: u32) -> Result
<T
, Error
> {
358 let offset
= self.arg(arg
)?
;
359 let mut data
: T
= unsafe { mem::zeroed() }
;
361 std
::slice
::from_raw_parts_mut(&mut data
as *mut _
as *mut u8, mem
::size_of
::<T
>())
363 let got
= self.mem_fd().read_at(slice
, offset
)?
;
364 if got
!= mem
::size_of
::<T
>() {
365 Err(Errno
::EINVAL
.into())
/// Write a struct into the process' memory at the given offset.
373 pub fn mem_write_struct
<T
>(&self, offset
: u64, data
: &T
) -> io
::Result
<()> {
375 std
::slice
::from_raw_parts(data
as *const T
as *const u8, mem
::size_of
::<T
>())
377 let got
= self.mem_fd().write_at(slice
, offset
)?
;
378 if got
!= mem
::size_of
::<T
>() {
379 Err(Errno
::EINVAL
.into())
385 /// Checked way to get a `mode_t` argument.
387 pub fn arg_mode_t(&self, arg
: u32) -> Result
<nix
::sys
::stat
::mode_t
, Error
> {
388 nix
::sys
::stat
::mode_t
::try_from(self.arg(arg
)?
).map_err(|_
| Error
::from(Errno
::EINVAL
))
391 /// Checked way to get a `dev_t` argument.
393 pub fn arg_dev_t(&self, arg
: u32) -> Result
<nix
::sys
::stat
::dev_t
, Error
> {
397 /// Checked way to get a file descriptor argument.
399 pub fn arg_fd(&self, arg
: u32, flags
: c_int
) -> Result
<Fd
, Error
> {
400 let fd
= self.arg(arg
)?
as RawFd
;
// we pass those "as-is" to syscalls.
405 // otherwise we'll open them from the process:
406 if fd
== libc
::AT_FDCWD
{
407 Ok(self.pid_fd().fd_cwd()?
)
409 Ok(self.pid_fd().fd_num(fd
, flags
)?
)
413 /// Checked way to get a c_uint argument.
415 pub fn arg_uint(&self, arg
: u32) -> Result
<c_uint
, Error
> {
416 c_uint
::try_from(self.arg(arg
)?
).map_err(|_
| Errno
::EINVAL
.into())
419 /// Checked way to get a c_int argument.
421 pub fn arg_int(&self, arg
: u32) -> Result
<c_int
, Error
> {
422 self.arg_uint(arg
).map(|u
| u
as c_int
)
425 /// Checked way to get a `caddr_t` argument.
427 pub fn arg_caddr_t(&self, arg
: u32) -> Result
<*mut i8, Error
> {
428 Ok(self.arg(arg
)?
as *mut i8)
431 /// Checked way to get a raw pointer argument
433 pub fn arg_pointer(&self, arg
: u32) -> Result
<*const u8, Error
> {
434 Ok(self.arg(arg
)?
as usize as *const u8)
437 /// Checked way to get a raw char pointer.
439 pub fn arg_char_ptr(&self, arg
: u32) -> Result
<*const libc
::c_char
, Error
> {
440 Ok(self.arg(arg
)?
as usize as *const libc
::c_char
)