]>
Commit | Line | Data |
---|---|---|
f42e0795 | 1 | //! Module for LXC specific seccomp handling. |
9cffeac4 WB |
2 | |
3 | use std::convert::TryFrom; | |
c95be5f6 | 4 | use std::ffi::CString; |
7470f14b | 5 | use std::os::raw::{c_int, c_uint}; |
c95be5f6 | 6 | use std::os::unix::fs::FileExt; |
2477831e | 7 | use std::os::unix::io::{FromRawFd, RawFd}; |
c95be5f6 | 8 | use std::{io, mem}; |
9cffeac4 | 9 | |
8150a439 | 10 | use anyhow::{bail, format_err, Error}; |
41ff6d28 | 11 | use lazy_static::lazy_static; |
9cffeac4 | 12 | use libc::pid_t; |
937921aa | 13 | use nix::errno::Errno; |
9cffeac4 | 14 | |
8dd26985 | 15 | use crate::io::cmsg; |
43b5e594 | 16 | use crate::io::iovec::{IoVec, IoVecMut}; |
8dd26985 | 17 | use crate::io::seq_packet::SeqPacketSocket; |
3bbd1db0 | 18 | use crate::process::PidFd; |
e420f6f9 | 19 | use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes}; |
8dd26985 | 20 | use crate::tools::{Fd, FromFd}; |
9cffeac4 WB |
21 | |
22 | /// Seccomp notification proxy message sent by the lxc monitor. | |
23 | /// | |
24 | /// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp | |
25 | /// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`, | |
26 | /// `SeccompNotifResp` and a cookie. | |
27 | /// | |
28 | /// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper | |
29 | /// for communcation. | |
30 | #[repr(C)] | |
31 | pub struct SeccompNotifyProxyMsg { | |
32 | /// Reserved data must be zero. | |
33 | reserved0: u64, | |
34 | ||
35 | /// The lxc monitor pid. | |
36 | /// | |
37 | /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer | |
38 | /// we receive this message from. | |
39 | monitor_pid: pid_t, | |
40 | ||
41 | /// The container's init pid. | |
42 | /// | |
43 | /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so | |
44 | /// this pid should be valid as long as `monitor_pid` is valid. | |
45 | init_pid: pid_t, | |
46 | ||
47 | /// Information about the seccomp structure sizes. | |
48 | /// | |
49 | /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have | |
50 | /// inconsistent views of the kernel's seccomp API. | |
51 | sizes: SeccompNotifSizes, | |
52 | ||
53 | /// The length of the container's configured `lxc.seccomp.notify.cookie` value. | |
54 | cookie_len: u64, | |
55 | } | |
56 | ||
57 | /// Helper to receive and verify proxy notification messages. | |
9cffeac4 | 58 | pub struct ProxyMessageBuffer { |
571dbe03 WB |
59 | proxy_msg: SeccompNotifyProxyMsg, |
60 | seccomp_notif: SeccompNotif, | |
61 | seccomp_resp: SeccompNotifResp, | |
62 | cookie_buf: Vec<u8>, | |
63 | ||
9cffeac4 WB |
64 | sizes: SeccompNotifSizes, |
65 | seccomp_packet_size: usize, | |
41214ae2 | 66 | |
c95be5f6 WB |
67 | pid_fd: Option<PidFd>, |
68 | mem_fd: Option<std::fs::File>, | |
9cffeac4 WB |
69 | } |
70 | ||
571dbe03 WB |
71 | unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut { |
72 | IoVecMut::new(std::slice::from_raw_parts_mut( | |
73 | value as *mut T as *mut u8, | |
74 | mem::size_of::<T>(), | |
75 | )) | |
76 | } | |
77 | ||
78 | unsafe fn io_vec<T>(value: &T) -> IoVec { | |
79 | IoVec::new(std::slice::from_raw_parts( | |
80 | value as *const T as *const u8, | |
81 | mem::size_of::<T>(), | |
82 | )) | |
83 | } | |
84 | ||
41ff6d28 | 85 | lazy_static! { |
e420f6f9 WB |
86 | static ref SECCOMP_SIZES: SeccompNotifSizes = SeccompNotifSizes::get_checked() |
87 | .map_err(|e| panic!("{}\nrefusing to run", e)) | |
88 | .unwrap(); | |
41ff6d28 WB |
89 | } |
90 | ||
9cffeac4 WB |
91 | impl ProxyMessageBuffer { |
92 | /// Allocate a new proxy message buffer with a specific maximum cookie size. | |
e420f6f9 | 93 | pub fn new(max_cookie: usize) -> Self { |
41ff6d28 | 94 | let sizes = SECCOMP_SIZES.clone(); |
571dbe03 | 95 | |
9cffeac4 WB |
96 | let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>() |
97 | + sizes.notif as usize | |
98 | + sizes.notif_resp as usize; | |
571dbe03 | 99 | |
e420f6f9 | 100 | Self { |
571dbe03 WB |
101 | proxy_msg: unsafe { mem::zeroed() }, |
102 | seccomp_notif: unsafe { mem::zeroed() }, | |
103 | seccomp_resp: unsafe { mem::zeroed() }, | |
104 | cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) }, | |
9cffeac4 WB |
105 | sizes, |
106 | seccomp_packet_size, | |
41214ae2 WB |
107 | pid_fd: None, |
108 | mem_fd: None, | |
e420f6f9 | 109 | } |
9cffeac4 WB |
110 | } |
111 | ||
1349eed4 WB |
112 | fn reset(&mut self) { |
113 | self.proxy_msg.cookie_len = 0; | |
114 | self.mem_fd = None; | |
115 | self.pid_fd = None; | |
116 | } | |
117 | ||
f42e0795 | 118 | /// Returns false on EOF. |
9aa2a15a | 119 | pub async fn recv(&mut self, socket: &SeqPacketSocket) -> Result<bool, Error> { |
f42e0795 | 120 | // prepare buffers: |
1349eed4 | 121 | self.reset(); |
571dbe03 WB |
122 | |
123 | unsafe { | |
124 | self.cookie_buf.set_len(self.cookie_buf.capacity()); | |
125 | } | |
126 | ||
0e2d0fa2 | 127 | let mut iovec = [ |
571dbe03 WB |
128 | unsafe { io_vec_mut(&mut self.proxy_msg) }, |
129 | unsafe { io_vec_mut(&mut self.seccomp_notif) }, | |
130 | unsafe { io_vec_mut(&mut self.seccomp_resp) }, | |
131 | IoVecMut::new(self.cookie_buf.as_mut_slice()), | |
132 | ]; | |
9cffeac4 | 133 | |
9cffeac4 | 134 | unsafe { |
571dbe03 | 135 | self.cookie_buf.set_len(0); |
9cffeac4 | 136 | } |
571dbe03 | 137 | |
f42e0795 | 138 | // receive: |
8dd26985 WB |
139 | let mut fd_cmsg_buf = cmsg::buffer::<[RawFd; 2]>(); |
140 | let (datalen, cmsglen) = socket | |
141 | .recvmsg_vectored(&mut iovec, &mut fd_cmsg_buf) | |
2477831e WB |
142 | .await?; |
143 | ||
8dd26985 | 144 | if datalen == 0 { |
41214ae2 WB |
145 | return Ok(false); |
146 | } | |
147 | ||
8dd26985 | 148 | self.set_len(datalen)?; |
2477831e | 149 | |
f42e0795 WB |
150 | // iterate through control messages: |
151 | ||
8dd26985 | 152 | let cmsg = cmsg::iter(&fd_cmsg_buf[..cmsglen]) |
2477831e WB |
153 | .next() |
154 | .ok_or_else(|| format_err!("missing file descriptors in message"))?; | |
155 | ||
156 | if cmsg.cmsg_level != libc::SOL_SOCKET && cmsg.cmsg_type != libc::SCM_RIGHTS { | |
157 | bail!("expected SCM_RIGHTS control message"); | |
158 | } | |
159 | ||
160 | let fds: Vec<Fd> = cmsg | |
161 | .data | |
162 | .chunks_exact(mem::size_of::<RawFd>()) | |
92eface0 WB |
163 | .map(|chunk| unsafe { |
164 | // clippy bug | |
165 | #[allow(clippy::cast_ptr_alignment)] | |
166 | Fd::from_raw_fd(std::ptr::read_unaligned(chunk.as_ptr() as _)) | |
167 | }) | |
2477831e WB |
168 | .collect(); |
169 | ||
170 | if fds.len() != 2 { | |
171 | bail!("expected exactly 2 file descriptors in control message"); | |
172 | } | |
0e2d0fa2 | 173 | |
41214ae2 | 174 | let mut fds = fds.into_iter(); |
512f780a WB |
175 | let pid_fd = unsafe { |
176 | PidFd::try_from_fd( | |
177 | fds.next() | |
178 | .ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?, | |
179 | )? | |
180 | }; | |
181 | let mem_fd = fds | |
c95be5f6 | 182 | .next() |
512f780a WB |
183 | .ok_or_else(|| format_err!("lxc seccomp message without memfd"))?; |
184 | ||
185 | self.pid_fd = Some(pid_fd); | |
186 | self.mem_fd = Some(std::fs::File::from_fd(mem_fd)); | |
41214ae2 WB |
187 | |
188 | Ok(true) | |
189 | } | |
190 | ||
c95be5f6 WB |
191 | /// Get the process' pidfd. |
192 | /// | |
193 | /// Note that the message must be valid, otherwise this panics! | |
194 | pub fn pid_fd(&self) -> &PidFd { | |
195 | self.pid_fd.as_ref().unwrap() | |
196 | } | |
197 | ||
198 | /// Get the process' mem fd. | |
199 | /// | |
200 | /// Note that this returns a non-mut trait object. This is because positional I/O does not need | |
201 | /// mutable self and the standard library correctly represents this in its `FileExt` trait! | |
202 | /// | |
203 | /// Note that the message must be valid, otherwise this panics! | |
204 | pub fn mem_fd(&self) -> &dyn FileExt { | |
205 | self.mem_fd.as_ref().unwrap() | |
206 | } | |
207 | ||
0e2d0fa2 | 208 | /// Send the current data as response. |
9aa2a15a | 209 | pub async fn respond(&mut self, socket: &SeqPacketSocket) -> io::Result<()> { |
0e2d0fa2 | 210 | let iov = [ |
571dbe03 WB |
211 | unsafe { io_vec(&self.proxy_msg) }, |
212 | unsafe { io_vec(&self.seccomp_notif) }, | |
213 | unsafe { io_vec(&self.seccomp_resp) }, | |
0e2d0fa2 | 214 | ]; |
9aa2a15a | 215 | let len = iov.iter().map(|e| e.len()).sum(); |
8dd26985 | 216 | if socket.sendmsg_vectored(&iov).await? != len { |
9aa2a15a WB |
217 | io_bail!("truncated message?"); |
218 | } | |
219 | Ok(()) | |
9cffeac4 WB |
220 | } |
221 | ||
222 | #[inline] | |
223 | fn prepare_response(&mut self) { | |
224 | let id = self.request().id; | |
225 | let resp = self.response_mut(); | |
226 | resp.id = id; | |
227 | resp.val = -1; | |
228 | resp.error = -libc::ENOSYS; | |
229 | resp.flags = 0; | |
230 | } | |
231 | ||
34f32e25 WB |
232 | /// Called by recv() after the callback returned the new size. This verifies that there's |
233 | /// enough data available. | |
234 | fn set_len(&mut self, len: usize) -> Result<(), Error> { | |
571dbe03 WB |
235 | if len < self.seccomp_packet_size { |
236 | bail!("seccomp proxy message too short"); | |
9cffeac4 WB |
237 | } |
238 | ||
571dbe03 WB |
239 | if self.proxy_msg.reserved0 != 0 { |
240 | bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch"); | |
241 | } | |
242 | ||
243 | if !self.check_sizes() { | |
9cffeac4 WB |
244 | bail!("seccomp proxy message content size validation failed"); |
245 | } | |
246 | ||
571dbe03 WB |
247 | if len - self.seccomp_packet_size > self.cookie_buf.capacity() { |
248 | bail!("seccomp proxy message too long"); | |
249 | } | |
250 | ||
251 | let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) { | |
252 | Ok(cl) => cl, | |
253 | Err(_) => { | |
254 | self.proxy_msg.cookie_len = 0; | |
255 | bail!("cookie length exceeds our size type!"); | |
256 | } | |
257 | }; | |
258 | ||
259 | if len != self.seccomp_packet_size + cookie_len { | |
52f50bd4 WB |
260 | bail!( |
261 | "seccomp proxy packet contains unexpected cookie length {} + {} != {}", | |
262 | self.seccomp_packet_size, | |
571dbe03 | 263 | cookie_len, |
52f50bd4 WB |
264 | len |
265 | ); | |
9cffeac4 WB |
266 | } |
267 | ||
268 | unsafe { | |
571dbe03 | 269 | self.cookie_buf.set_len(cookie_len); |
9cffeac4 WB |
270 | } |
271 | ||
272 | self.prepare_response(); | |
273 | ||
274 | Ok(()) | |
275 | } | |
276 | ||
571dbe03 WB |
277 | fn check_sizes(&self) -> bool { |
278 | let got = self.proxy_msg.sizes.clone(); | |
9cffeac4 WB |
279 | got.notif == self.sizes.notif |
280 | && got.notif_resp == self.sizes.notif_resp | |
281 | && got.data == self.sizes.data | |
282 | } | |
283 | ||
9cffeac4 WB |
284 | /// Get the monitor pid from the current message. |
285 | /// | |
286 | /// There's no guarantee that the pid is valid. | |
a0d68fed | 287 | #[inline] |
9cffeac4 | 288 | pub fn monitor_pid(&self) -> pid_t { |
571dbe03 | 289 | self.proxy_msg.monitor_pid |
9cffeac4 WB |
290 | } |
291 | ||
292 | /// Get the container's init pid from the current message. | |
293 | /// | |
294 | /// There's no guarantee that the pid is valid. | |
a0d68fed | 295 | #[inline] |
9cffeac4 | 296 | pub fn init_pid(&self) -> pid_t { |
571dbe03 | 297 | self.proxy_msg.init_pid |
9cffeac4 WB |
298 | } |
299 | ||
300 | /// Get the syscall request structure of this message. | |
a0d68fed | 301 | #[inline] |
9cffeac4 | 302 | pub fn request(&self) -> &SeccompNotif { |
571dbe03 | 303 | &self.seccomp_notif |
9cffeac4 WB |
304 | } |
305 | ||
306 | /// Access the response buffer of this message. | |
a0d68fed | 307 | #[inline] |
9cffeac4 | 308 | pub fn response_mut(&mut self) -> &mut SeccompNotifResp { |
571dbe03 | 309 | &mut self.seccomp_resp |
9cffeac4 WB |
310 | } |
311 | ||
312 | /// Get the cookie's length. | |
a0d68fed | 313 | #[inline] |
9cffeac4 | 314 | pub fn cookie_len(&self) -> usize { |
571dbe03 | 315 | usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize") |
9cffeac4 WB |
316 | } |
317 | ||
318 | /// Get the cookie sent along with this message. | |
a0d68fed | 319 | #[inline] |
9cffeac4 | 320 | pub fn cookie(&self) -> &[u8] { |
571dbe03 | 321 | &self.cookie_buf |
9cffeac4 | 322 | } |
c95be5f6 | 323 | |
937921aa | 324 | /// Shortcut to get a parameter value. |
c95be5f6 | 325 | #[inline] |
937921aa | 326 | fn arg(&self, arg: u32) -> Result<u64, Error> { |
c95be5f6 WB |
327 | self.request() |
328 | .data | |
329 | .args | |
330 | .get(arg as usize) | |
9486338a | 331 | .copied() |
c95be5f6 WB |
332 | .ok_or_else(|| nix::errno::Errno::ERANGE.into()) |
333 | } | |
334 | ||
7970b0ea WB |
335 | /// Get a parameter as C String where the pointer may be `NULL`. |
336 | /// | |
337 | /// Strings are limited to 4k bytes currently. | |
a0d68fed | 338 | #[inline] |
7970b0ea WB |
339 | pub fn arg_opt_c_string(&self, arg: u32) -> Result<Option<CString>, Error> { |
340 | let offset = self.arg(arg)?; | |
341 | if offset == 0 { | |
342 | Ok(None) | |
343 | } else { | |
344 | Ok(Some(crate::syscall::get_c_string(self, offset)?)) | |
345 | } | |
346 | } | |
347 | ||
c95be5f6 WB |
348 | /// Get a parameter as C String. |
349 | /// | |
350 | /// Strings are limited to 4k bytes currently. | |
a0d68fed | 351 | #[inline] |
c95be5f6 | 352 | pub fn arg_c_string(&self, arg: u32) -> Result<CString, Error> { |
7970b0ea WB |
353 | self.arg_opt_c_string(arg)? |
354 | .ok_or_else(|| Errno::EINVAL.into()) | |
937921aa WB |
355 | } |
356 | ||
7470f14b WB |
357 | /// Read a user space pointer parameter. |
358 | #[inline] | |
1349eed4 | 359 | pub fn arg_struct_by_ptr<T>(&self, arg: u32) -> Result<T, Error> { |
7470f14b WB |
360 | let offset = self.arg(arg)?; |
361 | let mut data: T = unsafe { mem::zeroed() }; | |
362 | let slice = unsafe { | |
363 | std::slice::from_raw_parts_mut(&mut data as *mut _ as *mut u8, mem::size_of::<T>()) | |
364 | }; | |
365 | let got = self.mem_fd().read_at(slice, offset)?; | |
366 | if got != mem::size_of::<T>() { | |
367 | Err(Errno::EINVAL.into()) | |
368 | } else { | |
369 | Ok(data) | |
370 | } | |
371 | } | |
372 | ||
373 | /// Read a user space pointer parameter. | |
374 | #[inline] | |
375 | pub fn mem_write_struct<T>(&self, offset: u64, data: &T) -> io::Result<()> { | |
376 | let slice = unsafe { | |
377 | std::slice::from_raw_parts(data as *const T as *const u8, mem::size_of::<T>()) | |
378 | }; | |
379 | let got = self.mem_fd().write_at(slice, offset)?; | |
380 | if got != mem::size_of::<T>() { | |
381 | Err(Errno::EINVAL.into()) | |
382 | } else { | |
383 | Ok(()) | |
384 | } | |
385 | } | |
386 | ||
937921aa | 387 | /// Checked way to get a `mode_t` argument. |
a0d68fed | 388 | #[inline] |
61bfa355 WB |
389 | pub fn arg_mode_t(&self, arg: u32) -> Result<nix::sys::stat::mode_t, Error> { |
390 | nix::sys::stat::mode_t::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL)) | |
937921aa WB |
391 | } |
392 | ||
393 | /// Checked way to get a `dev_t` argument. | |
a0d68fed | 394 | #[inline] |
937921aa WB |
395 | pub fn arg_dev_t(&self, arg: u32) -> Result<nix::sys::stat::dev_t, Error> { |
396 | nix::sys::stat::dev_t::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into()) | |
397 | } | |
398 | ||
399 | /// Checked way to get a file descriptor argument. | |
a0d68fed | 400 | #[inline] |
937921aa WB |
401 | pub fn arg_fd(&self, arg: u32, flags: c_int) -> Result<Fd, Error> { |
402 | let fd = RawFd::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))?; | |
403 | if fd == libc::AT_FDCWD { | |
404 | Ok(self.pid_fd().fd_cwd()?) | |
405 | } else { | |
406 | Ok(self.pid_fd().fd_num(fd, flags)?) | |
407 | } | |
c95be5f6 | 408 | } |
7970b0ea | 409 | |
7470f14b WB |
410 | /// Checked way to get a c_uint argument. |
411 | #[inline] | |
412 | pub fn arg_uint(&self, arg: u32) -> Result<c_uint, Error> { | |
413 | c_uint::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into()) | |
414 | } | |
415 | ||
7970b0ea | 416 | /// Checked way to get a c_int argument. |
a0d68fed | 417 | #[inline] |
7970b0ea | 418 | pub fn arg_int(&self, arg: u32) -> Result<c_int, Error> { |
7470f14b | 419 | self.arg_uint(arg).map(|u| u as c_int) |
7970b0ea WB |
420 | } |
421 | ||
422 | /// Checked way to get a `caddr_t` argument. | |
a0d68fed WB |
423 | #[inline] |
424 | pub fn arg_caddr_t(&self, arg: u32) -> Result<*mut i8, Error> { | |
425 | Ok(self.arg(arg)? as *mut i8) | |
426 | } | |
427 | ||
428 | /// Checked way to get a raw pointer argument | |
429 | #[inline] | |
430 | pub fn arg_pointer(&self, arg: u32) -> Result<*const u8, Error> { | |
431 | Ok(self.arg(arg)? as usize as *const u8) | |
432 | } | |
433 | ||
434 | /// Checked way to get a raw char pointer. | |
435 | #[inline] | |
436 | pub fn arg_char_ptr(&self, arg: u32) -> Result<*const libc::c_char, Error> { | |
437 | Ok(self.arg(arg)? as usize as *const libc::c_char) | |
7970b0ea | 438 | } |
9cffeac4 | 439 | } |