]>
Commit | Line | Data |
---|---|---|
9cffeac4 WB |
1 | //! Module for LXC specific related seccomp handling. |
2 | ||
3 | use std::convert::TryFrom; | |
c95be5f6 | 4 | use std::ffi::CString; |
7470f14b | 5 | use std::os::raw::{c_int, c_uint}; |
c95be5f6 | 6 | use std::os::unix::fs::FileExt; |
512f780a | 7 | use std::os::unix::io::RawFd; |
c95be5f6 | 8 | use std::{io, mem}; |
9cffeac4 | 9 | |
512f780a | 10 | use failure::{bail, format_err, Error}; |
9aa2a15a | 11 | use io_uring::socket::SeqPacketSocket; |
41ff6d28 | 12 | use lazy_static::lazy_static; |
9cffeac4 | 13 | use libc::pid_t; |
937921aa | 14 | use nix::errno::Errno; |
9cffeac4 | 15 | |
c95be5f6 | 16 | use crate::pidfd::PidFd; |
e420f6f9 | 17 | use crate::seccomp::{SeccompNotif, SeccompNotifResp, SeccompNotifSizes}; |
512f780a | 18 | use crate::tools::{Fd, FromFd, IoVec, IoVecMut}; |
9cffeac4 WB |
19 | |
20 | /// Seccomp notification proxy message sent by the lxc monitor. | |
21 | /// | |
22 | /// Whenever a process in a container triggers a seccomp notification, and lxc has a seccomp | |
23 | /// notification proxy configured, this is sent over to the proxy, together with a `SeccompNotif`, | |
24 | /// `SeccompNotifResp` and a cookie. | |
25 | /// | |
26 | /// Using this struct may be inconvenient. See the [`ProxyMessageBuffer`] for a convenient helper | |
27 | /// for communication. | |
28 | #[repr(C)] | |
29 | pub struct SeccompNotifyProxyMsg { | |
30 | /// Reserved data, must be zero. | |
31 | reserved0: u64, | |
32 | ||
33 | /// The lxc monitor pid. | |
34 | /// | |
35 | /// Unless some other proxy forwards proxy messages, this should be the same pid as the peer | |
36 | /// we receive this message from. | |
37 | monitor_pid: pid_t, | |
38 | ||
39 | /// The container's init pid. | |
40 | /// | |
41 | /// If supported by the kernel, the lxc monitor should keep a pidfd open to this process, so | |
42 | /// this pid should be valid as long as `monitor_pid` is valid. | |
43 | init_pid: pid_t, | |
44 | ||
45 | /// Information about the seccomp structure sizes. | |
46 | /// | |
47 | /// This must be equal to `SeccompNotifSizes::get()`, otherwise the proxy and lxc monitor have | |
48 | /// inconsistent views of the kernel's seccomp API. | |
49 | sizes: SeccompNotifSizes, | |
50 | ||
51 | /// The length of the container's configured `lxc.seccomp.notify.cookie` value. | |
52 | cookie_len: u64, | |
53 | } | |
54 | ||
55 | /// Helper to receive and verify proxy notification messages; it holds the proxy header, the seccomp notification and response, the cookie bytes, and the pidfd/memfd received with the current message. | |
9cffeac4 | 56 | pub struct ProxyMessageBuffer { |
571dbe03 WB |
57 | proxy_msg: SeccompNotifyProxyMsg, |
58 | seccomp_notif: SeccompNotif, | |
59 | seccomp_resp: SeccompNotifResp, | |
60 | cookie_buf: Vec<u8>, | |
61 | ||
9cffeac4 WB |
62 | sizes: SeccompNotifSizes, |
63 | seccomp_packet_size: usize, | |
41214ae2 | 64 | |
c95be5f6 WB |
65 | pid_fd: Option<PidFd>, |
66 | mem_fd: Option<std::fs::File>, | |
9cffeac4 WB |
67 | } |
68 | ||
571dbe03 WB |
69 | unsafe fn io_vec_mut<T>(value: &mut T) -> IoVecMut { |
70 | IoVecMut::new(std::slice::from_raw_parts_mut( | |
71 | value as *mut T as *mut u8, | |
72 | mem::size_of::<T>(), | |
73 | )) | |
74 | } | |
75 | ||
76 | unsafe fn io_vec<T>(value: &T) -> IoVec { | |
77 | IoVec::new(std::slice::from_raw_parts( | |
78 | value as *const T as *const u8, | |
79 | mem::size_of::<T>(), | |
80 | )) | |
81 | } | |
82 | ||
41ff6d28 | 83 | lazy_static! { |
e420f6f9 WB |
84 | static ref SECCOMP_SIZES: SeccompNotifSizes = SeccompNotifSizes::get_checked() |
85 | .map_err(|e| panic!("{}\nrefusing to run", e)) | |
86 | .unwrap(); | |
41ff6d28 WB |
87 | } |
88 | ||
9cffeac4 WB |
89 | impl ProxyMessageBuffer { |
90 | /// Allocate a new proxy message buffer with a specific maximum cookie size. | |
e420f6f9 | 91 | pub fn new(max_cookie: usize) -> Self { |
41ff6d28 | 92 | let sizes = SECCOMP_SIZES.clone(); |
571dbe03 | 93 | |
9cffeac4 WB |
94 | let seccomp_packet_size = mem::size_of::<SeccompNotifyProxyMsg>() |
95 | + sizes.notif as usize | |
96 | + sizes.notif_resp as usize; | |
571dbe03 | 97 | |
e420f6f9 | 98 | Self { |
571dbe03 WB |
99 | proxy_msg: unsafe { mem::zeroed() }, |
100 | seccomp_notif: unsafe { mem::zeroed() }, | |
101 | seccomp_resp: unsafe { mem::zeroed() }, | |
102 | cookie_buf: unsafe { super::tools::vec::uninitialized(max_cookie) }, | |
9cffeac4 WB |
103 | sizes, |
104 | seccomp_packet_size, | |
41214ae2 WB |
105 | pid_fd: None, |
106 | mem_fd: None, | |
e420f6f9 | 107 | } |
9cffeac4 WB |
108 | } |
109 | ||
1349eed4 WB |
110 | fn reset(&mut self) { |
111 | self.proxy_msg.cookie_len = 0; | |
112 | self.mem_fd = None; | |
113 | self.pid_fd = None; | |
114 | } | |
115 | ||
0e2d0fa2 | 116 | /// Receive the next proxy message plus its pidfd and memfd and validate it; returns `Ok(false)` when the socket read yields zero bytes (EOF) and `Ok(true)` once a valid message is stored. |
9aa2a15a | 117 | pub async fn recv(&mut self, socket: &SeqPacketSocket) -> Result<bool, Error> {
1349eed4 | 118 | self.reset();
571dbe03 WB |
119 | |
120 | unsafe { | |
121 | self.cookie_buf.set_len(self.cookie_buf.capacity()); | |
122 | } | |
123 | ||
0e2d0fa2 | 124 | let mut iovec = [
571dbe03 WB |
125 | unsafe { io_vec_mut(&mut self.proxy_msg) }, |
126 | unsafe { io_vec_mut(&mut self.seccomp_notif) }, | |
127 | unsafe { io_vec_mut(&mut self.seccomp_resp) }, | |
128 | IoVecMut::new(self.cookie_buf.as_mut_slice()), | |
129 | ]; | |
9cffeac4 | 130 | |
9cffeac4 | 131 | unsafe {
571dbe03 | 132 | self.cookie_buf.set_len(0);
9cffeac4 | 133 | }
571dbe03 | 134 | |
41214ae2 WB |
135 | let (size, fds) = socket.recv_fds_vectored(&mut iovec, 2).await?; |
136 | if size == 0 { | |
137 | return Ok(false); | |
138 | } | |
139 | ||
0e2d0fa2 WB |
140 | self.set_len(size)?; |
141 | ||
41214ae2 | 142 | let mut fds = fds.into_iter();
512f780a WB |
143 | let pid_fd = unsafe { |
144 | PidFd::try_from_fd( | |
145 | fds.next() | |
146 | .ok_or_else(|| format_err!("lxc seccomp message without pidfd"))?, | |
147 | )? | |
148 | }; | |
149 | let mem_fd = fds | |
c95be5f6 | 150 | .next()
512f780a WB |
151 | .ok_or_else(|| format_err!("lxc seccomp message without memfd"))?; |
152 | ||
153 | self.pid_fd = Some(pid_fd); | |
154 | self.mem_fd = Some(std::fs::File::from_fd(mem_fd)); | |
41214ae2 WB |
155 | |
156 | Ok(true) | |
157 | } | |
158 | ||
c95be5f6 WB |
159 | /// Get the process' pidfd. |
160 | /// | |
161 | /// Note that the message must be valid, otherwise this panics! | |
162 | pub fn pid_fd(&self) -> &PidFd { | |
163 | self.pid_fd.as_ref().unwrap() | |
164 | } | |
165 | ||
166 | /// Get the process' mem fd. | |
167 | /// | |
168 | /// Note that this returns a non-mut trait object. This is because positional I/O does not need | |
169 | /// mutable self and the standard library correctly represents this in its `FileExt` trait! | |
170 | /// | |
171 | /// Note that the message must be valid, otherwise this panics! | |
172 | pub fn mem_fd(&self) -> &dyn FileExt { | |
173 | self.mem_fd.as_ref().unwrap() | |
174 | } | |
175 | ||
0e2d0fa2 | 176 | /// Send the current data as response. |
9aa2a15a | 177 | pub async fn respond(&mut self, socket: &SeqPacketSocket) -> io::Result<()> { |
0e2d0fa2 | 178 | let iov = [ |
571dbe03 WB |
179 | unsafe { io_vec(&self.proxy_msg) }, |
180 | unsafe { io_vec(&self.seccomp_notif) }, | |
181 | unsafe { io_vec(&self.seccomp_resp) }, | |
0e2d0fa2 | 182 | ]; |
9aa2a15a WB |
183 | let len = iov.iter().map(|e| e.len()).sum(); |
184 | if socket.sendmsg_vectored(&iov, &[]).await? != len { | |
185 | io_bail!("truncated message?"); | |
186 | } | |
187 | Ok(()) | |
9cffeac4 WB |
188 | } |
189 | ||
190 | #[inline] | |
191 | fn prepare_response(&mut self) { | |
192 | let id = self.request().id; | |
193 | let resp = self.response_mut(); | |
194 | resp.id = id; | |
195 | resp.val = -1; | |
196 | resp.error = -libc::ENOSYS; | |
197 | resp.flags = 0; | |
198 | } | |
199 | ||
34f32e25 WB |
200 | /// Called by recv() after the callback returned the new size. This verifies that there's |
201 | /// enough data available. | |
202 | fn set_len(&mut self, len: usize) -> Result<(), Error> { | |
571dbe03 WB |
203 | if len < self.seccomp_packet_size { |
204 | bail!("seccomp proxy message too short"); | |
9cffeac4 WB |
205 | } |
206 | ||
571dbe03 WB |
207 | if self.proxy_msg.reserved0 != 0 { |
208 | bail!("reserved data wasn't 0, liblxc secocmp notify protocol mismatch"); | |
209 | } | |
210 | ||
211 | if !self.check_sizes() { | |
9cffeac4 WB |
212 | bail!("seccomp proxy message content size validation failed"); |
213 | } | |
214 | ||
571dbe03 WB |
215 | if len - self.seccomp_packet_size > self.cookie_buf.capacity() { |
216 | bail!("seccomp proxy message too long"); | |
217 | } | |
218 | ||
219 | let cookie_len = match usize::try_from(self.proxy_msg.cookie_len) { | |
220 | Ok(cl) => cl, | |
221 | Err(_) => { | |
222 | self.proxy_msg.cookie_len = 0; | |
223 | bail!("cookie length exceeds our size type!"); | |
224 | } | |
225 | }; | |
226 | ||
227 | if len != self.seccomp_packet_size + cookie_len { | |
52f50bd4 WB |
228 | bail!( |
229 | "seccomp proxy packet contains unexpected cookie length {} + {} != {}", | |
230 | self.seccomp_packet_size, | |
571dbe03 | 231 | cookie_len, |
52f50bd4 WB |
232 | len |
233 | ); | |
9cffeac4 WB |
234 | } |
235 | ||
236 | unsafe { | |
571dbe03 | 237 | self.cookie_buf.set_len(cookie_len); |
9cffeac4 WB |
238 | } |
239 | ||
240 | self.prepare_response(); | |
241 | ||
242 | Ok(()) | |
243 | } | |
244 | ||
571dbe03 WB |
245 | fn check_sizes(&self) -> bool { |
246 | let got = self.proxy_msg.sizes.clone(); | |
9cffeac4 WB |
247 | got.notif == self.sizes.notif |
248 | && got.notif_resp == self.sizes.notif_resp | |
249 | && got.data == self.sizes.data | |
250 | } | |
251 | ||
9cffeac4 WB |
252 | /// Get the monitor pid from the current message. |
253 | /// | |
254 | /// There's no guarantee that the pid is valid. | |
a0d68fed | 255 | #[inline] |
9cffeac4 | 256 | pub fn monitor_pid(&self) -> pid_t { |
571dbe03 | 257 | self.proxy_msg.monitor_pid |
9cffeac4 WB |
258 | } |
259 | ||
260 | /// Get the container's init pid from the current message. | |
261 | /// | |
262 | /// There's no guarantee that the pid is valid. | |
a0d68fed | 263 | #[inline] |
9cffeac4 | 264 | pub fn init_pid(&self) -> pid_t { |
571dbe03 | 265 | self.proxy_msg.init_pid |
9cffeac4 WB |
266 | } |
267 | ||
268 | /// Get the syscall request structure of this message. | |
a0d68fed | 269 | #[inline] |
9cffeac4 | 270 | pub fn request(&self) -> &SeccompNotif { |
571dbe03 | 271 | &self.seccomp_notif |
9cffeac4 WB |
272 | } |
273 | ||
274 | /// Access the response buffer of this message. | |
a0d68fed | 275 | #[inline] |
9cffeac4 | 276 | pub fn response_mut(&mut self) -> &mut SeccompNotifResp { |
571dbe03 | 277 | &mut self.seccomp_resp |
9cffeac4 WB |
278 | } |
279 | ||
280 | /// Get the cookie's length. | |
a0d68fed | 281 | #[inline] |
9cffeac4 | 282 | pub fn cookie_len(&self) -> usize { |
571dbe03 | 283 | usize::try_from(self.proxy_msg.cookie_len).expect("cookie size should fit in an usize") |
9cffeac4 WB |
284 | } |
285 | ||
286 | /// Get the cookie sent along with this message. | |
a0d68fed | 287 | #[inline] |
9cffeac4 | 288 | pub fn cookie(&self) -> &[u8] { |
571dbe03 | 289 | &self.cookie_buf |
9cffeac4 | 290 | } |
c95be5f6 | 291 | |
937921aa | 292 | /// Shortcut to get a parameter value. |
c95be5f6 | 293 | #[inline] |
937921aa | 294 | fn arg(&self, arg: u32) -> Result<u64, Error> { |
c95be5f6 WB |
295 | self.request() |
296 | .data | |
297 | .args | |
298 | .get(arg as usize) | |
9486338a | 299 | .copied() |
c95be5f6 WB |
300 | .ok_or_else(|| nix::errno::Errno::ERANGE.into()) |
301 | } | |
302 | ||
7970b0ea WB |
303 | /// Get a parameter as C String where the pointer may be `NULL`. |
304 | /// | |
305 | /// Strings are limited to 4k bytes currently. | |
a0d68fed | 306 | #[inline] |
7970b0ea WB |
307 | pub fn arg_opt_c_string(&self, arg: u32) -> Result<Option<CString>, Error> { |
308 | let offset = self.arg(arg)?; | |
309 | if offset == 0 { | |
310 | Ok(None) | |
311 | } else { | |
312 | Ok(Some(crate::syscall::get_c_string(self, offset)?)) | |
313 | } | |
314 | } | |
315 | ||
c95be5f6 WB |
316 | /// Get a parameter as C String. |
317 | /// | |
318 | /// Strings are limited to 4k bytes currently. | |
a0d68fed | 319 | #[inline] |
c95be5f6 | 320 | pub fn arg_c_string(&self, arg: u32) -> Result<CString, Error> { |
7970b0ea WB |
321 | self.arg_opt_c_string(arg)? |
322 | .ok_or_else(|| Errno::EINVAL.into()) | |
937921aa WB |
323 | } |
324 | ||
7470f14b WB |
325 | /// Read a user space pointer parameter. |
326 | #[inline] | |
1349eed4 | 327 | pub fn arg_struct_by_ptr<T>(&self, arg: u32) -> Result<T, Error> { |
7470f14b WB |
328 | let offset = self.arg(arg)?; |
329 | let mut data: T = unsafe { mem::zeroed() }; | |
330 | let slice = unsafe { | |
331 | std::slice::from_raw_parts_mut(&mut data as *mut _ as *mut u8, mem::size_of::<T>()) | |
332 | }; | |
333 | let got = self.mem_fd().read_at(slice, offset)?; | |
334 | if got != mem::size_of::<T>() { | |
335 | Err(Errno::EINVAL.into()) | |
336 | } else { | |
337 | Ok(data) | |
338 | } | |
339 | } | |
340 | ||
341 | /// Read a user space pointer parameter. | |
342 | #[inline] | |
343 | pub fn mem_write_struct<T>(&self, offset: u64, data: &T) -> io::Result<()> { | |
344 | let slice = unsafe { | |
345 | std::slice::from_raw_parts(data as *const T as *const u8, mem::size_of::<T>()) | |
346 | }; | |
347 | let got = self.mem_fd().write_at(slice, offset)?; | |
348 | if got != mem::size_of::<T>() { | |
349 | Err(Errno::EINVAL.into()) | |
350 | } else { | |
351 | Ok(()) | |
352 | } | |
353 | } | |
354 | ||
937921aa | 355 | /// Checked way to get a `mode_t` argument. |
a0d68fed | 356 | #[inline] |
61bfa355 WB |
357 | pub fn arg_mode_t(&self, arg: u32) -> Result<nix::sys::stat::mode_t, Error> { |
358 | nix::sys::stat::mode_t::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL)) | |
937921aa WB |
359 | } |
360 | ||
361 | /// Checked way to get a `dev_t` argument. | |
a0d68fed | 362 | #[inline] |
937921aa WB |
363 | pub fn arg_dev_t(&self, arg: u32) -> Result<nix::sys::stat::dev_t, Error> { |
364 | nix::sys::stat::dev_t::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into()) | |
365 | } | |
366 | ||
367 | /// Checked way to get a file descriptor argument. | |
a0d68fed | 368 | #[inline] |
937921aa WB |
369 | pub fn arg_fd(&self, arg: u32, flags: c_int) -> Result<Fd, Error> { |
370 | let fd = RawFd::try_from(self.arg(arg)?).map_err(|_| Error::from(Errno::EINVAL))?; | |
371 | if fd == libc::AT_FDCWD { | |
372 | Ok(self.pid_fd().fd_cwd()?) | |
373 | } else { | |
374 | Ok(self.pid_fd().fd_num(fd, flags)?) | |
375 | } | |
c95be5f6 | 376 | } |
7970b0ea | 377 | |
7470f14b WB |
378 | /// Checked way to get a c_uint argument. |
379 | #[inline] | |
380 | pub fn arg_uint(&self, arg: u32) -> Result<c_uint, Error> { | |
381 | c_uint::try_from(self.arg(arg)?).map_err(|_| Errno::EINVAL.into()) | |
382 | } | |
383 | ||
7970b0ea | 384 | /// Checked way to get a c_int argument. |
a0d68fed | 385 | #[inline] |
7970b0ea | 386 | pub fn arg_int(&self, arg: u32) -> Result<c_int, Error> { |
7470f14b | 387 | self.arg_uint(arg).map(|u| u as c_int) |
7970b0ea WB |
388 | } |
389 | ||
390 | /// Checked way to get a `caddr_t` argument. | |
a0d68fed WB |
391 | #[inline] |
392 | pub fn arg_caddr_t(&self, arg: u32) -> Result<*mut i8, Error> { | |
393 | Ok(self.arg(arg)? as *mut i8) | |
394 | } | |
395 | ||
396 | /// Checked way to get a raw pointer argument | |
397 | #[inline] | |
398 | pub fn arg_pointer(&self, arg: u32) -> Result<*const u8, Error> { | |
399 | Ok(self.arg(arg)? as usize as *const u8) | |
400 | } | |
401 | ||
402 | /// Checked way to get a raw char pointer. | |
403 | #[inline] | |
404 | pub fn arg_char_ptr(&self, arg: u32) -> Result<*const libc::c_char, Error> { | |
405 | Ok(self.arg(arg)? as usize as *const libc::c_char) | |
7970b0ea | 406 | } |
9cffeac4 | 407 | } |