]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/mount_utils.c
start: don't overwrite file descriptors during namespace preservation
[mirror_lxc.git] / src / lxc / mount_utils.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include "config.h"
4
5 #include <fcntl.h>
6 #include <stdbool.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/mount.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12
13 #include "conf.h"
14 #include "file_utils.h"
15 #include "log.h"
16 #include "macro.h"
17 #include "memory_utils.h"
18 #include "mount_utils.h"
19 #include "syscall_numbers.h"
20 #include "syscall_wrappers.h"
21
22 #ifdef HAVE_STATVFS
23 #include <sys/statvfs.h>
24 #endif
25
26 lxc_log_define(mount_utils, lxc);
27
28 /*
29 * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap, users wanting
30 * to transition to a different atime setting cannot simply specify the atime
31 * setting in @attr_set, but must also specify MOUNT_ATTR__ATIME in the
32 * @attr_clr field.
33 */
34 static inline void set_atime(struct lxc_mount_attr *attr)
35 {
36 switch (attr->attr_set & MOUNT_ATTR__ATIME) {
37 case MOUNT_ATTR_RELATIME:
38 __fallthrough;
39 case MOUNT_ATTR_NOATIME:
40 __fallthrough;
41 case MOUNT_ATTR_STRICTATIME:
42 attr->attr_clr = MOUNT_ATTR__ATIME;
43 break;
44 }
45 }
46
47 int mnt_attributes_new(unsigned int old_flags, unsigned int *new_flags)
48 {
49 unsigned int flags = 0;
50
51 if (old_flags & MS_RDONLY) {
52 flags |= MOUNT_ATTR_RDONLY;
53 old_flags &= ~MS_RDONLY;
54 }
55
56 if (old_flags & MS_NOSUID) {
57 flags |= MOUNT_ATTR_NOSUID;
58 old_flags &= ~MS_NOSUID;
59 }
60
61 if (old_flags & MS_NODEV) {
62 flags |= MOUNT_ATTR_NODEV;
63 old_flags &= ~MS_NODEV;
64 }
65
66 if (old_flags & MS_NOEXEC) {
67 flags |= MOUNT_ATTR_NOEXEC;
68 old_flags &= ~MS_NOEXEC;
69 }
70
71 if (old_flags & MS_RELATIME) {
72 flags |= MOUNT_ATTR_RELATIME;
73 old_flags &= ~MS_RELATIME;
74 }
75
76 if (old_flags & MS_NOATIME) {
77 flags |= MOUNT_ATTR_NOATIME;
78 old_flags &= ~MS_NOATIME;
79 }
80
81 if (old_flags & MS_STRICTATIME) {
82 flags |= MOUNT_ATTR_STRICTATIME;
83 old_flags &= ~MS_STRICTATIME;
84 }
85
86 if (old_flags & MS_NODIRATIME) {
87 flags |= MOUNT_ATTR_NODIRATIME;
88 old_flags &= ~MS_NODIRATIME;
89 }
90
91 *new_flags |= flags;
92 return old_flags;
93 }
94
95 int mnt_attributes_old(unsigned int new_flags, unsigned int *old_flags)
96 {
97 unsigned int flags = 0;
98
99 if (new_flags & MOUNT_ATTR_RDONLY) {
100 flags |= MS_RDONLY;
101 new_flags &= ~MOUNT_ATTR_RDONLY;
102 }
103
104 if (new_flags & MOUNT_ATTR_NOSUID) {
105 flags |= MS_NOSUID;
106 new_flags &= ~MOUNT_ATTR_NOSUID;
107 }
108
109 if (new_flags & MS_NODEV) {
110 flags |= MOUNT_ATTR_NODEV;
111 new_flags &= ~MS_NODEV;
112 }
113
114 if (new_flags & MOUNT_ATTR_NOEXEC) {
115 flags |= MS_NOEXEC;
116 new_flags &= ~MOUNT_ATTR_NOEXEC;
117 }
118
119 if (new_flags & MS_RELATIME) {
120 flags |= MS_RELATIME;
121 new_flags &= ~MOUNT_ATTR_RELATIME;
122 }
123
124 if (new_flags & MS_NOATIME) {
125 flags |= MS_NOATIME;
126 new_flags &= ~MOUNT_ATTR_NOATIME;
127 }
128
129 if (new_flags & MS_STRICTATIME) {
130 flags |= MS_STRICTATIME;
131 new_flags &= ~MOUNT_ATTR_STRICTATIME;
132 }
133
134 if (new_flags & MS_NODIRATIME) {
135 flags |= MS_NODIRATIME;
136 new_flags &= ~MOUNT_ATTR_NODIRATIME;
137 }
138
139 *old_flags |= flags;
140 return new_flags;
141 }
142
143 static int __fs_prepare(const char *fs_name, int fd_from)
144 {
145 __do_close int fd_fs = -EBADF;
146 char source[LXC_PROC_PID_FD_LEN];
147 int ret;
148
149 /* This helper is only concerned with filesystems. */
150 if (is_empty_string(fs_name))
151 return ret_errno(EINVAL);
152
153 /*
154 * So here is where I'm a bit disappointed. The new mount api doesn't
155 * let you specify the block device source through an fd. You need to
156 * pass a path which is obviously crap and runs afoul of the mission to
157 * only use fds for mount.
158 */
159 if (fd_from >= 0) {
160 ret = strnprintf(source, sizeof(source), "/proc/self/fd/%d", fd_from);
161 if (ret < 0)
162 return log_error_errno(-EIO, EIO, "Failed to create /proc/self/fd/%d", fd_from);
163 }
164
165 fd_fs = fsopen(fs_name, FSOPEN_CLOEXEC);
166 if (fd_fs < 0)
167 return log_error_errno(-errno, errno, "Failed to create new open new %s filesystem context", fs_name);
168
169 if (fd_from >= 0) {
170 ret = fsconfig(fd_fs, FSCONFIG_SET_STRING, "source", source, 0);
171 if (ret)
172 return log_error_errno(-errno, errno, "Failed to set %s filesystem source to %s", fs_name, source);
173
174 TRACE("Set %s filesystem source property to %s", fs_name, source);
175 }
176
177 TRACE("Finished initializing new %s filesystem context %d", fs_name, fd_fs);
178 return move_fd(fd_fs);
179 }
180
181 int fs_prepare(const char *fs_name,
182 int dfd_from, const char *path_from,
183 __u64 o_flags_from, __u64 resolve_flags_from)
184 {
185 __do_close int __fd_from = -EBADF;
186 int fd_from;
187
188 if (!is_empty_string(path_from)) {
189 struct lxc_open_how how = {
190 .flags = o_flags_from,
191 .resolve = resolve_flags_from,
192 };
193
194 __fd_from = openat2(dfd_from, path_from, &how, sizeof(how));
195 if (__fd_from < 0)
196 return -errno;
197 fd_from = __fd_from;
198 } else {
199 fd_from = dfd_from;
200 }
201
202 return __fs_prepare(fs_name, fd_from);
203 }
204
205 int fs_set_property(int fd_fs, const char *key, const char *val)
206 {
207 int ret;
208
209 ret = fsconfig(fd_fs, FSCONFIG_SET_STRING, key, val, 0);
210 if (ret < 0)
211 return log_error_errno(-errno, errno,
212 "Failed to set \"%s\" to \"%s\" on filesystem context %d",
213 key, val, fd_fs);
214
215 TRACE("Set \"%s\" to \"%s\" on filesystem context %d", key, val, fd_fs);
216 return 0;
217 }
218
219 int fs_set_flag(int fd_fs, const char *key)
220 {
221 int ret;
222
223 ret = fsconfig(fd_fs, FSCONFIG_SET_FLAG, key, NULL, 0);
224 if (ret < 0)
225 return syserror("Failed to set \"%s\" flag on filesystem context %d", key, fd_fs);
226
227 TRACE("Set \"%s\" flag on filesystem context %d", key, fd_fs);
228 return 0;
229 }
230
231 int fs_attach(int fd_fs,
232 int dfd_to, const char *path_to,
233 __u64 o_flags_to, __u64 resolve_flags_to,
234 unsigned int attr_flags)
235 {
236 __do_close int __fd_to = -EBADF, fd_fsmnt = -EBADF;
237 int fd_to, ret;
238
239 if (!is_empty_string(path_to)) {
240 struct lxc_open_how how = {
241 .flags = o_flags_to,
242 .resolve = resolve_flags_to,
243 };
244
245 __fd_to = openat2(dfd_to, path_to, &how, sizeof(how));
246 if (__fd_to < 0)
247 return -errno;
248 fd_to = __fd_to;
249 } else {
250 fd_to = dfd_to;
251 }
252
253 ret = fsconfig(fd_fs, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
254 if (ret < 0)
255 return log_error_errno(-errno, errno, "Failed to finalize filesystem context %d", fd_fs);
256
257 fd_fsmnt = fsmount(fd_fs, FSMOUNT_CLOEXEC, attr_flags);
258 if (fd_fsmnt < 0)
259 return log_error_errno(-errno, errno,
260 "Failed to create new mount for filesystem context %d", fd_fs);
261
262 ret = move_mount(fd_fsmnt, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
263 if (ret)
264 return log_error_errno(-errno, errno, "Failed to mount %d onto %d", fd_fsmnt, fd_to);
265
266 TRACE("Mounted %d onto %d", fd_fsmnt, fd_to);
267 return 0;
268 }
269
270 int create_detached_idmapped_mount(const char *path, int userns_fd,
271 bool recursive, __u64 attr_set, __u64 attr_clr)
272 {
273 __do_close int fd_tree_from = -EBADF;
274 unsigned int open_tree_flags = OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
275 struct lxc_mount_attr attr = {
276 .attr_set = MOUNT_ATTR_IDMAP | attr_set,
277 .attr_clr = attr_clr,
278 .userns_fd = userns_fd,
279 .propagation = MS_SLAVE,
280
281 };
282 int ret;
283
284 set_atime(&attr);
285
286 TRACE("Idmapped mount \"%s\" requested with user namespace fd %d", path, userns_fd);
287
288 if (recursive)
289 open_tree_flags |= AT_RECURSIVE;
290
291 fd_tree_from = open_tree(-EBADF, path, open_tree_flags);
292 if (fd_tree_from < 0)
293 return syserror("Failed to create detached mount");
294
295 ret = mount_setattr(fd_tree_from, "",
296 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
297 &attr, sizeof(attr));
298 if (ret < 0)
299 return syserror("Failed to change mount attributes");
300
301 return move_fd(fd_tree_from);
302 }
303
304 int move_detached_mount(int dfd_from, int dfd_to, const char *path_to,
305 __u64 o_flags_to, __u64 resolve_flags_to)
306 {
307 __do_close int __fd_to = -EBADF;
308 int fd_to, ret;
309
310 if (!is_empty_string(path_to)) {
311 struct lxc_open_how how = {
312 .flags = o_flags_to,
313 .resolve = resolve_flags_to,
314 };
315
316 __fd_to = openat2(dfd_to, path_to, &how, sizeof(how));
317 if (__fd_to < 0)
318 return -errno;
319 fd_to = __fd_to;
320 } else {
321 fd_to = dfd_to;
322 }
323
324 ret = move_mount(dfd_from, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
325 if (ret)
326 return syserror("Failed to attach detached mount %d to filesystem at %d", dfd_from, fd_to);
327
328 TRACE("Attach detached mount %d to filesystem at %d", dfd_from, fd_to);
329 return 0;
330 }
331
332 int __fd_bind_mount(int dfd_from, const char *path_from, __u64 o_flags_from,
333 __u64 resolve_flags_from, int dfd_to, const char *path_to,
334 __u64 o_flags_to, __u64 resolve_flags_to, __u64 attr_set,
335 __u64 attr_clr, __u64 propagation, int userns_fd,
336 bool recursive)
337 {
338 struct lxc_mount_attr attr = {
339 .attr_set = attr_set,
340 .attr_clr = attr_clr,
341 .propagation = propagation,
342 };
343 __do_close int __fd_from = -EBADF;
344 __do_close int fd_tree_from = -EBADF;
345 unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
346 int fd_from, ret;
347
348 set_atime(&attr);
349
350 if (!is_empty_string(path_from)) {
351 struct lxc_open_how how = {
352 .flags = o_flags_from,
353 .resolve = resolve_flags_from,
354 };
355
356 __fd_from = openat2(dfd_from, path_from, &how, sizeof(how));
357 if (__fd_from < 0)
358 return -errno;
359 fd_from = __fd_from;
360 } else {
361 fd_from = dfd_from;
362 }
363
364 if (recursive)
365 open_tree_flags |= AT_RECURSIVE;
366
367 fd_tree_from = open_tree(fd_from, "", open_tree_flags);
368 if (fd_tree_from < 0)
369 return syserror("Failed to create detached mount");
370
371 if (userns_fd >= 0) {
372 attr.attr_set |= MOUNT_ATTR_IDMAP;
373 attr.userns_fd = userns_fd;
374 TRACE("Idmapped mount requested with user namespace fd %d", userns_fd);
375 }
376
377 if (attr.attr_set) {
378 ret = mount_setattr(fd_tree_from, "",
379 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
380 &attr, sizeof(attr));
381 if (ret < 0)
382 return syserror("Failed to change mount attributes");
383 }
384
385 return move_detached_mount(fd_tree_from, dfd_to, path_to, o_flags_to,
386 resolve_flags_to);
387 }
388
389 int calc_remount_flags_new(int dfd_from, const char *path_from,
390 __u64 o_flags_from, __u64 resolve_flags_from,
391 bool remount, unsigned long cur_flags,
392 unsigned int *new_flags)
393 {
394 #ifdef HAVE_STATVFS
395 __do_close int fd_from = -EBADF;
396 unsigned int new_required_flags = 0;
397 int ret;
398 struct statvfs sb;
399
400 fd_from = open_at(dfd_from, path_from, o_flags_from, resolve_flags_from, 0);
401 if (fd_from < 0)
402 return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd_from, maybe_empty(path_from));
403
404 ret = fstatvfs(dfd_from, &sb);
405 if (ret < 0)
406 return log_error_errno(-errno, errno, "Failed to retrieve mount information from %d(%s)", fd_from, maybe_empty(path_from));
407
408 if (remount) {
409 if (sb.f_flag & MS_NOSUID)
410 new_required_flags |= MOUNT_ATTR_NOSUID;
411
412 if (sb.f_flag & MS_NODEV)
413 new_required_flags |= MOUNT_ATTR_NODEV;
414
415 if (sb.f_flag & MS_RDONLY)
416 new_required_flags |= MOUNT_ATTR_RDONLY;
417
418 if (sb.f_flag & MS_NOEXEC)
419 new_required_flags |= MOUNT_ATTR_NOEXEC;
420 }
421
422 if (sb.f_flag & MS_NOATIME)
423 new_required_flags |= MOUNT_ATTR_NOATIME;
424
425 if (sb.f_flag & MS_NODIRATIME)
426 new_required_flags |= MOUNT_ATTR_NODIRATIME;
427
428 if (sb.f_flag & MS_RELATIME)
429 new_required_flags |= MOUNT_ATTR_RELATIME;
430
431 if (sb.f_flag & MS_STRICTATIME)
432 new_required_flags |= MOUNT_ATTR_STRICTATIME;
433
434 *new_flags = (cur_flags | new_required_flags);
435 #endif
436 return 0;
437 }
438
439 int calc_remount_flags_old(int dfd_from, const char *path_from,
440 __u64 o_flags_from, __u64 resolve_flags_from,
441 bool remount, unsigned long cur_flags,
442 unsigned int *old_flags)
443 {
444 #ifdef HAVE_STATVFS
445 __do_close int fd_from = -EBADF;
446 unsigned int old_required_flags = 0;
447 int ret;
448 struct statvfs sb;
449
450 fd_from = open_at(dfd_from, path_from, o_flags_from, resolve_flags_from, 0);
451 if (fd_from < 0)
452 return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd_from, maybe_empty(path_from));
453
454 ret = fstatvfs(dfd_from, &sb);
455 if (ret < 0)
456 return log_error_errno(-errno, errno, "Failed to retrieve mount information from %d(%s)", fd_from, maybe_empty(path_from));
457
458 if (remount) {
459 if (sb.f_flag & MS_NOSUID)
460 old_required_flags |= MS_NOSUID;
461
462 if (sb.f_flag & MS_NODEV)
463 old_required_flags |= MS_NODEV;
464
465 if (sb.f_flag & MS_RDONLY)
466 old_required_flags |= MS_RDONLY;
467
468 if (sb.f_flag & MS_NOEXEC)
469 old_required_flags |= MS_NOEXEC;
470 }
471
472 if (sb.f_flag & MS_NOATIME)
473 old_required_flags |= MS_NOATIME;
474
475 if (sb.f_flag & MS_NODIRATIME)
476 old_required_flags |= MS_NODIRATIME;
477
478 if (sb.f_flag & MS_RELATIME)
479 old_required_flags |= MS_RELATIME;
480
481 if (sb.f_flag & MS_STRICTATIME)
482 old_required_flags |= MS_STRICTATIME;
483
484 *old_flags = (cur_flags | old_required_flags);
485 #endif
486 return 0;
487 }
488
/*
 * Make sure a remount honours restrictions such as NOEXEC that are already
 * in effect on the existing mount.
 *
 * The mount flags are read with statvfs() from @s, or from @d when @s is
 * NULL. If neither path is given, or statvfs() fails, @flags is returned
 * unchanged. The nosuid, nodev, rdonly and noexec flags are only carried over
 * when @flags contains MS_REMOUNT; the atime related flags are always carried
 * over.
 *
 * Return: @flags combined with the flags that must be preserved.
 */
unsigned long add_required_remount_flags(const char *s, const char *d,
					 unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long remount_only[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	static const unsigned long always_kept[] = {
		MS_NOATIME, MS_NODIRATIME, MS_LAZYTIME, MS_RELATIME, MS_STRICTATIME,
	};
	const char *target = s ? s : d;
	unsigned long required = 0;
	struct statvfs sb;
	size_t idx;

	if (!target)
		return flags;

	if (statvfs(target, &sb) < 0)
		return flags;

	if (flags & MS_REMOUNT) {
		for (idx = 0; idx < sizeof(remount_only) / sizeof(remount_only[0]); idx++) {
			if (sb.f_flag & remount_only[idx])
				required |= remount_only[idx];
		}
	}

	for (idx = 0; idx < sizeof(always_kept) / sizeof(always_kept[0]); idx++) {
		if (sb.f_flag & always_kept[idx])
			required |= always_kept[idx];
	}

	return flags | required;
#else
	return flags;
#endif
}
537
538 bool can_use_mount_api(void)
539 {
540 static int supported = -1;
541
542 if (supported == -1) {
543 __do_close int fd = -EBADF;
544
545 fd = openat2(-EBADF, "", NULL, 0);
546 if (fd > 0 || errno == ENOSYS) {
547 supported = 0;
548 return false;
549 }
550
551 fd = fsmount(-EBADF, 0, 0);
552 if (fd > 0 || errno == ENOSYS) {
553 supported = 0;
554 return false;
555 }
556
557 fd = fsconfig(-EBADF, -EINVAL, NULL, NULL, 0);
558 if (fd > 0 || errno == ENOSYS) {
559 supported = 0;
560 return false;
561 }
562
563 fd = fsopen(NULL, 0);
564 if (fd > 0 || errno == ENOSYS) {
565 supported = 0;
566 return false;
567 }
568
569 fd = move_mount(-EBADF, NULL, -EBADF, NULL, 0);
570 if (fd > 0 || errno == ENOSYS) {
571 supported = 0;
572 return false;
573 }
574
575 fd = open_tree(-EBADF, NULL, 0);
576 if (fd > 0 || errno == ENOSYS) {
577 supported = 0;
578 return false;
579 }
580
581 supported = 1;
582 TRACE("Kernel supports mount api");
583 }
584
585 return supported == 1;
586 }
587
/*
 * Report whether bind mounts can be performed through the new mount api.
 *
 * This requires can_use_mount_api() to succeed and mount_setattr() to be
 * implemented; the latter is probed with invalid arguments and considered
 * unusable if the call succeeds or fails with ENOSYS. The verdict is cached
 * in a function-local static.
 *
 * Return: true if bind mounts via the new mount api are usable.
 */
bool can_use_bind_mounts(void)
{
	static int supported = -1;
	int ret;

	/* Answer from the cache once the probe has run. */
	if (supported != -1)
		return supported == 1;

	if (!can_use_mount_api()) {
		supported = 0;
		return false;
	}

	ret = mount_setattr(-EBADF, NULL, 0, NULL, 0);
	if (ret == 0 || errno == ENOSYS) {
		supported = 0;
		return false;
	}

	supported = 1;
	TRACE("Kernel supports bind mounts in the new mount api");

	return supported == 1;
}
612
613 int mount_at(int dfd_from, const char *path_from, __u64 resolve_flags_from,
614 int dfd_to, const char *path_to, __u64 resolve_flags_to,
615 const char *fs_name, unsigned int flags, const void *data)
616 {
617 __do_close int __fd_from = -EBADF, __fd_to = -EBADF;
618 char *from = NULL, *to = NULL;
619 int fd_from, fd_to, ret;
620 char buf_from[LXC_PROC_SELF_FD_LEN], buf_to[LXC_PROC_SELF_FD_LEN];
621
622 if (dfd_from < 0 && !abspath(path_from))
623 return ret_errno(EINVAL);
624
625 if (dfd_to < 0 && !abspath(path_to))
626 return ret_errno(EINVAL);
627
628 if (!is_empty_string(path_from)) {
629 __fd_from = open_at(dfd_from, path_from, PROTECT_OPATH_FILE, resolve_flags_from, 0);
630 if (__fd_from < 0)
631 return -errno;
632 fd_from = __fd_from;
633 } else {
634 fd_from = dfd_from;
635 }
636 if (fd_from >= 0) {
637 ret = strnprintf(buf_from, sizeof(buf_from), "/proc/self/fd/%d", fd_from);
638 if (ret < 0)
639 return syserror("Failed to create path");
640 from = buf_from;
641 }
642
643 if (!is_empty_string(path_to)) {
644 __fd_to = open_at(dfd_to, path_to, PROTECT_OPATH_FILE, resolve_flags_to, 0);
645 if (__fd_to < 0)
646 return -errno;
647 fd_to = __fd_to;
648 } else {
649 fd_to = dfd_to;
650 }
651 if (fd_to >= 0) {
652 ret = strnprintf(buf_to, sizeof(buf_to), "/proc/self/fd/%d", fd_to);
653 if (ret < 0)
654 return syserror("Failed to create path");
655 to = buf_to;
656 }
657
658 ret = mount(from ?: fs_name, to, fs_name, flags, data);
659 if (ret < 0)
660 return syserror("Failed to mount \"%s\" to \"%s\"",
661 maybe_empty(from), maybe_empty(to));
662
663 TRACE("Mounted \"%s\" to \"%s\"", maybe_empty(from), maybe_empty(to));
664 return 0;
665 }